add doc to postproc

This commit is contained in:
phiresky 2022-11-28 10:46:58 +01:00
parent 54799f1452
commit 7c7b6a03ae

View File

@ -71,6 +71,10 @@ impl Read for ReadErr {
} }
}*/ }*/
/**
* Detects and converts encodings other than UTF-8 to UTF-8.
* If the input stream does not contain valid text, returns the string `[rga: binary data]` instead.
*/
pub fn postproc_encoding( pub fn postproc_encoding(
line_prefix: &str, line_prefix: &str,
inp: impl AsyncRead + Send + 'static, inp: impl AsyncRead + Send + 'static,
@ -117,6 +121,9 @@ pub fn postproc_encoding(
))*/ ))*/
} }
/**
* Adds the given prefix to each line of an `AsyncRead`.
*/
pub fn postproc_prefix(line_prefix: &str, inp: impl AsyncRead + Send) -> impl AsyncRead + Send { pub fn postproc_prefix(line_prefix: &str, inp: impl AsyncRead + Send) -> impl AsyncRead + Send {
let line_prefix_n = format!("\n{}", line_prefix); // clone since we need it later let line_prefix_n = format!("\n{}", line_prefix); // clone since we need it later
let line_prefix_o = Bytes::copy_from_slice(line_prefix.as_bytes()); let line_prefix_o = Bytes::copy_from_slice(line_prefix.as_bytes());
@ -140,6 +147,11 @@ pub fn postproc_prefix(line_prefix: &str, inp: impl AsyncRead + Send) -> impl As
StreamReader::new(oup_stream) StreamReader::new(oup_stream)
} }
/**
* adds the prefix `Page N:` to each line,
* where N starts at one and is incremented for each ASCII Form Feed character in the input stream.
* (That's the format output by pdftotext)
*/
pub fn postproc_pagebreaks(line_prefix: &str, inp: impl AsyncRead) -> impl AsyncRead { pub fn postproc_pagebreaks(line_prefix: &str, inp: impl AsyncRead) -> impl AsyncRead {
let line_prefix = line_prefix.to_string(); // clone since let line_prefix = line_prefix.to_string(); // clone since
let mut page_count = 1; let mut page_count = 1;