Restore line_prefix parameter to postproc_pagebreaks

This commit is contained in:
Joseph LaFreniere 2022-12-24 21:56:57 -06:00
parent 8f7f5a1ed4
commit 4cac831a8f
No known key found for this signature in database
GPG Key ID: EE236AA0141EFCA3

View File

@ -146,11 +146,15 @@ pub fn postproc_prefix(line_prefix: &str, inp: impl AsyncRead + Send) -> impl As
/// Adds the prefix "Page N:" to each line,
/// where N starts at one and is incremented for each ASCII Form Feed character in the input stream.
/// ASCII form feeds are the page delimiters output by `pdftotext`.
pub fn postproc_pagebreaks(input: impl AsyncRead + Send) -> impl AsyncRead + Send {
pub fn postproc_pagebreaks(
line_prefix: &str,
input: impl AsyncRead + Send,
) -> impl AsyncRead + Send {
let line_prefix_o: String = line_prefix.into();
let regex_linefeed = regex::bytes::Regex::new(r"\x0c").unwrap();
let regex_newline = regex::bytes::Regex::new("\n").unwrap();
let mut page_count: i32 = 1;
let mut page_prefix: String = format!("Page {}:", page_count);
let mut page_prefix: String = format!("Page {}:{}", page_count, line_prefix_o);
let input_stream = ReaderStream::new(input);
let output_stream = stream! {
@ -162,10 +166,10 @@ pub fn postproc_pagebreaks(input: impl AsyncRead + Send) -> impl AsyncRead + Sen
for sub_chunk in sub_chunks {
// println!("{}", String::from_utf8_lossy(page_prefix.as_bytes()));
yield Ok(Bytes::copy_from_slice(page_prefix.as_bytes()));
page_prefix = format!("\nPage {}:", page_count);
page_prefix = format!("\nPage {}:{}", page_count, line_prefix_o);
yield Ok(Bytes::copy_from_slice(&regex_newline.replace_all(&sub_chunk, page_prefix.as_bytes())));
page_count += 1;
page_prefix = format!("\nPage {}:", page_count);
page_prefix = format!("\nPage {}:{}", page_count, line_prefix_o);
}
}
}
@ -187,7 +191,7 @@ mod tests {
let mock: Mock = Builder::new()
.read(b"Hello\nWorld\x0cFoo Bar\n\x0cTest")
.build();
let res = postproc_pagebreaks(mock).read_to_end(&mut output).await;
let res = postproc_pagebreaks("", mock).read_to_end(&mut output).await;
println!("{}", String::from_utf8_lossy(&output));
assert!(matches!(res, Ok(_)));
assert_eq!(