From 4cac831a8f6a8f308ef940fdcc79cc92175009a5 Mon Sep 17 00:00:00 2001 From: Joseph LaFreniere Date: Sat, 24 Dec 2022 21:56:57 -0600 Subject: [PATCH] Restore `line_prefix` parameter to `postproc_pagebreaks` --- src/adapters/postproc.rs | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/adapters/postproc.rs b/src/adapters/postproc.rs index d6db7d9..2fc677d 100644 --- a/src/adapters/postproc.rs +++ b/src/adapters/postproc.rs @@ -146,11 +146,15 @@ pub fn postproc_prefix(line_prefix: &str, inp: impl AsyncRead + Send) -> impl As /// Adds the prefix "Page N:" to each line, /// where N starts at one and is incremented for each ASCII Form Feed character in the input stream. /// ASCII form feeds are the page delimiters output by `pdftotext`. -pub fn postproc_pagebreaks(input: impl AsyncRead + Send) -> impl AsyncRead + Send { +pub fn postproc_pagebreaks( + line_prefix: &str, + input: impl AsyncRead + Send, +) -> impl AsyncRead + Send { + let line_prefix_o: String = line_prefix.into(); let regex_linefeed = regex::bytes::Regex::new(r"\x0c").unwrap(); let regex_newline = regex::bytes::Regex::new("\n").unwrap(); let mut page_count: i32 = 1; - let mut page_prefix: String = format!("Page {}:", page_count); + let mut page_prefix: String = format!("Page {}:{}", page_count, line_prefix_o); let input_stream = ReaderStream::new(input); let output_stream = stream! 
{ @@ -162,10 +166,10 @@ pub fn postproc_pagebreaks(input: impl AsyncRead + Send) -> impl AsyncRead + Sen for sub_chunk in sub_chunks { // println!("{}", String::from_utf8_lossy(page_prefix.as_bytes())); yield Ok(Bytes::copy_from_slice(page_prefix.as_bytes())); - page_prefix = format!("\nPage {}:", page_count); + page_prefix = format!("\nPage {}:{}", page_count, line_prefix_o); yield Ok(Bytes::copy_from_slice(&regex_newline.replace_all(&sub_chunk, page_prefix.as_bytes()))); page_count += 1; - page_prefix = format!("\nPage {}:", page_count); + page_prefix = format!("\nPage {}:{}", page_count, line_prefix_o); } } } @@ -187,7 +191,7 @@ mod tests { let mock: Mock = Builder::new() .read(b"Hello\nWorld\x0cFoo Bar\n\x0cTest") .build(); - let res = postproc_pagebreaks(mock).read_to_end(&mut output).await; + let res = postproc_pagebreaks("", mock).read_to_end(&mut output).await; println!("{}", String::from_utf8_lossy(&output)); assert!(matches!(res, Ok(_))); assert_eq!(