mirror of
https://github.com/FliegendeWurst/ripgrep-all.git
synced 2024-11-08 22:10:37 +00:00
simplify pagebreaks and add one space
This commit is contained in:
parent
2d28651bcb
commit
96ebcdef27
@ -170,7 +170,7 @@ impl FileAdapter for PostprocPageBreaks {
|
|||||||
a: super::AdaptInfo,
|
a: super::AdaptInfo,
|
||||||
_detection_reason: &crate::matching::FileMatcher,
|
_detection_reason: &crate::matching::FileMatcher,
|
||||||
) -> Result<AdaptedFilesIterBox> {
|
) -> Result<AdaptedFilesIterBox> {
|
||||||
let read = postproc_pagebreaks("", postproc_encoding(&a.line_prefix, a.inp)?);
|
let read = postproc_pagebreaks(postproc_encoding(&a.line_prefix, a.inp)?);
|
||||||
// keep adapt info (filename etc) except replace inp
|
// keep adapt info (filename etc) except replace inp
|
||||||
let ai = AdaptInfo {
|
let ai = AdaptInfo {
|
||||||
inp: Box::pin(read),
|
inp: Box::pin(read),
|
||||||
@ -190,30 +190,27 @@ impl FileAdapter for PostprocPageBreaks {
|
|||||||
/// Adds the prefix "Page N: " to each line,
|
/// Adds the prefix "Page N: " to each line,
|
||||||
/// where N starts at one and is incremented for each ASCII Form Feed character in the input stream.
|
/// where N starts at one and is incremented for each ASCII Form Feed character in the input stream.
|
||||||
/// ASCII form feeds are the page delimiters output by `pdftotext`.
|
/// ASCII form feeds are the page delimiters output by `pdftotext`.
|
||||||
pub fn postproc_pagebreaks(
|
pub fn postproc_pagebreaks(input: impl AsyncRead + Send) -> impl AsyncRead + Send {
|
||||||
line_prefix: &str,
|
|
||||||
input: impl AsyncRead + Send,
|
|
||||||
) -> impl AsyncRead + Send {
|
|
||||||
let line_prefix_o: String = line_prefix.into();
|
|
||||||
let regex_linefeed = regex::bytes::Regex::new(r"\x0c").unwrap();
|
let regex_linefeed = regex::bytes::Regex::new(r"\x0c").unwrap();
|
||||||
let regex_newline = regex::bytes::Regex::new("\n").unwrap();
|
let regex_newline = regex::bytes::Regex::new("\n").unwrap();
|
||||||
let mut page_count: i32 = 1;
|
let mut page_count: i32 = 1;
|
||||||
let mut page_prefix: String = format!("Page {page_count}:{line_prefix_o}");
|
let mut page_prefix: String = format!("Page {page_count}: ");
|
||||||
|
|
||||||
let input_stream = ReaderStream::new(input);
|
let input_stream = ReaderStream::new(input);
|
||||||
let output_stream = stream! {
|
let output_stream = stream! {
|
||||||
for await chunk in input_stream {
|
for await read_chunk in input_stream {
|
||||||
match chunk {
|
match read_chunk {
|
||||||
Err(e) => yield Err(e),
|
Err(e) => yield Err(e),
|
||||||
Ok(chunk) => {
|
Ok(chunk) => {
|
||||||
let sub_chunks = regex_linefeed.split(&chunk);
|
let page_chunks = regex_linefeed.split(&chunk);
|
||||||
for sub_chunk in sub_chunks {
|
for page_chunk in page_chunks {
|
||||||
// println!("{}", String::from_utf8_lossy(page_prefix.as_bytes()));
|
// println!("{}", String::from_utf8_lossy(page_prefix.as_bytes()));
|
||||||
yield Ok(Bytes::copy_from_slice(page_prefix.as_bytes()));
|
yield Ok(Bytes::copy_from_slice(page_prefix.as_bytes()));
|
||||||
page_prefix = format!("\nPage {}:{}", page_count, line_prefix_o);
|
page_prefix = format!("\nPage {page_count}: ");
|
||||||
yield Ok(Bytes::copy_from_slice(&regex_newline.replace_all(sub_chunk, page_prefix.as_bytes())));
|
|
||||||
|
yield Ok(Bytes::copy_from_slice(&regex_newline.replace_all(page_chunk, page_prefix.as_bytes())));
|
||||||
page_count += 1;
|
page_count += 1;
|
||||||
page_prefix = format!("\nPage {}:{}", page_count, line_prefix_o);
|
page_prefix = format!("\nPage {page_count}: ");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -226,6 +223,7 @@ pub fn postproc_pagebreaks(
|
|||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
|
use pretty_assertions::assert_eq;
|
||||||
use tokio::pin;
|
use tokio::pin;
|
||||||
use tokio_test::io::Builder;
|
use tokio_test::io::Builder;
|
||||||
use tokio_test::io::Mock;
|
use tokio_test::io::Mock;
|
||||||
@ -236,12 +234,12 @@ mod tests {
|
|||||||
let mock: Mock = Builder::new()
|
let mock: Mock = Builder::new()
|
||||||
.read(b"Hello\nWorld\x0cFoo Bar\n\x0cTest")
|
.read(b"Hello\nWorld\x0cFoo Bar\n\x0cTest")
|
||||||
.build();
|
.build();
|
||||||
let res = postproc_pagebreaks("", mock).read_to_end(&mut output).await;
|
let res = postproc_pagebreaks(mock).read_to_end(&mut output).await;
|
||||||
println!("{}", String::from_utf8_lossy(&output));
|
println!("{}", String::from_utf8_lossy(&output));
|
||||||
assert!(matches!(res, Ok(_)));
|
assert!(matches!(res, Ok(_)));
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
output,
|
String::from_utf8_lossy(&output),
|
||||||
b"Page 1:Hello\nPage 1:World\nPage 2:Foo Bar\nPage 2:\nPage 3:Test"
|
"Page 1: Hello\nPage 1: World\nPage 2: Foo Bar\nPage 2: \nPage 3: Test"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -275,23 +273,14 @@ mod tests {
|
|||||||
let mut oup = Vec::new();
|
let mut oup = Vec::new();
|
||||||
let inp = postproc_encoding("", a)?;
|
let inp = postproc_encoding("", a)?;
|
||||||
if pagebreaks {
|
if pagebreaks {
|
||||||
postproc_pagebreaks(line_prefix, inp)
|
postproc_pagebreaks(inp).read_to_end(&mut oup).await?;
|
||||||
.read_to_end(&mut oup)
|
|
||||||
.await?;
|
|
||||||
} else {
|
} else {
|
||||||
let x = postproc_prefix(line_prefix, inp);
|
let x = postproc_prefix(line_prefix, inp);
|
||||||
pin!(x);
|
pin!(x);
|
||||||
x.read_to_end(&mut oup).await?;
|
x.read_to_end(&mut oup).await?;
|
||||||
}
|
}
|
||||||
let c = String::from_utf8_lossy(&oup);
|
let c = String::from_utf8_lossy(&oup);
|
||||||
if b != c {
|
assert_eq!(c, b, "source: {}", String::from_utf8_lossy(a));
|
||||||
anyhow::bail!(
|
|
||||||
"`{}`\nshould be\n`{}`\nbut is\n`{}`",
|
|
||||||
String::from_utf8_lossy(a),
|
|
||||||
b,
|
|
||||||
c
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user