mirror of
https://github.com/FliegendeWurst/ripgrep-all.git
synced 2024-11-10 06:50:38 +00:00
Implement async postproc_pagebreaks
This commit is contained in:
parent
0d75d5bcc2
commit
af168efe1a
@ -119,14 +119,12 @@ pub fn postproc_encoding(
|
||||
))*/
|
||||
}
|
||||
|
||||
/**
|
||||
* adds the given prefix to each line in a AsyncRead
|
||||
*/
|
||||
/// Adds the given prefix to each line in an `AsyncRead`.
|
||||
pub fn postproc_prefix(line_prefix: &str, inp: impl AsyncRead + Send) -> impl AsyncRead + Send {
|
||||
let line_prefix_n = format!("\n{}", line_prefix); // clone since we need it later
|
||||
let line_prefix_o = Bytes::copy_from_slice(line_prefix.as_bytes());
|
||||
let regex = regex::bytes::Regex::new("\n").unwrap();
|
||||
let mut inp_stream = ReaderStream::new(inp);
|
||||
let inp_stream = ReaderStream::new(inp);
|
||||
let oup_stream = stream! {
|
||||
yield Ok(line_prefix_o);
|
||||
for await chunk in inp_stream {
|
||||
@ -145,30 +143,35 @@ pub fn postproc_prefix(line_prefix: &str, inp: impl AsyncRead + Send) -> impl As
|
||||
StreamReader::new(oup_stream)
|
||||
}
|
||||
|
||||
/**
|
||||
* adds the prefix `Page N:` to each line,
|
||||
* where N starts at one and is incremented for each ASCII Form Feed character in the input stream.
|
||||
* (That's the format output by pdftotext)
|
||||
*/
|
||||
/// Adds the prefix "Page N:" to each line,
|
||||
/// where N starts at one and is incremented for each ASCII Form Feed character in the input stream.
|
||||
/// ASCII form feeds are the page delimiters output by `pdftotext`.
|
||||
pub fn postproc_pagebreaks(line_prefix: &str, inp: impl AsyncRead) -> impl AsyncRead {
|
||||
let line_prefix = line_prefix.to_string(); // clone since
|
||||
let mut page_count = 1;
|
||||
let form_feed = b'\x0c';
|
||||
let regex = regex::bytes::Regex::new("\n").unwrap();
|
||||
let mut page_count = 0;
|
||||
let mut line_prefix = format!("\n{}Page {}:", line_prefix, page_count + 1);
|
||||
|
||||
panic!("todo!");
|
||||
tokio::io::empty()
|
||||
/*ByteReplacer {
|
||||
inner: inp,
|
||||
next_read: format!("{}Page {}:", line_prefix, page_count).into_bytes(),
|
||||
haystacker: Box::new(|buf| memchr::memchr2(b'\n', b'\x0c', buf)),
|
||||
replacer: Box::new(move |b| match b {
|
||||
b'\n' => format!("\n{}Page {}:", line_prefix, page_count).into_bytes(),
|
||||
b'\x0c' => {
|
||||
page_count += 1;
|
||||
format!("\n{}Page {}:", line_prefix, page_count).into_bytes()
|
||||
let inp_stream = ReaderStream::new(inp);
|
||||
let oup_stream = stream! {
|
||||
yield Ok(Bytes::copy_from_slice(line_prefix.as_bytes()));
|
||||
for await chunk in inp_stream {
|
||||
match chunk {
|
||||
Err(e) => yield Err(e),
|
||||
Ok(chunk) => {
|
||||
let chunk_iter = chunk.split(|byte| byte == &form_feed);
|
||||
for sub_chunk in chunk_iter {
|
||||
if sub_chunk.contains(&b'\n') {
|
||||
yield Ok(Bytes::copy_from_slice(&regex.replace_all(&sub_chunk, line_prefix.as_bytes())));
|
||||
page_count += 1;
|
||||
line_prefix = format!("\n{}Page {}:", line_prefix, page_count);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => b"[[imposs]]".to_vec(),
|
||||
}),
|
||||
}*/
|
||||
}
|
||||
};
|
||||
StreamReader::new(oup_stream)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
Loading…
Reference in New Issue
Block a user