From d0d74adfe9d598843628eb99a3323c5552494c22 Mon Sep 17 00:00:00 2001
From: phiresky
Date: Thu, 11 Jun 2020 14:38:50 +0200
Subject: [PATCH] try read -> read fn

---
 Cargo.lock          |   1 +
 Cargo.toml          |   1 +
 src/adapters.rs     |   1 +
 src/adapters/fns.rs | 213 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 216 insertions(+)
 create mode 100644 src/adapters/fns.rs

diff --git a/Cargo.lock b/Cargo.lock
index eaa74b1..7e540ec 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -959,6 +959,7 @@ dependencies = [
  "glob",
  "lazy_static",
  "log",
+ "memchr",
  "paste",
  "path-clean",
  "pretty-bytes",
diff --git a/Cargo.toml b/Cargo.toml
index 65cb783..373af1d 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -48,3 +48,4 @@ schemars = {version = "0.8.0-alpha-2", features = ["preserve_order"]}
 directories-next = "1.0.1"
 derive_more = "0.99.7"
 pretty-bytes = "0.2.2"
+memchr = "2.3.3"
diff --git a/src/adapters.rs b/src/adapters.rs
index ebeb2e4..39be834 100644
--- a/src/adapters.rs
+++ b/src/adapters.rs
@@ -1,6 +1,7 @@
 pub mod custom;
 pub mod decompress;
 pub mod ffmpeg;
+pub mod fns;
 pub mod pdfpages;
 pub mod poppler;
 pub mod spawning;
diff --git a/src/adapters/fns.rs b/src/adapters/fns.rs
new file mode 100644
index 0000000..1ef2ba3
--- /dev/null
+++ b/src/adapters/fns.rs
@@ -0,0 +1,213 @@
+//! Helpers that label every line of a text stream with its page number,
+//! treating each form feed (`\x0c`) byte as a page break.
+
+//trait RunFnAdapter: GetMetadata {}
+
+//impl<T> FileAdapter for T where T: RunFnAdapter {}
+
+use anyhow::Result;
+use std::io::{BufRead, BufReader};
+use std::{
+    cmp::min,
+    io::{Read, Write},
+};
+
+/// Copies `inp` to `oup` line by line, writing each line as
+/// `{line_prefix}Page {n}: {line}`.
+///
+/// `n` starts at 1 and grows by one for every form feed (`\x0c`) found on a
+/// line. Form feeds are removed from the output, and a line that held nothing
+/// but form feeds is dropped.
+///
+/// # Errors
+///
+/// Fails if reading a line from `inp` or writing to `oup` fails.
+fn postproc(line_prefix: &str, inp: &mut dyn Read, oup: &mut dyn Write) -> Result<()> {
+    let mut page = 1;
+    for line in BufReader::new(inp).lines() {
+        let mut line = line?;
+        // Count every form feed so that several page breaks on one line
+        // advance the counter by the matching number of pages.
+        let page_breaks = line.matches('\x0c').count();
+        if page_breaks > 0 {
+            page += page_breaks;
+            line = line.replace('\x0c', "");
+            if line.is_empty() {
+                continue;
+            }
+        }
+        // Format first so that `oup` gets the whole line in one `write_all`.
+        oup.write_all(format!("{}Page {}: {}\n", line_prefix, page, line).as_bytes())?;
+    }
+    Ok(())
+}
+
+/// [`Read`] adapter that substitutes every `\n` and `\x0c` byte coming out of
+/// `inner` with the bytes returned by `replacer`.
+struct ByteReplacer<R> {
+    inner: R,
+    /// Translated bytes that did not fit into the caller's buffer yet.
+    pending: Vec<u8>,
+    /// Index of the first byte of `pending` that has not been handed out.
+    pending_pos: usize,
+    /// Called once per matched byte, in stream order.
+    replacer: Box<dyn FnMut(u8) -> Vec<u8>>,
+}
+
+impl<R> ByteReplacer<R> {
+    /// Appends `input` to `pending`, replacing each `\n` / `\x0c` on the way.
+    ///
+    /// Replacement bytes are stored as final output and never scanned again,
+    /// so a replacement may itself contain `\n` or `\x0c`.
+    fn translate(&mut self, mut input: &[u8]) {
+        while let Some(hit) = memchr::memchr2(b'\n', b'\x0c', input) {
+            self.pending.extend_from_slice(&input[..hit]);
+            let replacement = (self.replacer)(input[hit]);
+            self.pending.extend_from_slice(&replacement);
+            input = &input[hit + 1..];
+        }
+        self.pending.extend_from_slice(input);
+    }
+
+    /// Moves as many pending bytes as fit into `out` and returns that count.
+    fn drain_pending(&mut self, out: &mut [u8]) -> usize {
+        let available = &self.pending[self.pending_pos..];
+        let count = min(available.len(), out.len());
+        out[..count].copy_from_slice(&available[..count]);
+        self.pending_pos += count;
+        if self.pending_pos == self.pending.len() {
+            // Everything was handed out: reuse the allocation next time.
+            self.pending.clear();
+            self.pending_pos = 0;
+        }
+        count
+    }
+}
+
+impl<R> Read for ByteReplacer<R>
+where
+    R: Read,
+{
+    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
+        if buf.is_empty() {
+            return Ok(0);
+        }
+        loop {
+            // Bytes translated by an earlier call go out before new input.
+            if self.pending_pos < self.pending.len() {
+                return Ok(self.drain_pending(buf));
+            }
+
+            let read = self.inner.read(buf)?;
+            if read == 0 {
+                return Ok(0);
+            }
+            let first = match memchr::memchr2(b'\n', b'\x0c', &buf[..read]) {
+                Some(i) => i,
+                // Nothing to replace: the bytes are already in place.
+                None => return Ok(read),
+            };
+
+            // `buf[..first]` stays as read; the rest is translated into
+            // `pending` and copied back as far as `buf` has room.
+            self.translate(&buf[first..read]);
+            let written = first + self.drain_pending(&mut buf[first..]);
+            if written > 0 {
+                return Ok(written);
+            }
+            // Only reachable when a replacement was empty. `Ok(0)` would
+            // signal end of input, so fetch more data instead.
+        }
+    }
+}
+
+/// Wraps `inp` in a reader that inserts a page marker after every line break.
+///
+/// Each `\n` becomes `\n{line_prefix}Page {n}:`. Each form feed (`\x0c`)
+/// first increments `n` (which starts at 1) and is then replaced the same
+/// way. Text in front of the first line break passes through without a
+/// marker.
+///
+/// # Errors
+///
+/// Never fails at the moment; the `Result` is kept for the signature.
+// Not snake_case, but renaming it would break existing users of the name.
+#[allow(non_snake_case)]
+pub fn postprocB(line_prefix: &str, inp: impl Read) -> Result<impl Read> {
+    let line_prefix = line_prefix.to_owned();
+    let mut page_count = 1;
+
+    Ok(ByteReplacer {
+        inner: inp,
+        pending: Vec::new(),
+        pending_pos: 0,
+        replacer: Box::new(move |b: u8| {
+            match b {
+                b'\n' => {}
+                b'\x0c' => page_count += 1,
+                // `ByteReplacer` only hands `\n` and `\x0c` to the replacer.
+                other => unreachable!("unexpected byte {:#04x} passed to replacer", other),
+            }
+            format!("\n{}Page {}:", line_prefix, page_count).into_bytes()
+        }),
+    })
+}
+
+#[cfg(test)]
+mod tests {
+    use super::postprocB;
+    use anyhow::Result;
+    use std::io::Read;
+
+    fn test_from_strs(a: &str, b: &str) -> Result<()> {
+        let mut oup = Vec::new();
+        postprocB("", a.as_bytes())?.read_to_end(&mut oup)?;
+        let c = String::from_utf8_lossy(&oup);
+        if b != c {
+            anyhow::bail!("{}\nshould be\n{}\nbut is\n{}", a, b, c);
+        }
+
+        Ok(())
+    }
+
+    #[test]
+    fn post1() -> Result<()> {
+        let inp = "What is this\nThis is a test\nFoo";
+        let oup = "What is this\nPage 1:This is a test\nPage 1:Foo";
+
+        test_from_strs(inp, oup)?;
+
+        let inp = "What is this\nThis is a test\nFoo\x0c\nHelloooo\nHow are you?\x0c\nGreat!";
+        let oup = "What is this\nPage 1:This is a test\nPage 1:Foo\nPage 2:\nPage 2:Helloooo\nPage 2:How are you?\nPage 3:\nPage 3:Great!";
+
+        test_from_strs(inp, oup)?;
+
+        Ok(())
+    }
+
+    #[test]
+    fn post_line_prefix() -> Result<()> {
+        let mut oup = Vec::new();
+        postprocB("f: ", "a\nb\x0cc".as_bytes())?.read_to_end(&mut oup)?;
+        assert_eq!(String::from_utf8_lossy(&oup), "a\nf: Page 1:b\nf: Page 2:c");
+
+        Ok(())
+    }
+
+    #[test]
+    fn post_one_byte_reads() -> Result<()> {
+        let mut reader = postprocB("", "a\nb\x0cc".as_bytes())?;
+        let mut oup = Vec::new();
+        let mut byte = [0u8; 1];
+        loop {
+            let n = reader.read(&mut byte)?;
+            if n == 0 {
+                break;
+            }
+            oup.extend_from_slice(&byte[..n]);
+        }
+        assert_eq!(String::from_utf8_lossy(&oup), "a\nPage 1:b\nPage 2:c");
+
+        Ok(())
+    }
+}