diff --git a/src/read_iter.rs b/src/adapted_iter.rs similarity index 55% rename from src/read_iter.rs rename to src/adapted_iter.rs index 35e4e08..3f9befc 100644 --- a/src/read_iter.rs +++ b/src/adapted_iter.rs @@ -1,24 +1,24 @@ use crate::adapters::AdaptInfo; // TODO: using iterator trait possible?? should basically be Iterator -pub trait ReadIter { +pub trait AdaptedFilesIter { // next takes a 'a-lived reference and returns an AdaptInfo that lives as long as the reference fn next<'a>(&'a mut self) -> Option>; } /// A single AdaptInfo -pub struct SingleReadIter<'a> { +pub struct SingleAdaptedFileAsIter<'a> { ai: Option>, } -impl SingleReadIter<'_> { - pub fn new<'a>(ai: AdaptInfo<'a>) -> SingleReadIter<'a> { - SingleReadIter { ai: Some(ai) } +impl SingleAdaptedFileAsIter<'_> { + pub fn new<'a>(ai: AdaptInfo<'a>) -> SingleAdaptedFileAsIter<'a> { + SingleAdaptedFileAsIter { ai: Some(ai) } } } -impl ReadIter for SingleReadIter<'_> { +impl AdaptedFilesIter for SingleAdaptedFileAsIter<'_> { fn next<'a>(&'a mut self) -> Option> { self.ai.take() } } -pub type ReadIterBox<'a> = Box; +pub type AdaptedFilesIterBox<'a> = Box; diff --git a/src/adapters.rs b/src/adapters.rs index 3840c51..cb744bd 100644 --- a/src/adapters.rs +++ b/src/adapters.rs @@ -9,7 +9,7 @@ pub mod spawning; // pub mod tesseract; // pub mod writing; pub mod zip; -use crate::{config::RgaConfig, matching::*, read_iter::ReadIterBox}; +use crate::{adapted_iter::AdaptedFilesIterBox, config::RgaConfig, matching::*}; use anyhow::*; // use custom::builtin_spawning_adapters; //use custom::CustomAdapterConfig; @@ -84,7 +84,7 @@ pub trait FileAdapter: GetMetadata { &self, a: AdaptInfo<'a>, detection_reason: &FileMatcher, - ) -> Result>; + ) -> Result>; } pub struct AdaptInfo<'a> { diff --git a/src/adapters/custom.rs b/src/adapters/custom.rs index 3ecd6be..b320a57 100644 --- a/src/adapters/custom.rs +++ b/src/adapters/custom.rs @@ -232,16 +232,16 @@ mod test { let (a, d) = simple_adapt_info(&filepath, Box::new(File::open(&filepath)?)); let mut r = adapter.adapt(a, &d)?; - let mut o = Vec::new(); - r.read_to_end(&mut o)?; + let o = adapted_to_vec(r)?; assert_eq!( String::from_utf8(o)?, - "hello world -this is just a test. - -1 - -\u{c}" + "PREFIX:hello world +PREFIX:this is just a test. +PREFIX: +PREFIX:1 +PREFIX: +PREFIX:\u{c} +" ); Ok(()) } diff --git a/src/adapters/postproc.rs b/src/adapters/postproc.rs index fa2d0ac..cbca91d 100644 --- a/src/adapters/postproc.rs +++ b/src/adapters/postproc.rs @@ -6,10 +6,39 @@ use anyhow::Result; use std::{cmp::min, io::Read}; -use crate::read_iter::{ReadIterBox, SingleReadIter}; +use crate::adapted_iter::{AdaptedFilesIterBox, SingleAdaptedFileAsIter}; use super::{AdaptInfo, AdapterMeta, FileAdapter, GetMetadata}; +pub struct EnsureEndsWithNewline { + inner: R, + added_newline: bool, +} +impl EnsureEndsWithNewline { + pub fn new(r: R) -> EnsureEndsWithNewline { + EnsureEndsWithNewline { + inner: r, + added_newline: false, + } + } +} +impl Read for EnsureEndsWithNewline { + fn read(&mut self, buf: &mut [u8]) -> std::io::Result { + match self.inner.read(buf) { + Ok(0) => { + if self.added_newline { + Ok(0) + } else { + buf[0] = b'\n'; + self.added_newline = true; + Ok(1) + } + } + Ok(n) => Ok(n), + Err(e) => Err(e), + } + } +} struct ByteReplacer where R: Read, @@ -99,15 +128,15 @@ impl FileAdapter for PostprocPrefix { &self, a: super::AdaptInfo<'a>, _detection_reason: &crate::matching::FileMatcher, - ) -> Result> { - let read = postproc_prefix(&a.line_prefix, a.inp)?; + ) -> Result> { + let read = EnsureEndsWithNewline::new(postproc_prefix(&a.line_prefix, a.inp)?); // keep adapt info (filename etc) except replace inp let ai = AdaptInfo { inp: Box::new(read), postprocess: false, ..a }; - Ok(Box::new(SingleReadIter::new(ai))) + Ok(Box::new(SingleAdaptedFileAsIter::new(ai))) } } diff --git a/src/adapters/spawning.rs b/src/adapters/spawning.rs index cf785ff..1de2e52 100644 --- a/src/adapters/spawning.rs +++ b/src/adapters/spawning.rs @@ -1,4 +1,4 @@ -use crate::read_iter::SingleReadIter; +use crate::adapted_iter::SingleAdaptedFileAsIter; use super::*; use anyhow::*; @@ -146,7 +146,7 @@ impl FileAdapter for SpawningFileAdapter { &self, ai: AdaptInfo<'a>, _detection_reason: &FileMatcher, - ) -> Result> { + ) -> Result> { let AdaptInfo { filepath_hint, mut inp, @@ -164,7 +164,7 @@ impl FileAdapter for SpawningFileAdapter { .with_context(|| format!("Could not set cmd arguments for {}", self.inner.get_exe()))?; debug!("executing {:?}", cmd); let output = pipe_output(&line_prefix, cmd, &mut inp, self.inner.get_exe(), "")?; - Ok(Box::new(SingleReadIter::new(AdaptInfo { + Ok(Box::new(SingleAdaptedFileAsIter::new(AdaptInfo { filepath_hint, inp: output, line_prefix, diff --git a/src/adapters/zip.rs b/src/adapters/zip.rs index 2c1398b..7a2d614 100644 --- a/src/adapters/zip.rs +++ b/src/adapters/zip.rs @@ -1,5 +1,5 @@ use super::*; -use crate::{print_bytes, read_iter::ReadIter}; +use crate::{adapted_iter::AdaptedFilesIter, print_bytes}; use anyhow::*; use lazy_static::lazy_static; use log::*; @@ -40,7 +40,7 @@ impl FileAdapter for ZipAdapter { &self, inp: AdaptInfo<'a>, _detection_reason: &FileMatcher, - ) -> Result> { + ) -> Result> { Ok(Box::new(ZipAdaptIter { inp })) } } @@ -48,7 +48,7 @@ impl FileAdapter for ZipAdapter { struct ZipAdaptIter<'a> { inp: AdaptInfo<'a>, } -impl<'a> ReadIter for ZipAdaptIter<'a> { +impl<'a> AdaptedFilesIter for ZipAdaptIter<'a> { fn next<'b>(&'b mut self) -> Option> { let line_prefix = &self.inp.line_prefix; let filepath_hint = &self.inp.filepath_hint; @@ -115,14 +115,11 @@ mod test { &PathBuf::from("outer.zip"), Box::new(std::io::Cursor::new(zipfile)), ); - let mut res = RecursingConcattyReader::concat(adapter.adapt(a, &d)?); - - let mut buf = Vec::new(); - res.read_to_end(&mut buf)?; + let buf = adapted_to_vec(adapter.adapt(a, &d)?)?; assert_eq!( String::from_utf8(buf)?, - "PREFIX:outer.txt:outer text file\n", + "PREFIX:outer.txt: outer text file\nPREFIX:inner.zip: inner.txt: inner text file\n", ); Ok(()) diff --git a/src/lib.rs b/src/lib.rs index 6956804..b7ea539 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,7 +1,7 @@ #![warn(clippy::all)] #![feature(negative_impls)] -#![feature(specialization)] +pub mod adapted_iter; pub mod adapters; mod caching_writer; pub mod config; @@ -9,7 +9,6 @@ pub mod matching; pub mod pipe; pub mod preproc; pub mod preproc_cache; -pub mod read_iter; pub mod recurse; #[cfg(test)] pub mod test_utils; diff --git a/src/recurse.rs b/src/recurse.rs index 1c94fba..2c3418d 100644 --- a/src/recurse.rs +++ b/src/recurse.rs @@ -1,15 +1,15 @@ use crate::preproc::rga_preproc; -use crate::{adapters::*, read_iter::ReadIterBox}; +use crate::{adapted_iter::AdaptedFilesIterBox, adapters::*}; use anyhow::*; use std::io::Read; pub struct RecursingConcattyReader<'a> { - inp: ReadIterBox<'a>, + inp: AdaptedFilesIterBox<'a>, cur: Option>, } impl<'a> RecursingConcattyReader<'a> { - pub fn concat(inp: ReadIterBox<'a>) -> Result> { + pub fn concat(inp: AdaptedFilesIterBox<'a>) -> Result> { let mut r = RecursingConcattyReader { inp, cur: None }; r.ascend()?; Ok(Box::new(r)) @@ -20,7 +20,7 @@ impl<'a> RecursingConcattyReader<'a> { // we only need to access the inp: ReadIter when the inner reader is done, so this should be safe let ai = unsafe { // would love to make this safe, but how? something like OwnedRef - (*(inp as *mut ReadIterBox<'a>)).next() + (*(inp as *mut AdaptedFilesIterBox<'a>)).next() }; self.cur = match ai { Some(ai) => Some(rga_preproc(ai)?), diff --git a/src/test_utils.rs b/src/test_utils.rs index 5bbaebc..3ab4cc2 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -1,8 +1,11 @@ use crate::{ + adapted_iter::AdaptedFilesIterBox, adapters::{AdaptInfo, ReadBox}, config::RgaConfig, matching::{FastFileMatcher, FileMatcher}, + recurse::RecursingConcattyReader, }; +use anyhow::Result; use std::path::{Path, PathBuf}; pub fn test_data_dir() -> PathBuf { @@ -28,3 +31,11 @@ pub fn simple_adapt_info<'a>(filepath: &Path, inp: ReadBox<'a>) -> (AdaptInfo<'a .into(), ) } + +pub fn adapted_to_vec(adapted: AdaptedFilesIterBox<'_>) -> Result> { + let mut res = RecursingConcattyReader::concat(adapted)?; + + let mut buf = Vec::new(); + res.read_to_end(&mut buf)?; + Ok(buf) +}