fix prefixing

This commit is contained in:
phiresky 2020-09-30 17:26:42 +02:00
parent dfc10cbb53
commit baca166fda
9 changed files with 74 additions and 38 deletions

View File

@ -1,24 +1,24 @@
use crate::adapters::AdaptInfo; use crate::adapters::AdaptInfo;
// TODO: using iterator trait possible?? should basically be Iterator<AdaptInfo> // TODO: using iterator trait possible?? should basically be Iterator<AdaptInfo>
pub trait ReadIter { pub trait AdaptedFilesIter {
// next takes a 'a-lived reference and returns an AdaptInfo that lives as long as the reference // next takes a 'a-lived reference and returns an AdaptInfo that lives as long as the reference
fn next<'a>(&'a mut self) -> Option<AdaptInfo<'a>>; fn next<'a>(&'a mut self) -> Option<AdaptInfo<'a>>;
} }
/// A single AdaptInfo /// A single AdaptInfo
pub struct SingleReadIter<'a> { pub struct SingleAdaptedFileAsIter<'a> {
ai: Option<AdaptInfo<'a>>, ai: Option<AdaptInfo<'a>>,
} }
impl SingleReadIter<'_> { impl SingleAdaptedFileAsIter<'_> {
pub fn new<'a>(ai: AdaptInfo<'a>) -> SingleReadIter<'a> { pub fn new<'a>(ai: AdaptInfo<'a>) -> SingleAdaptedFileAsIter<'a> {
SingleReadIter { ai: Some(ai) } SingleAdaptedFileAsIter { ai: Some(ai) }
} }
} }
impl ReadIter for SingleReadIter<'_> { impl AdaptedFilesIter for SingleAdaptedFileAsIter<'_> {
fn next<'a>(&'a mut self) -> Option<AdaptInfo<'a>> { fn next<'a>(&'a mut self) -> Option<AdaptInfo<'a>> {
self.ai.take() self.ai.take()
} }
} }
pub type ReadIterBox<'a> = Box<dyn ReadIter + 'a>; pub type AdaptedFilesIterBox<'a> = Box<dyn AdaptedFilesIter + 'a>;

View File

@ -9,7 +9,7 @@ pub mod spawning;
// pub mod tesseract; // pub mod tesseract;
// pub mod writing; // pub mod writing;
pub mod zip; pub mod zip;
use crate::{config::RgaConfig, matching::*, read_iter::ReadIterBox}; use crate::{adapted_iter::AdaptedFilesIterBox, config::RgaConfig, matching::*};
use anyhow::*; use anyhow::*;
// use custom::builtin_spawning_adapters; // use custom::builtin_spawning_adapters;
//use custom::CustomAdapterConfig; //use custom::CustomAdapterConfig;
@ -84,7 +84,7 @@ pub trait FileAdapter: GetMetadata {
&self, &self,
a: AdaptInfo<'a>, a: AdaptInfo<'a>,
detection_reason: &FileMatcher, detection_reason: &FileMatcher,
) -> Result<ReadIterBox<'a>>; ) -> Result<AdaptedFilesIterBox<'a>>;
} }
pub struct AdaptInfo<'a> { pub struct AdaptInfo<'a> {

View File

@ -232,16 +232,16 @@ mod test {
let (a, d) = simple_adapt_info(&filepath, Box::new(File::open(&filepath)?)); let (a, d) = simple_adapt_info(&filepath, Box::new(File::open(&filepath)?));
let mut r = adapter.adapt(a, &d)?; let mut r = adapter.adapt(a, &d)?;
let mut o = Vec::new(); let o = adapted_to_vec(r)?;
r.read_to_end(&mut o)?;
assert_eq!( assert_eq!(
String::from_utf8(o)?, String::from_utf8(o)?,
"hello world "PREFIX:hello world
this is just a test. PREFIX:this is just a test.
PREFIX:
1 PREFIX:1
PREFIX:
\u{c}" PREFIX:\u{c}
"
); );
Ok(()) Ok(())
} }

View File

@ -6,10 +6,39 @@ use anyhow::Result;
use std::{cmp::min, io::Read}; use std::{cmp::min, io::Read};
use crate::read_iter::{ReadIterBox, SingleReadIter}; use crate::adapted_iter::{AdaptedFilesIterBox, SingleAdaptedFileAsIter};
use super::{AdaptInfo, AdapterMeta, FileAdapter, GetMetadata}; use super::{AdaptInfo, AdapterMeta, FileAdapter, GetMetadata};
/// Reader adapter that passes the wrapped reader's bytes through unchanged and
/// emits one extra `\n` byte once the wrapped reader reports end-of-input.
///
/// The newline is appended unconditionally: the adapter does not inspect
/// whether the wrapped data already ended with one.
pub struct EnsureEndsWithNewline<R: Read> {
    /// The wrapped reader.
    inner: R,
    /// Set once the trailing `\n` has been handed out, so it is emitted only once.
    added_newline: bool,
}

impl<R: Read> EnsureEndsWithNewline<R> {
    /// Wraps `r`; the trailing newline has not been emitted yet.
    pub fn new(r: R) -> EnsureEndsWithNewline<R> {
        EnsureEndsWithNewline {
            inner: r,
            added_newline: false,
        }
    }
}

impl<R: Read> Read for EnsureEndsWithNewline<R> {
    /// Reads from the wrapped reader; on its first `Ok(0)` (with a non-empty
    /// `buf`) writes a single `\n` into `buf` and returns `Ok(1)` instead.
    ///
    /// # Errors
    ///
    /// Any error from the wrapped reader is returned as-is.
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        let n = self.inner.read(buf)?;
        if n > 0 || self.added_newline {
            return Ok(n);
        }
        // `Ok(0)` is also what a reader returns for a zero-length `buf`, so it
        // only counts as end-of-input here when there is room for the newline.
        // Indexing `buf[0]` unconditionally would panic on an empty buffer.
        match buf.first_mut() {
            Some(slot) => {
                *slot = b'\n';
                self.added_newline = true;
                Ok(1)
            }
            None => Ok(0),
        }
    }
}
struct ByteReplacer<R> struct ByteReplacer<R>
where where
R: Read, R: Read,
@ -99,15 +128,15 @@ impl FileAdapter for PostprocPrefix {
&self, &self,
a: super::AdaptInfo<'a>, a: super::AdaptInfo<'a>,
_detection_reason: &crate::matching::FileMatcher, _detection_reason: &crate::matching::FileMatcher,
) -> Result<ReadIterBox<'a>> { ) -> Result<AdaptedFilesIterBox<'a>> {
let read = postproc_prefix(&a.line_prefix, a.inp)?; let read = EnsureEndsWithNewline::new(postproc_prefix(&a.line_prefix, a.inp)?);
// keep adapt info (filename etc) except replace inp // keep adapt info (filename etc) except replace inp
let ai = AdaptInfo { let ai = AdaptInfo {
inp: Box::new(read), inp: Box::new(read),
postprocess: false, postprocess: false,
..a ..a
}; };
Ok(Box::new(SingleReadIter::new(ai))) Ok(Box::new(SingleAdaptedFileAsIter::new(ai)))
} }
} }

View File

@ -1,4 +1,4 @@
use crate::read_iter::SingleReadIter; use crate::adapted_iter::SingleAdaptedFileAsIter;
use super::*; use super::*;
use anyhow::*; use anyhow::*;
@ -146,7 +146,7 @@ impl FileAdapter for SpawningFileAdapter {
&self, &self,
ai: AdaptInfo<'a>, ai: AdaptInfo<'a>,
_detection_reason: &FileMatcher, _detection_reason: &FileMatcher,
) -> Result<ReadIterBox<'a>> { ) -> Result<AdaptedFilesIterBox<'a>> {
let AdaptInfo { let AdaptInfo {
filepath_hint, filepath_hint,
mut inp, mut inp,
@ -164,7 +164,7 @@ impl FileAdapter for SpawningFileAdapter {
.with_context(|| format!("Could not set cmd arguments for {}", self.inner.get_exe()))?; .with_context(|| format!("Could not set cmd arguments for {}", self.inner.get_exe()))?;
debug!("executing {:?}", cmd); debug!("executing {:?}", cmd);
let output = pipe_output(&line_prefix, cmd, &mut inp, self.inner.get_exe(), "")?; let output = pipe_output(&line_prefix, cmd, &mut inp, self.inner.get_exe(), "")?;
Ok(Box::new(SingleReadIter::new(AdaptInfo { Ok(Box::new(SingleAdaptedFileAsIter::new(AdaptInfo {
filepath_hint, filepath_hint,
inp: output, inp: output,
line_prefix, line_prefix,

View File

@ -1,5 +1,5 @@
use super::*; use super::*;
use crate::{print_bytes, read_iter::ReadIter}; use crate::{adapted_iter::AdaptedFilesIter, print_bytes};
use anyhow::*; use anyhow::*;
use lazy_static::lazy_static; use lazy_static::lazy_static;
use log::*; use log::*;
@ -40,7 +40,7 @@ impl FileAdapter for ZipAdapter {
&self, &self,
inp: AdaptInfo<'a>, inp: AdaptInfo<'a>,
_detection_reason: &FileMatcher, _detection_reason: &FileMatcher,
) -> Result<Box<dyn ReadIter + 'a>> { ) -> Result<Box<dyn AdaptedFilesIter + 'a>> {
Ok(Box::new(ZipAdaptIter { inp })) Ok(Box::new(ZipAdaptIter { inp }))
} }
} }
@ -48,7 +48,7 @@ impl FileAdapter for ZipAdapter {
struct ZipAdaptIter<'a> { struct ZipAdaptIter<'a> {
inp: AdaptInfo<'a>, inp: AdaptInfo<'a>,
} }
impl<'a> ReadIter for ZipAdaptIter<'a> { impl<'a> AdaptedFilesIter for ZipAdaptIter<'a> {
fn next<'b>(&'b mut self) -> Option<AdaptInfo<'b>> { fn next<'b>(&'b mut self) -> Option<AdaptInfo<'b>> {
let line_prefix = &self.inp.line_prefix; let line_prefix = &self.inp.line_prefix;
let filepath_hint = &self.inp.filepath_hint; let filepath_hint = &self.inp.filepath_hint;
@ -115,14 +115,11 @@ mod test {
&PathBuf::from("outer.zip"), &PathBuf::from("outer.zip"),
Box::new(std::io::Cursor::new(zipfile)), Box::new(std::io::Cursor::new(zipfile)),
); );
let mut res = RecursingConcattyReader::concat(adapter.adapt(a, &d)?); let buf = adapted_to_vec(adapter.adapt(a, &d)?)?;
let mut buf = Vec::new();
res.read_to_end(&mut buf)?;
assert_eq!( assert_eq!(
String::from_utf8(buf)?, String::from_utf8(buf)?,
"PREFIX:outer.txt:outer text file\n", "PREFIX:outer.txt: outer text file\nPREFIX:inner.zip: inner.txt: inner text file\n",
); );
Ok(()) Ok(())

View File

@ -1,7 +1,7 @@
#![warn(clippy::all)] #![warn(clippy::all)]
#![feature(negative_impls)] #![feature(negative_impls)]
#![feature(specialization)]
pub mod adapted_iter;
pub mod adapters; pub mod adapters;
mod caching_writer; mod caching_writer;
pub mod config; pub mod config;
@ -9,7 +9,6 @@ pub mod matching;
pub mod pipe; pub mod pipe;
pub mod preproc; pub mod preproc;
pub mod preproc_cache; pub mod preproc_cache;
pub mod read_iter;
pub mod recurse; pub mod recurse;
#[cfg(test)] #[cfg(test)]
pub mod test_utils; pub mod test_utils;

View File

@ -1,15 +1,15 @@
use crate::preproc::rga_preproc; use crate::preproc::rga_preproc;
use crate::{adapters::*, read_iter::ReadIterBox}; use crate::{adapted_iter::AdaptedFilesIterBox, adapters::*};
use anyhow::*; use anyhow::*;
use std::io::Read; use std::io::Read;
pub struct RecursingConcattyReader<'a> { pub struct RecursingConcattyReader<'a> {
inp: ReadIterBox<'a>, inp: AdaptedFilesIterBox<'a>,
cur: Option<ReadBox<'a>>, cur: Option<ReadBox<'a>>,
} }
impl<'a> RecursingConcattyReader<'a> { impl<'a> RecursingConcattyReader<'a> {
pub fn concat(inp: ReadIterBox<'a>) -> Result<Box<dyn Read + 'a>> { pub fn concat(inp: AdaptedFilesIterBox<'a>) -> Result<Box<dyn Read + 'a>> {
let mut r = RecursingConcattyReader { inp, cur: None }; let mut r = RecursingConcattyReader { inp, cur: None };
r.ascend()?; r.ascend()?;
Ok(Box::new(r)) Ok(Box::new(r))
@ -20,7 +20,7 @@ impl<'a> RecursingConcattyReader<'a> {
// we only need to access the inp: ReadIter when the inner reader is done, so this should be safe // we only need to access the inp: ReadIter when the inner reader is done, so this should be safe
let ai = unsafe { let ai = unsafe {
// would love to make this safe, but how? something like OwnedRef<inp, cur> // would love to make this safe, but how? something like OwnedRef<inp, cur>
(*(inp as *mut ReadIterBox<'a>)).next() (*(inp as *mut AdaptedFilesIterBox<'a>)).next()
}; };
self.cur = match ai { self.cur = match ai {
Some(ai) => Some(rga_preproc(ai)?), Some(ai) => Some(rga_preproc(ai)?),

View File

@ -1,8 +1,11 @@
use crate::{ use crate::{
adapted_iter::AdaptedFilesIterBox,
adapters::{AdaptInfo, ReadBox}, adapters::{AdaptInfo, ReadBox},
config::RgaConfig, config::RgaConfig,
matching::{FastFileMatcher, FileMatcher}, matching::{FastFileMatcher, FileMatcher},
recurse::RecursingConcattyReader,
}; };
use anyhow::Result;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
pub fn test_data_dir() -> PathBuf { pub fn test_data_dir() -> PathBuf {
@ -28,3 +31,11 @@ pub fn simple_adapt_info<'a>(filepath: &Path, inp: ReadBox<'a>) -> (AdaptInfo<'a
.into(), .into(),
) )
} }
/// Drains `adapted` into a byte vector.
///
/// The iterator is turned into a single reader via
/// `RecursingConcattyReader::concat`, which is then read to its end.
///
/// # Errors
///
/// Returns the error from `concat` or from reading the resulting reader.
pub fn adapted_to_vec(adapted: AdaptedFilesIterBox<'_>) -> Result<Vec<u8>> {
    let mut collected = Vec::new();
    RecursingConcattyReader::concat(adapted)?.read_to_end(&mut collected)?;
    Ok(collected)
}