fix prefixing

This commit is contained in:
phiresky 2020-09-30 17:26:42 +02:00
parent dfc10cbb53
commit baca166fda
9 changed files with 74 additions and 38 deletions

View File

@ -1,24 +1,24 @@
use crate::adapters::AdaptInfo;
// TODO: using iterator trait possible?? should basically be Iterator<AdaptInfo>
pub trait ReadIter {
pub trait AdaptedFilesIter {
// next takes a 'a-lived reference and returns an AdaptInfo that lives as long as the reference
fn next<'a>(&'a mut self) -> Option<AdaptInfo<'a>>;
}
/// A single AdaptInfo
pub struct SingleReadIter<'a> {
pub struct SingleAdaptedFileAsIter<'a> {
ai: Option<AdaptInfo<'a>>,
}
impl SingleReadIter<'_> {
pub fn new<'a>(ai: AdaptInfo<'a>) -> SingleReadIter<'a> {
SingleReadIter { ai: Some(ai) }
impl SingleAdaptedFileAsIter<'_> {
pub fn new<'a>(ai: AdaptInfo<'a>) -> SingleAdaptedFileAsIter<'a> {
SingleAdaptedFileAsIter { ai: Some(ai) }
}
}
impl ReadIter for SingleReadIter<'_> {
impl AdaptedFilesIter for SingleAdaptedFileAsIter<'_> {
fn next<'a>(&'a mut self) -> Option<AdaptInfo<'a>> {
self.ai.take()
}
}
pub type ReadIterBox<'a> = Box<dyn ReadIter + 'a>;
pub type AdaptedFilesIterBox<'a> = Box<dyn AdaptedFilesIter + 'a>;

View File

@ -9,7 +9,7 @@ pub mod spawning;
// pub mod tesseract;
// pub mod writing;
pub mod zip;
use crate::{config::RgaConfig, matching::*, read_iter::ReadIterBox};
use crate::{adapted_iter::AdaptedFilesIterBox, config::RgaConfig, matching::*};
use anyhow::*;
// use custom::builtin_spawning_adapters;
//use custom::CustomAdapterConfig;
@ -84,7 +84,7 @@ pub trait FileAdapter: GetMetadata {
&self,
a: AdaptInfo<'a>,
detection_reason: &FileMatcher,
) -> Result<ReadIterBox<'a>>;
) -> Result<AdaptedFilesIterBox<'a>>;
}
pub struct AdaptInfo<'a> {

View File

@ -232,16 +232,16 @@ mod test {
let (a, d) = simple_adapt_info(&filepath, Box::new(File::open(&filepath)?));
let mut r = adapter.adapt(a, &d)?;
let mut o = Vec::new();
r.read_to_end(&mut o)?;
let o = adapted_to_vec(r)?;
assert_eq!(
String::from_utf8(o)?,
"hello world
this is just a test.
1
\u{c}"
"PREFIX:hello world
PREFIX:this is just a test.
PREFIX:
PREFIX:1
PREFIX:
PREFIX:\u{c}
"
);
Ok(())
}

View File

@ -6,10 +6,39 @@ use anyhow::Result;
use std::{cmp::min, io::Read};
use crate::read_iter::{ReadIterBox, SingleReadIter};
use crate::adapted_iter::{AdaptedFilesIterBox, SingleAdaptedFileAsIter};
use super::{AdaptInfo, AdapterMeta, FileAdapter, GetMetadata};
/// Reader adapter that hands out one extra `\n` byte once the wrapped reader
/// reports end-of-stream.
///
/// The newline is appended unconditionally: no check is made whether the data
/// produced by `inner` already ended with a line break.
pub struct EnsureEndsWithNewline<R: Read> {
    /// Wrapped reader; its bytes are passed through unchanged.
    inner: R,
    /// `true` once the trailing newline has been emitted, so it is sent only once.
    added_newline: bool,
}

impl<R: Read> EnsureEndsWithNewline<R> {
    /// Wraps `r`. The trailing newline is still pending after construction.
    pub fn new(r: R) -> EnsureEndsWithNewline<R> {
        EnsureEndsWithNewline {
            inner: r,
            added_newline: false,
        }
    }
}

impl<R: Read> Read for EnsureEndsWithNewline<R> {
    /// Forwards to the inner reader; the first time the inner reader returns
    /// `Ok(0)` a single `\n` is written to `buf` instead, and `Ok(1)` is returned.
    ///
    /// A zero-length `buf` yields `Ok(0)` without consuming the pending newline.
    ///
    /// # Errors
    /// Any error from the inner reader is returned as-is.
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        // There is no room to store the newline in an empty buffer; indexing
        // `buf[0]` would panic. Report "nothing read" and keep the newline
        // pending for the next call with a usable buffer.
        if buf.is_empty() {
            return Ok(0);
        }
        match self.inner.read(buf)? {
            // Inner reader is exhausted and the newline was already delivered.
            0 if self.added_newline => Ok(0),
            // Inner reader is exhausted for the first time: emit the newline.
            0 => {
                buf[0] = b'\n';
                self.added_newline = true;
                Ok(1)
            }
            n => Ok(n),
        }
    }
}
struct ByteReplacer<R>
where
R: Read,
@ -99,15 +128,15 @@ impl FileAdapter for PostprocPrefix {
&self,
a: super::AdaptInfo<'a>,
_detection_reason: &crate::matching::FileMatcher,
) -> Result<ReadIterBox<'a>> {
let read = postproc_prefix(&a.line_prefix, a.inp)?;
) -> Result<AdaptedFilesIterBox<'a>> {
let read = EnsureEndsWithNewline::new(postproc_prefix(&a.line_prefix, a.inp)?);
// keep adapt info (filename etc) except replace inp
let ai = AdaptInfo {
inp: Box::new(read),
postprocess: false,
..a
};
Ok(Box::new(SingleReadIter::new(ai)))
Ok(Box::new(SingleAdaptedFileAsIter::new(ai)))
}
}

View File

@ -1,4 +1,4 @@
use crate::read_iter::SingleReadIter;
use crate::adapted_iter::SingleAdaptedFileAsIter;
use super::*;
use anyhow::*;
@ -146,7 +146,7 @@ impl FileAdapter for SpawningFileAdapter {
&self,
ai: AdaptInfo<'a>,
_detection_reason: &FileMatcher,
) -> Result<ReadIterBox<'a>> {
) -> Result<AdaptedFilesIterBox<'a>> {
let AdaptInfo {
filepath_hint,
mut inp,
@ -164,7 +164,7 @@ impl FileAdapter for SpawningFileAdapter {
.with_context(|| format!("Could not set cmd arguments for {}", self.inner.get_exe()))?;
debug!("executing {:?}", cmd);
let output = pipe_output(&line_prefix, cmd, &mut inp, self.inner.get_exe(), "")?;
Ok(Box::new(SingleReadIter::new(AdaptInfo {
Ok(Box::new(SingleAdaptedFileAsIter::new(AdaptInfo {
filepath_hint,
inp: output,
line_prefix,

View File

@ -1,5 +1,5 @@
use super::*;
use crate::{print_bytes, read_iter::ReadIter};
use crate::{adapted_iter::AdaptedFilesIter, print_bytes};
use anyhow::*;
use lazy_static::lazy_static;
use log::*;
@ -40,7 +40,7 @@ impl FileAdapter for ZipAdapter {
&self,
inp: AdaptInfo<'a>,
_detection_reason: &FileMatcher,
) -> Result<Box<dyn ReadIter + 'a>> {
) -> Result<Box<dyn AdaptedFilesIter + 'a>> {
Ok(Box::new(ZipAdaptIter { inp }))
}
}
@ -48,7 +48,7 @@ impl FileAdapter for ZipAdapter {
struct ZipAdaptIter<'a> {
inp: AdaptInfo<'a>,
}
impl<'a> ReadIter for ZipAdaptIter<'a> {
impl<'a> AdaptedFilesIter for ZipAdaptIter<'a> {
fn next<'b>(&'b mut self) -> Option<AdaptInfo<'b>> {
let line_prefix = &self.inp.line_prefix;
let filepath_hint = &self.inp.filepath_hint;
@ -115,14 +115,11 @@ mod test {
&PathBuf::from("outer.zip"),
Box::new(std::io::Cursor::new(zipfile)),
);
let mut res = RecursingConcattyReader::concat(adapter.adapt(a, &d)?);
let mut buf = Vec::new();
res.read_to_end(&mut buf)?;
let buf = adapted_to_vec(adapter.adapt(a, &d)?)?;
assert_eq!(
String::from_utf8(buf)?,
"PREFIX:outer.txt:outer text file\n",
"PREFIX:outer.txt: outer text file\nPREFIX:inner.zip: inner.txt: inner text file\n",
);
Ok(())

View File

@ -1,7 +1,7 @@
#![warn(clippy::all)]
#![feature(negative_impls)]
#![feature(specialization)]
pub mod adapted_iter;
pub mod adapters;
mod caching_writer;
pub mod config;
@ -9,7 +9,6 @@ pub mod matching;
pub mod pipe;
pub mod preproc;
pub mod preproc_cache;
pub mod read_iter;
pub mod recurse;
#[cfg(test)]
pub mod test_utils;

View File

@ -1,15 +1,15 @@
use crate::preproc::rga_preproc;
use crate::{adapters::*, read_iter::ReadIterBox};
use crate::{adapted_iter::AdaptedFilesIterBox, adapters::*};
use anyhow::*;
use std::io::Read;
pub struct RecursingConcattyReader<'a> {
inp: ReadIterBox<'a>,
inp: AdaptedFilesIterBox<'a>,
cur: Option<ReadBox<'a>>,
}
impl<'a> RecursingConcattyReader<'a> {
pub fn concat(inp: ReadIterBox<'a>) -> Result<Box<dyn Read + 'a>> {
pub fn concat(inp: AdaptedFilesIterBox<'a>) -> Result<Box<dyn Read + 'a>> {
let mut r = RecursingConcattyReader { inp, cur: None };
r.ascend()?;
Ok(Box::new(r))
@ -20,7 +20,7 @@ impl<'a> RecursingConcattyReader<'a> {
// we only need to access the inp: AdaptedFilesIter when the inner reader is done, so this should be safe
let ai = unsafe {
// would love to make this safe, but how? something like OwnedRef<inp, cur>
(*(inp as *mut ReadIterBox<'a>)).next()
(*(inp as *mut AdaptedFilesIterBox<'a>)).next()
};
self.cur = match ai {
Some(ai) => Some(rga_preproc(ai)?),

View File

@ -1,8 +1,11 @@
use crate::{
adapted_iter::AdaptedFilesIterBox,
adapters::{AdaptInfo, ReadBox},
config::RgaConfig,
matching::{FastFileMatcher, FileMatcher},
recurse::RecursingConcattyReader,
};
use anyhow::Result;
use std::path::{Path, PathBuf};
pub fn test_data_dir() -> PathBuf {
@ -28,3 +31,11 @@ pub fn simple_adapt_info<'a>(filepath: &Path, inp: ReadBox<'a>) -> (AdaptInfo<'a
.into(),
)
}
/// Test helper: concatenates everything produced by `adapted` through
/// `RecursingConcattyReader::concat` and returns the bytes read from it.
///
/// Fails if building the concatenated reader or reading from it fails.
pub fn adapted_to_vec(adapted: AdaptedFilesIterBox<'_>) -> Result<Vec<u8>> {
    let mut collected = Vec::new();
    RecursingConcattyReader::concat(adapted)?.read_to_end(&mut collected)?;
    Ok(collected)
}