From cc744176cac9ed77a550f3c5e027b9e06eddd511 Mon Sep 17 00:00:00 2001 From: phiresky Date: Mon, 28 Sep 2020 22:55:55 +0200 Subject: [PATCH] small amount of progress --- Cargo.lock | 18 ++++++++++++ Cargo.toml | 3 +- src/adapters.rs | 50 ++++++++++++++++++++------------- src/adapters/writing.rs | 13 +++++++++ src/adapters/zip.rs | 62 +++++++++++++++++++++++++++++------------ src/config.rs | 7 ++--- src/preproc.rs | 55 ++++++++++++++++++++++++++++++++---- 7 files changed, 160 insertions(+), 48 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 00adc34..5e8b84a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -751,6 +751,15 @@ dependencies = [ "num-traits", ] +[[package]] +name = "owning_ref" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ff55baddef9e4ad00f88b6c743a2a8062d4c6ade126c2a528644b8e444d52ce" +dependencies = [ + "stable_deref_trait", +] + [[package]] name = "paste" version = "1.0.0" @@ -957,6 +966,7 @@ dependencies = [ "lazy_static", "log", "memchr", + "owning_ref", "paste", "path-clean", "pretty-bytes", @@ -1119,6 +1129,12 @@ version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fbee7696b84bbf3d89a1c2eccff0850e3047ed46bfcd2e92c29a2d074d57e252" +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + [[package]] name = "static_assertions" version = "1.1.0" @@ -1432,6 +1448,8 @@ dependencies = [ [[package]] name = "zip" version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "543adf038106b64cfca4711c82c917d785e3540e04f7996554488f988ec43124" dependencies = [ "byteorder", "bzip2 0.3.3", diff --git a/Cargo.toml b/Cargo.toml index 6168b24..3502bc1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,7 +25,6 @@ serde = { version = "1.0.115", features = ["derive"] } zstd = "0.5.3" lazy_static = "1.4.0" serde_json = "1.0.57" -zip = {path="../zip-rs"} crossbeam = "0.7.3" clap = { version = "2.33.3", features = ["wrap_help"] } log = "0.4.11" @@ -52,3 +51,5 @@ memchr = "2.3.3" crossbeam-channel = "0.4.4" dyn-clone = "1.0.2" dyn-clonable = "0.9.0" +zip = "0.5.8" +owning_ref = "0.4.1" diff --git a/src/adapters.rs b/src/adapters.rs index feddc85..ef6455e 100644 --- a/src/adapters.rs +++ b/src/adapters.rs @@ -1,18 +1,18 @@ -pub mod custom; -pub mod decompress; -pub mod ffmpeg; +//pub mod custom; +// pub mod decompress; +// pub mod ffmpeg; pub mod fns; // pub mod pdfpages; -pub mod spawning; -pub mod sqlite; +// pub mod spawning; +// pub mod sqlite; // pub mod tar; // pub mod tesseract; -pub mod writing; +// pub mod writing; pub mod zip; use crate::{config::RgaConfig, matching::*}; use anyhow::*; -use custom::builtin_spawning_adapters; -use custom::CustomAdapterConfig; +// use custom::builtin_spawning_adapters; +//use custom::CustomAdapterConfig; use log::*; use std::borrow::Cow; @@ -22,7 +22,7 @@ use std::iter::Iterator; use std::path::{Path, PathBuf}; use std::rc::Rc; -pub type ReadBox<'a> = Box; +pub type ReadBox<'a> = Box; pub struct AdapterMeta { /// unique short name of this adapter (a-z0-9 only) @@ -80,8 +80,18 @@ pub trait FileAdapter: GetMetadata { /// adapt a file. /// /// detection_reason is the Matcher that was used to identify this file. Unless --rga-accurate was given, it is always a FastMatcher - fn adapt<'a>(&self, a: AdaptInfo<'a>, detection_reason: &FileMatcher) -> Result>; + fn adapt<'a>( + &self, + a: AdaptInfo<'a>, + detection_reason: &FileMatcher, + ) -> Result>; } + +pub trait ReadIter { + // next takes a 'a-lived reference and returns a Read that lives as long as the reference + fn next<'a>(&'a mut self) -> Option>; +} + pub struct AdaptInfo<'a> { /// file path. May not be an actual file on the file system (e.g. in an archive). Used for matching file extensions. pub filepath_hint: PathBuf, @@ -99,29 +109,29 @@ pub struct AdaptInfo<'a> { /// (enabledAdapters, disabledAdapters) type AdaptersTuple = (Vec>, Vec>); -pub fn get_all_adapters(custom_adapters: Option>) -> AdaptersTuple { +pub fn get_all_adapters(/*custom_adapters: Option>*/) -> AdaptersTuple { // order in descending priority let mut adapters: Vec> = vec![]; - if let Some(custom_adapters) = custom_adapters { + /*if let Some(custom_adapters) = custom_adapters { for adapter_config in custom_adapters { adapters.push(Rc::new(adapter_config.to_adapter())); } - } + }*/ let internal_adapters: Vec> = vec![ - Rc::new(ffmpeg::FFmpegAdapter::new()), + //Rc::new(ffmpeg::FFmpegAdapter::new()), Rc::new(zip::ZipAdapter::new()), - Rc::new(decompress::DecompressAdapter::new()), + //Rc::new(decompress::DecompressAdapter::new()), // Rc::new(tar::TarAdapter::new()), - Rc::new(sqlite::SqliteAdapter::new()), + //Rc::new(sqlite::SqliteAdapter::new()), // Rc::new(pdfpages::PdfPagesAdapter::new()), // Rc::new(tesseract::TesseractAdapter::new()), ]; - adapters.extend( + /*adapters.extend( builtin_spawning_adapters .iter() .map(|e| -> Rc { Rc::new(e.clone().to_adapter()) }), - ); + );*/ adapters.extend(internal_adapters); adapters @@ -138,10 +148,10 @@ pub fn get_all_adapters(custom_adapters: Option>) -> Ad * - "+a,b" means use default list but also a and b (a,b will be prepended to the list so given higher priority) */ pub fn get_adapters_filtered>( - custom_adapters: Option>, + /*custom_adapters: Option>,*/ adapter_names: &Vec, ) -> Result>> { - let (def_enabled_adapters, def_disabled_adapters) = get_all_adapters(custom_adapters); + let (def_enabled_adapters, def_disabled_adapters) = get_all_adapters(/*custom_adapters*/); let adapters = if !adapter_names.is_empty() { let adapters_map: HashMap<_, _> = def_enabled_adapters .iter() diff --git a/src/adapters/writing.rs b/src/adapters/writing.rs index 6824e02..132f7af 100644 --- a/src/adapters/writing.rs +++ b/src/adapters/writing.rs @@ -1,6 +1,8 @@ use super::{FileAdapter, GetMetadata, ReadBox}; use anyhow::Result; +use std::io::Read; use std::io::Write; +use std::thread::Thread; // this trait / struct split is ugly but necessary because of "conflicting trait implementation" otherwise with SpawningFileAdapter #[dyn_clonable::clonable] @@ -28,6 +30,17 @@ impl GetMetadata for WritingFileAdapter { } } +struct PipedReadWriter<'a> { + inner: ReadBox<'a>, + pipe_thread: Thread, +} + +impl<'a> Read for PipedReadWriter<'a> { + fn read(&mut self, buf: &mut [u8]) -> std::io::Result { + todo!() + } +} + impl FileAdapter for WritingFileAdapter { fn adapt<'a>( &self, diff --git a/src/adapters/zip.rs b/src/adapters/zip.rs index 1d41179..521a604 100644 --- a/src/adapters/zip.rs +++ b/src/adapters/zip.rs @@ -4,7 +4,6 @@ use ::zip::read::ZipFile; use anyhow::*; use lazy_static::lazy_static; use log::*; -use writing::{WritingFileAdapter, WritingFileAdapterTrait}; // todo: // maybe todo: read list of extensions from @@ -30,8 +29,8 @@ lazy_static! { pub struct ZipAdapter; impl ZipAdapter { - pub fn new() -> WritingFileAdapter { - WritingFileAdapter::new(Box::new(ZipAdapter)) + pub fn new() -> ZipAdapter { + ZipAdapter } } impl GetMetadata for ZipAdapter { @@ -49,22 +48,49 @@ fn is_dir(f: &ZipFile) -> bool { .map_or(false, |c| c == '/' || c == '\\') } -impl WritingFileAdapterTrait for ZipAdapter { - fn adapt_write<'a>( +struct OutIter<'a> { + inp: AdaptInfo<'a>, +} +impl<'a> ReadIter for OutIter<'a> { + fn next<'b>(&'b mut self) -> Option> { + let line_prefix = "todo"; + let filepath_hint = std::path::PathBuf::from("hello"); + let archive_recursion_depth = 1; + ::zip::read::read_zipfile_from_stream(&mut self.inp.inp) + .unwrap() + .and_then(|file| { + if is_dir(&file) { + return None; + } + debug!( + "{}{}|{}: {} ({} packed)", + line_prefix, + filepath_hint.to_string_lossy(), + file.name(), + print_bytes(file.size() as f64), + print_bytes(file.compressed_size() as f64) + ); + let line_prefix = format!("{}{}: ", line_prefix, file.name()); + Some(AdaptInfo { + filepath_hint: file.sanitized_name().clone(), + is_real_file: false, + inp: Box::new(file), + line_prefix, + archive_recursion_depth: archive_recursion_depth + 1, + config: RgaConfig::default(), //config.clone(), + }) + }) + } +} + +impl FileAdapter for ZipAdapter { + fn adapt<'a>( &self, ai: AdaptInfo<'a>, - _detection_reason: &FileMatcher, - oup: &mut (dyn Write + 'a), - ) -> Result<()> { - let AdaptInfo { - filepath_hint, - mut inp, - line_prefix, - archive_recursion_depth, - config, - .. - } = ai; - loop { + detection_reason: &FileMatcher, + ) -> Result> { + Ok(Box::new(OutIter { inp: ai })) + /*loop { match ::zip::read::read_zipfile_from_stream(&mut inp) { Ok(None) => break, Ok(Some(mut file)) => { @@ -95,6 +121,6 @@ impl WritingFileAdapterTrait for ZipAdapter { Err(e) => return Err(e.into()), } } - Ok(()) + Ok(())*/ } } diff --git a/src/config.rs b/src/config.rs index 7858ec1..b734912 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1,4 +1,4 @@ -use crate::{adapters::custom::CustomAdapterConfig, project_dirs}; +use crate::project_dirs; use anyhow::*; use derive_more::FromStr; use log::*; @@ -151,13 +151,12 @@ pub struct RgaConfig { )] pub max_archive_recursion: MaxArchiveRecursion, - ////////////////////////////////////////// + /* ////////////////////////////////////////// //////////////////////////// Config file only ////////////////////////////////////////// #[serde(default, skip_serializing_if = "is_default")] #[structopt(skip)] - pub custom_adapters: Option>, - + pub custom_adapters: Option>,*/ ////////////////////////////////////////// //////////////////////////// CMD line only ////////////////////////////////////////// diff --git a/src/preproc.rs b/src/preproc.rs index 231b3d4..57ebce7 100644 --- a/src/preproc.rs +++ b/src/preproc.rs @@ -6,8 +6,9 @@ use crate::{ }; use anyhow::*; use log::*; +use owning_ref::OwningRefMut; use path_clean::PathClean; -use std::convert::TryInto; +use std::{convert::TryInto, io::Read}; use std::io::{BufRead, BufReader}; @@ -30,7 +31,7 @@ pub fn rga_preproc(ai: AdaptInfo) -> Result { } = ai; debug!("path (hint) to preprocess: {:?}", filepath_hint); let filtered_adapters = - get_adapters_filtered(config.custom_adapters.clone(), &config.adapters)?; + get_adapters_filtered(/*config.custom_adapters.clone(),*/ &config.adapters)?; let adapters = adapter_matcher(&filtered_adapters, config.accurate)?; let filename = filepath_hint .file_name() @@ -87,6 +88,48 @@ pub fn rga_preproc(ai: AdaptInfo) -> Result { } } +struct ConcattyReader<'a> { + inp: Box, + cur: Option>, +} +impl<'a> ConcattyReader<'a> { + fn ascend(&mut self) { + self.cur = unsafe { + // would love to make this safe, but how? + let r: *mut Box = &mut self.inp; + (*r).next() + }; + eprintln!( + "ascended to {}", + self.cur + .as_ref() + .map(|e| e.filepath_hint.to_string_lossy().into_owned()) + .unwrap_or("END".to_string()) + ); + } +} +impl<'a> Read for ConcattyReader<'a> { + fn read(&mut self, buf: &mut [u8]) -> std::io::Result { + match &mut self.cur { + None => Ok(0), // last file ended + Some(cur) => match cur.inp.read(buf) { + Err(e) => Err(e), + Ok(0) => { + // current file ended, go to next file + self.ascend(); + self.read(buf) + } + Ok(n) => Ok(n), + }, + } + } +} +fn concattyreader<'a>(inp: Box) -> Box { + let mut r = ConcattyReader { inp, cur: None }; + r.ascend(); + Box::new(r) +} + fn run_adapter<'a>( ai: AdaptInfo<'a>, adapter: Rc, @@ -173,6 +216,8 @@ fn run_adapter<'a>( meta.name ) })?; + while let Some(innerinp) = inp.next() {} + /*let inp = concattyreader(inp); let inp = CachingReader::new( inp, cache_max_blob_len.0.try_into().unwrap(), @@ -188,7 +233,7 @@ fn run_adapter<'a>( } Ok(()) }), - )?; + )?;*/ Ok(Box::new(inp)) } @@ -203,7 +248,7 @@ fn run_adapter<'a>( line_prefix, filepath_hint: filepath_hint.clone(), is_real_file, - inp: Box::new(inp), + inp, archive_recursion_depth, config, }, @@ -221,6 +266,6 @@ fn run_adapter<'a>( adapter.metadata().name, print_dur(start) ); - Ok(oread) + Ok(concattyreader(oread)) } }