small amount of progress

This commit is contained in:
phiresky 2020-09-28 22:55:55 +02:00
parent bcc01f7a62
commit cc744176ca
7 changed files with 160 additions and 48 deletions

18
Cargo.lock generated
View File

@ -751,6 +751,15 @@ dependencies = [
"num-traits",
]
[[package]]
name = "owning_ref"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ff55baddef9e4ad00f88b6c743a2a8062d4c6ade126c2a528644b8e444d52ce"
dependencies = [
"stable_deref_trait",
]
[[package]]
name = "paste"
version = "1.0.0"
@ -957,6 +966,7 @@ dependencies = [
"lazy_static",
"log",
"memchr",
"owning_ref",
"paste",
"path-clean",
"pretty-bytes",
@ -1119,6 +1129,12 @@ version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fbee7696b84bbf3d89a1c2eccff0850e3047ed46bfcd2e92c29a2d074d57e252"
[[package]]
name = "stable_deref_trait"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
[[package]]
name = "static_assertions"
version = "1.1.0"
@ -1432,6 +1448,8 @@ dependencies = [
[[package]]
name = "zip"
version = "0.5.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "543adf038106b64cfca4711c82c917d785e3540e04f7996554488f988ec43124"
dependencies = [
"byteorder",
"bzip2 0.3.3",

View File

@ -25,7 +25,6 @@ serde = { version = "1.0.115", features = ["derive"] }
zstd = "0.5.3"
lazy_static = "1.4.0"
serde_json = "1.0.57"
zip = {path="../zip-rs"}
crossbeam = "0.7.3"
clap = { version = "2.33.3", features = ["wrap_help"] }
log = "0.4.11"
@ -52,3 +51,5 @@ memchr = "2.3.3"
crossbeam-channel = "0.4.4"
dyn-clone = "1.0.2"
dyn-clonable = "0.9.0"
zip = "0.5.8"
owning_ref = "0.4.1"

View File

@ -1,18 +1,18 @@
pub mod custom;
pub mod decompress;
pub mod ffmpeg;
//pub mod custom;
// pub mod decompress;
// pub mod ffmpeg;
pub mod fns;
// pub mod pdfpages;
pub mod spawning;
pub mod sqlite;
// pub mod spawning;
// pub mod sqlite;
// pub mod tar;
// pub mod tesseract;
pub mod writing;
// pub mod writing;
pub mod zip;
use crate::{config::RgaConfig, matching::*};
use anyhow::*;
use custom::builtin_spawning_adapters;
use custom::CustomAdapterConfig;
// use custom::builtin_spawning_adapters;
//use custom::CustomAdapterConfig;
use log::*;
use std::borrow::Cow;
@ -22,7 +22,7 @@ use std::iter::Iterator;
use std::path::{Path, PathBuf};
use std::rc::Rc;
pub type ReadBox<'a> = Box<dyn Read + Send + 'a>;
pub type ReadBox<'a> = Box<dyn Read + 'a>;
pub struct AdapterMeta {
/// unique short name of this adapter (a-z0-9 only)
@ -80,8 +80,18 @@ pub trait FileAdapter: GetMetadata {
/// adapt a file.
///
/// detection_reason is the Matcher that was used to identify this file. Unless --rga-accurate was given, it is always a FastMatcher
fn adapt<'a>(&self, a: AdaptInfo<'a>, detection_reason: &FileMatcher) -> Result<ReadBox<'a>>;
fn adapt<'a>(
&self,
a: AdaptInfo<'a>,
detection_reason: &FileMatcher,
) -> Result<Box<dyn ReadIter + 'a>>;
}
/// A lending iterator over the entries an adapter extracts from its input.
///
/// In contrast to `std::iter::Iterator`, each item borrows the iterator
/// mutably, so an item has to be gone before `next` can be called again.
pub trait ReadIter {
    /// Yields the next entry; the returned `AdaptInfo` cannot outlive the
    /// mutable borrow of `self` that produced it.
    fn next(&mut self) -> Option<AdaptInfo<'_>>;
}
pub struct AdaptInfo<'a> {
/// file path. May not be an actual file on the file system (e.g. in an archive). Used for matching file extensions.
pub filepath_hint: PathBuf,
@ -99,29 +109,29 @@ pub struct AdaptInfo<'a> {
/// (enabledAdapters, disabledAdapters)
type AdaptersTuple = (Vec<Rc<dyn FileAdapter>>, Vec<Rc<dyn FileAdapter>>);
pub fn get_all_adapters(custom_adapters: Option<Vec<CustomAdapterConfig>>) -> AdaptersTuple {
pub fn get_all_adapters(/*custom_adapters: Option<Vec<CustomAdapterConfig>>*/) -> AdaptersTuple {
// order in descending priority
let mut adapters: Vec<Rc<dyn FileAdapter>> = vec![];
if let Some(custom_adapters) = custom_adapters {
/*if let Some(custom_adapters) = custom_adapters {
for adapter_config in custom_adapters {
adapters.push(Rc::new(adapter_config.to_adapter()));
}
}
}*/
let internal_adapters: Vec<Rc<dyn FileAdapter>> = vec![
Rc::new(ffmpeg::FFmpegAdapter::new()),
//Rc::new(ffmpeg::FFmpegAdapter::new()),
Rc::new(zip::ZipAdapter::new()),
Rc::new(decompress::DecompressAdapter::new()),
//Rc::new(decompress::DecompressAdapter::new()),
// Rc::new(tar::TarAdapter::new()),
Rc::new(sqlite::SqliteAdapter::new()),
//Rc::new(sqlite::SqliteAdapter::new()),
// Rc::new(pdfpages::PdfPagesAdapter::new()),
// Rc::new(tesseract::TesseractAdapter::new()),
];
adapters.extend(
/*adapters.extend(
builtin_spawning_adapters
.iter()
.map(|e| -> Rc<dyn FileAdapter> { Rc::new(e.clone().to_adapter()) }),
);
);*/
adapters.extend(internal_adapters);
adapters
@ -138,10 +148,10 @@ pub fn get_all_adapters(custom_adapters: Option<Vec<CustomAdapterConfig>>) -> Ad
* - "+a,b" means use default list but also a and b (a,b will be prepended to the list so given higher priority)
*/
pub fn get_adapters_filtered<T: AsRef<str>>(
custom_adapters: Option<Vec<CustomAdapterConfig>>,
/*custom_adapters: Option<Vec<CustomAdapterConfig>>,*/
adapter_names: &Vec<T>,
) -> Result<Vec<Rc<dyn FileAdapter>>> {
let (def_enabled_adapters, def_disabled_adapters) = get_all_adapters(custom_adapters);
let (def_enabled_adapters, def_disabled_adapters) = get_all_adapters(/*custom_adapters*/);
let adapters = if !adapter_names.is_empty() {
let adapters_map: HashMap<_, _> = def_enabled_adapters
.iter()

View File

@ -1,6 +1,8 @@
use super::{FileAdapter, GetMetadata, ReadBox};
use anyhow::Result;
use std::io::Read;
use std::io::Write;
use std::thread::Thread;
// this trait / struct split is ugly but necessary because of "conflicting trait implementation" otherwise with SpawningFileAdapter
#[dyn_clonable::clonable]
@ -28,6 +30,17 @@ impl GetMetadata for WritingFileAdapter {
}
}
/// Work-in-progress reader wrapper: it holds a boxed reader plus a thread
/// handle, but its `Read` implementation is still a stub.
// NOTE(review): presumably meant to expose the output of a writing adapter
// through `Read`, with a helper thread feeding a pipe — confirm the intent.
struct PipedReadWriter<'a> {
// Boxed reader owned by this wrapper; nothing reads from it yet.
inner: ReadBox<'a>,
// NOTE(review): `std::thread::Thread` is only a handle and cannot be joined;
// if the thread must be awaited, a `JoinHandle` is probably intended — confirm.
pipe_thread: Thread,
}
impl<'a> Read for PipedReadWriter<'a> {
// Not implemented: every call panics through `todo!()`, `buf` is never touched.
fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
todo!()
}
}
impl FileAdapter for WritingFileAdapter {
fn adapt<'a>(
&self,

View File

@ -4,7 +4,6 @@ use ::zip::read::ZipFile;
use anyhow::*;
use lazy_static::lazy_static;
use log::*;
use writing::{WritingFileAdapter, WritingFileAdapterTrait};
// todo:
// maybe todo: read list of extensions from
@ -30,8 +29,8 @@ lazy_static! {
pub struct ZipAdapter;
impl ZipAdapter {
pub fn new() -> WritingFileAdapter {
WritingFileAdapter::new(Box::new(ZipAdapter))
pub fn new() -> ZipAdapter {
ZipAdapter
}
}
impl GetMetadata for ZipAdapter {
@ -49,22 +48,49 @@ fn is_dir(f: &ZipFile) -> bool {
.map_or(false, |c| c == '/' || c == '\\')
}
impl WritingFileAdapterTrait for ZipAdapter {
fn adapt_write<'a>(
struct OutIter<'a> {
inp: AdaptInfo<'a>,
}
impl<'a> ReadIter for OutIter<'a> {
fn next<'b>(&'b mut self) -> Option<AdaptInfo<'b>> {
let line_prefix = "todo";
let filepath_hint = std::path::PathBuf::from("hello");
let archive_recursion_depth = 1;
::zip::read::read_zipfile_from_stream(&mut self.inp.inp)
.unwrap()
.and_then(|file| {
if is_dir(&file) {
return None;
}
debug!(
"{}{}|{}: {} ({} packed)",
line_prefix,
filepath_hint.to_string_lossy(),
file.name(),
print_bytes(file.size() as f64),
print_bytes(file.compressed_size() as f64)
);
let line_prefix = format!("{}{}: ", line_prefix, file.name());
Some(AdaptInfo {
filepath_hint: file.sanitized_name().clone(),
is_real_file: false,
inp: Box::new(file),
line_prefix,
archive_recursion_depth: archive_recursion_depth + 1,
config: RgaConfig::default(), //config.clone(),
})
})
}
}
impl FileAdapter for ZipAdapter {
fn adapt<'a>(
&self,
ai: AdaptInfo<'a>,
_detection_reason: &FileMatcher,
oup: &mut (dyn Write + 'a),
) -> Result<()> {
let AdaptInfo {
filepath_hint,
mut inp,
line_prefix,
archive_recursion_depth,
config,
..
} = ai;
loop {
detection_reason: &FileMatcher,
) -> Result<Box<dyn ReadIter + 'a>> {
Ok(Box::new(OutIter { inp: ai }))
/*loop {
match ::zip::read::read_zipfile_from_stream(&mut inp) {
Ok(None) => break,
Ok(Some(mut file)) => {
@ -95,6 +121,6 @@ impl WritingFileAdapterTrait for ZipAdapter {
Err(e) => return Err(e.into()),
}
}
Ok(())
Ok(())*/
}
}

View File

@ -1,4 +1,4 @@
use crate::{adapters::custom::CustomAdapterConfig, project_dirs};
use crate::project_dirs;
use anyhow::*;
use derive_more::FromStr;
use log::*;
@ -151,13 +151,12 @@ pub struct RgaConfig {
)]
pub max_archive_recursion: MaxArchiveRecursion,
//////////////////////////////////////////
/* //////////////////////////////////////////
//////////////////////////// Config file only
//////////////////////////////////////////
#[serde(default, skip_serializing_if = "is_default")]
#[structopt(skip)]
pub custom_adapters: Option<Vec<CustomAdapterConfig>>,
pub custom_adapters: Option<Vec<CustomAdapterConfig>>,*/
//////////////////////////////////////////
//////////////////////////// CMD line only
//////////////////////////////////////////

View File

@ -6,8 +6,9 @@ use crate::{
};
use anyhow::*;
use log::*;
use owning_ref::OwningRefMut;
use path_clean::PathClean;
use std::convert::TryInto;
use std::{convert::TryInto, io::Read};
use std::io::{BufRead, BufReader};
@ -30,7 +31,7 @@ pub fn rga_preproc(ai: AdaptInfo) -> Result<ReadBox> {
} = ai;
debug!("path (hint) to preprocess: {:?}", filepath_hint);
let filtered_adapters =
get_adapters_filtered(config.custom_adapters.clone(), &config.adapters)?;
get_adapters_filtered(/*config.custom_adapters.clone(),*/ &config.adapters)?;
let adapters = adapter_matcher(&filtered_adapters, config.accurate)?;
let filename = filepath_hint
.file_name()
@ -87,6 +88,48 @@ pub fn rga_preproc(ai: AdaptInfo) -> Result<ReadBox> {
}
}
/// Presents all entries produced by a `ReadIter` as one continuous byte
/// stream: when the current entry is exhausted, reading continues with the
/// next one.
struct ConcattyReader<'a> {
    // Declared before `inp` on purpose: struct fields are dropped in
    // declaration order, and `cur` borrows from `inp` (see `ascend`), so it
    // has to be dropped first.
    cur: Option<AdaptInfo<'a>>,
    inp: Box<dyn ReadIter + 'a>,
}
impl<'a> ConcattyReader<'a> {
    /// Drops the current entry and moves on to the next one, leaving `cur`
    /// as `None` once the iterator is exhausted.
    fn ascend(&mut self) {
        // The previous entry borrows the iterator mutably; release it before
        // the iterator is touched again.
        self.cur = None;
        let r: *mut Box<dyn ReadIter + 'a> = &mut self.inp;
        // SAFETY: going through the raw pointer stretches the borrow returned
        // by `next` to `'a`, which the borrow checker cannot verify. It relies
        // on three invariants upheld by this type:
        //  * at most one entry exists at a time (the old one was dropped above),
        //  * `cur` is dropped before `inp` (field declaration order),
        //  * the iterator lives behind a `Box`, so moving `self` does not move
        //    the data an entry may point into.
        self.cur = unsafe { (*r).next() };
        eprintln!(
            "ascended to {}",
            self.cur
                .as_ref()
                .map(|e| e.filepath_hint.to_string_lossy().into_owned())
                .unwrap_or_else(|| "END".to_string())
        );
    }
}
impl<'a> Read for ConcattyReader<'a> {
    /// Reads from the current entry, advancing to the following entries as
    /// each one ends. Returns `Ok(0)` only when `buf` is empty or when the
    /// last entry has been fully consumed.
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        // A zero-length read always yields `Ok(0)`; it must not be mistaken
        // for the end of the current entry, or entries would be skipped.
        if buf.is_empty() {
            return Ok(0);
        }
        // Loop rather than recurse so that a long run of empty entries cannot
        // grow the stack.
        loop {
            let cur = match &mut self.cur {
                None => return Ok(0), // last file ended
                Some(cur) => cur,
            };
            match cur.inp.read(buf)? {
                // current file ended, go to next file
                0 => self.ascend(),
                n => return Ok(n),
            }
        }
    }
}
/// Turns an entry iterator into a single reader that yields the content of
/// every entry back to back. The reader is positioned on the first entry
/// before it is returned.
fn concattyreader<'a>(inp: Box<dyn ReadIter + 'a>) -> Box<dyn Read + 'a> {
    let mut joined = Box::new(ConcattyReader { inp, cur: None });
    // Load the first entry so the first `read` has something to pull from.
    joined.ascend();
    joined
}
fn run_adapter<'a>(
ai: AdaptInfo<'a>,
adapter: Rc<dyn FileAdapter>,
@ -173,6 +216,8 @@ fn run_adapter<'a>(
meta.name
)
})?;
while let Some(innerinp) = inp.next() {}
/*let inp = concattyreader(inp);
let inp = CachingReader::new(
inp,
cache_max_blob_len.0.try_into().unwrap(),
@ -188,7 +233,7 @@ fn run_adapter<'a>(
}
Ok(())
}),
)?;
)?;*/
Ok(Box::new(inp))
}
@ -203,7 +248,7 @@ fn run_adapter<'a>(
line_prefix,
filepath_hint: filepath_hint.clone(),
is_real_file,
inp: Box::new(inp),
inp,
archive_recursion_depth,
config,
},
@ -221,6 +266,6 @@ fn run_adapter<'a>(
adapter.metadata().name,
print_dur(start)
);
Ok(oread)
Ok(concattyreader(oread))
}
}