fix pinning and sending

This commit is contained in:
phiresky 2022-11-05 00:47:43 +01:00
parent 002c62f57c
commit 42fe225373
6 changed files with 80 additions and 65 deletions

View File

@ -1,8 +1,10 @@
use std::pin::Pin;
use tokio_stream::Stream;
use crate::adapters::AdaptInfo;
pub trait AdaptedFilesIter: Stream<Item = AdaptInfo> + Send + Unpin {}
impl<T> AdaptedFilesIter for T where T: Stream<Item = AdaptInfo> + Send + Unpin {}
pub trait AdaptedFilesIter: Stream<Item = AdaptInfo> + Send {}
impl<T> AdaptedFilesIter for T where T: Stream<Item = AdaptInfo> + Send {}
pub type AdaptedFilesIterBox = Box<dyn AdaptedFilesIter>;
pub type AdaptedFilesIterBox = Pin<Box<dyn AdaptedFilesIter>>;

View File

@ -4,6 +4,7 @@ pub mod custom;
// pub mod postproc;
// pub mod pdfpages;
pub mod spawning;
use std::sync::Arc;
// pub mod sqlite;
// pub mod tar;
// pub mod tesseract;
@ -22,6 +23,7 @@ use std::iter::Iterator;
use std::path::PathBuf;
use std::pin::Pin;
use std::rc::Rc;
use core::fmt::Debug;
pub type ReadBox = Pin<Box<dyn AsyncRead + Send>>;
pub struct AdapterMeta {
@ -76,7 +78,7 @@ impl AdapterMeta {
pub trait GetMetadata {
fn metadata(&self) -> &AdapterMeta;
}
pub trait FileAdapter: GetMetadata {
pub trait FileAdapter: GetMetadata + Send + Sync{
/// adapt a file.
///
/// detection_reason is the Matcher that was used to identify this file. Unless --rga-accurate was given, it is always a FastMatcher
@ -99,22 +101,22 @@ pub struct AdaptInfo {
/// prefix every output line with this string to better indicate the file's location if it is in some archive
pub line_prefix: String,
pub postprocess: bool,
pub config: RgaConfig
pub config: RgaConfig,
}
/// (enabledAdapters, disabledAdapters)
type AdaptersTuple = (Vec<Rc<dyn FileAdapter>>, Vec<Rc<dyn FileAdapter>>);
type AdaptersTuple = (Vec<Arc<dyn FileAdapter>>, Vec<Arc<dyn FileAdapter>>);
pub fn get_all_adapters(custom_adapters: Option<Vec<CustomAdapterConfig>>) -> AdaptersTuple {
// order in descending priority
let mut adapters: Vec<Rc<dyn FileAdapter>> = vec![];
let mut adapters: Vec<Arc<dyn FileAdapter>> = vec![];
if let Some(custom_adapters) = custom_adapters {
for adapter_config in custom_adapters {
adapters.push(Rc::new(adapter_config.to_adapter()));
adapters.push(Arc::new(adapter_config.to_adapter()));
}
}
let internal_adapters: Vec<Rc<dyn FileAdapter>> = vec![
let internal_adapters: Vec<Arc<dyn FileAdapter>> = vec![
//Rc::new(ffmpeg::FFmpegAdapter::new()),
// Rc::new(zip::ZipAdapter::new()),
//Rc::new(decompress::DecompressAdapter::new()),
@ -126,7 +128,7 @@ pub fn get_all_adapters(custom_adapters: Option<Vec<CustomAdapterConfig>>) -> Ad
adapters.extend(
builtin_spawning_adapters
.iter()
.map(|e| -> Rc<dyn FileAdapter> { Rc::new(e.to_adapter()) }),
.map(|e| -> Arc<dyn FileAdapter> { Arc::new(e.to_adapter()) }),
);
adapters.extend(internal_adapters);
@ -146,7 +148,7 @@ pub fn get_all_adapters(custom_adapters: Option<Vec<CustomAdapterConfig>>) -> Ad
pub fn get_adapters_filtered<T: AsRef<str>>(
custom_adapters: Option<Vec<CustomAdapterConfig>>,
adapter_names: &Vec<T>,
) -> Result<Vec<Rc<dyn FileAdapter>>> {
) -> Result<Vec<Arc<dyn FileAdapter>>> {
let (def_enabled_adapters, def_disabled_adapters) = get_all_adapters(custom_adapters);
let adapters = if !adapter_names.is_empty() {
let adapters_map: HashMap<_, _> = def_enabled_adapters

View File

@ -14,7 +14,7 @@ use tokio::io::AsyncReadExt;
use tokio::process::{Child, Command};
// TODO: don't separate the trait and the struct
pub trait SpawningFileAdapterTrait: GetMetadata {
pub trait SpawningFileAdapterTrait: GetMetadata + Send + Sync {
fn get_exe(&self) -> &str;
fn command(&self, filepath_hint: &Path, command: Command) -> Result<Command>;
}
@ -123,7 +123,7 @@ impl FileAdapter for SpawningFileAdapter {
.with_context(|| format!("Could not set cmd arguments for {}", self.inner.get_exe()))?;
debug!("executing {:?}", cmd);
let output = pipe_output(&line_prefix, cmd, inp, self.inner.get_exe(), "")?;
Ok(Box::new(tokio_stream::once(AdaptInfo {
Ok(Box::pin(tokio_stream::once(AdaptInfo {
filepath_hint: PathBuf::from(format!("{}.txt", filepath_hint.to_string_lossy())), // TODO: customizable
inp: output,
line_prefix,

View File

@ -15,7 +15,7 @@ fn list_adapters(args: RgaConfig) -> Result<()> {
let (enabled_adapters, disabled_adapters) = get_all_adapters(args.custom_adapters.clone());
println!("Adapters:\n");
let print = |adapter: std::rc::Rc<dyn FileAdapter>| {
let print = |adapter: std::rc::Arc<dyn FileAdapter>| {
let meta = adapter.metadata();
let matchers = meta
.fast_matchers

View File

@ -9,7 +9,7 @@ use regex::{Regex, RegexSet};
use std::iter::Iterator;
use std::rc::Rc;
use std::sync::Arc;
// match only based on file path
#[derive(Clone, Debug)]
@ -54,9 +54,9 @@ pub fn extension_to_regex(extension: &str) -> Regex {
}
pub fn adapter_matcher(
adapters: &Vec<Rc<dyn FileAdapter>>,
adapters: &Vec<Arc<dyn FileAdapter>>,
slow: bool,
) -> Result<impl Fn(FileMeta) -> Option<(Rc<dyn FileAdapter>, FileMatcher)>> {
) -> Result<impl Fn(FileMeta) -> Option<(Arc<dyn FileAdapter>, FileMatcher)>> {
// need order later
let adapter_names: Vec<String> = adapters.iter().map(|e| e.metadata().name.clone()).collect();
let mut fname_regexes = vec![];

View File

@ -13,6 +13,7 @@ use async_compression::tokio::bufread::ZstdDecoder;
use async_stream::stream;
use log::*;
use path_clean::PathClean;
use std::sync::Arc;
// use postproc::PostprocPrefix;
use std::convert::TryInto;
use std::io::Cursor;
@ -21,16 +22,14 @@ use tokio::io::AsyncBufRead;
use tokio::io::AsyncBufReadExt;
use tokio::io::BufReader;
use std::rc::Rc;
type ActiveAdapters = Vec<Rc<dyn FileAdapter>>;
type ActiveAdapters = Vec<Arc<dyn FileAdapter>>;
async fn choose_adapter(
config: &RgaConfig,
filepath_hint: &Path,
archive_recursion_depth: i32,
inp: &mut (impl AsyncBufRead + Unpin),
) -> Result<Option<(Rc<dyn FileAdapter>, FileMatcher, ActiveAdapters)>> {
) -> Result<Option<(Arc<dyn FileAdapter>, FileMatcher, ActiveAdapters)>> {
let active_adapters = get_adapters_filtered(config.custom_adapters.clone(), &config.adapters)?;
let adapters = adapter_matcher(&active_adapters, config.accurate)?;
let filename = filepath_hint
@ -52,6 +51,46 @@ async fn choose_adapter(
});
Ok(adapter.map(|e| (e.0, e.1, active_adapters)))
}
/// Wrap the input of `ai` in a buffered reader and pick an adapter for it.
///
/// `choose_adapter` takes its input as `&mut impl AsyncBufRead`, so the raw
/// `ai.inp` is first wrapped in a `BufReader` with a 64 KiB (`1 << 16`) buffer.
/// That same buffered reader is then pinned and stored back into the returned
/// `AdaptInfo`, so whoever consumes `inp` afterwards reads through it.
///
/// Returns the updated `AdaptInfo` together with whatever `choose_adapter`
/// produced (`None` when no adapter was chosen).
///
/// # Errors
/// Propagates any error returned by `choose_adapter`.
async fn buf_choose_adapter(
    ai: AdaptInfo,
) -> Result<(
    AdaptInfo,
    Option<(Arc<dyn FileAdapter>, FileMatcher, ActiveAdapters)>,
)> {
    // Only `ai.inp` is moved out here; the remaining fields stay borrowable below.
    let mut buffered = BufReader::with_capacity(1 << 16, ai.inp);
    let chosen = choose_adapter(
        &ai.config,
        &ai.filepath_hint,
        ai.archive_recursion_depth,
        &mut buffered,
    )
    .await?;
    // Rebuild the AdaptInfo around the buffered reader, keeping every other field.
    let rebuilt = AdaptInfo {
        inp: Box::pin(buffered),
        ..ai
    };
    Ok((rebuilt, chosen))
}
/// Decide what to do with a file for which no adapter was found.
///
/// Passthrough (handing `ai` back unchanged) is allowed when the file is not a
/// real file (i.e. it is inside an archive) or when accurate matching is enabled.
/// Otherwise the file should already have been filtered out by rg's pre-glob,
/// since rg can handle those files better than we can.
///
/// # Errors
/// Returns an error when passthrough is not allowed, or when the error message
/// cannot be built because `ai.filepath_hint` has no file name.
///
/// # Panics
/// Panics with "not implemented" when passthrough is allowed but
/// `ai.postprocess` is set.
fn handle_no_adapter(ai: AdaptInfo) -> Result<AdaptInfo> {
    let passthrough_allowed = !ai.is_real_file || ai.config.accurate;
    if !passthrough_allowed {
        let file_name = ai
            .filepath_hint
            .file_name()
            .ok_or_else(|| format_err!("Empty filename"))?;
        return Err(format_err!(
            "No adapter found for file {:?}, passthrough disabled.",
            file_name
        ));
    }
    if ai.postprocess {
        // TODO: presumably this should fall back to a default postprocessing
        // adapter (a `PostprocPrefix` adapter was sketched here) — not implemented yet.
        panic!("not implemented");
    }
    Ok(ai)
}
/**
* preprocess a file as defined in `ai`.
*
@ -67,46 +106,13 @@ pub async fn rga_preproc(ai: AdaptInfo) -> Result<ReadBox> {
// todo: figure out when using a bufreader is a good idea and when it is not
// seems to be good for File::open() reads, but not sure about within archives (tar, zip)
let mut inp = BufReader::with_capacity(1 << 16, ai.inp);
let adapter = choose_adapter(
&ai.config,
&ai.filepath_hint,
ai.archive_recursion_depth,
&mut inp,
)
.await?;
let (adapter, detection_reason, active_adapters) = match adapter {
Some((a, d, e)) => (a, d, e),
None => {
// allow passthrough if the file is in an archive or accurate matching is enabled
// otherwise it should have been filtered out by rg pre-glob since rg can handle those better than us
let allow_cat = !ai.is_real_file || ai.config.accurate;
if allow_cat {
if ai.postprocess {
panic!("not implemented");
/* (
Rc::new(PostprocPrefix {}) as Rc<dyn FileAdapter>,
FileMatcher::Fast(FastFileMatcher::FileExtension("default".to_string())), // todo: separate enum value for this
)*/
} else {
return Ok(Box::pin(inp));
}
} else {
return Err(format_err!(
"No adapter found for file {:?}, passthrough disabled.",
ai.filepath_hint
.file_name()
.ok_or_else(|| format_err!("Empty filename"))?
));
}
}
let (ai, adapter) = buf_choose_adapter(ai).await?;
let Some((adapter, detection_reason, active_adapters)) = adapter else {
return handle_no_adapter(ai).map(|ai| ai.inp);
};
let path_hint_copy = ai.filepath_hint.clone();
run_adapter_recursively(
AdaptInfo {
inp: Box::pin(inp),
..ai
},
adapt_caching(
ai,
adapter,
detection_reason,
active_adapters,
@ -144,9 +150,10 @@ fn compute_cache_key(
bincode::serialize(&key).context("could not serialize path")
}
}
async fn run_adapter_recursively(
async fn adapt_caching(
ai: AdaptInfo,
adapter: Rc<dyn FileAdapter>,
adapter: Arc<dyn FileAdapter>,
detection_reason: FileMatcher,
active_adapters: ActiveAdapters,
) -> Result<ReadBox> {
@ -220,11 +227,15 @@ fn loop_adapt(
let s = stream! {
for await file in inp {
let (adapter, detection_reason) = choose_adapter(file.config, file.filepath_hint,file.archive_recursion_depth, file.inp);
for file in loop_adapt(adapter, detection_reason, file) {
yield file;
let (file, chosen_adapter) = buf_choose_adapter(file).await.expect("todo: handle");
if let Some((adapter, detection_reason, active_adapters)) = chosen_adapter {
for await file in loop_adapt(adapter.as_ref(), detection_reason, file).expect("todo: handle") {
yield file;
}
} else {
yield handle_no_adapter(file).expect("todo: handle");
}
}
};
Ok(inp)
Ok(Box::pin(s))
}