fix recursion and pdf adapter

This commit is contained in:
phiresky 2022-12-25 18:27:50 +01:00
parent 616c0a560b
commit ddeceb0ce9
4 changed files with 31 additions and 7 deletions

View File

@ -25,6 +25,8 @@ use std::path::PathBuf;
use std::pin::Pin;
use std::rc::Rc;
use self::postproc::PostprocPageBreaks;
/// A pinned, heap-allocated `AsyncRead` trait object that is also `Send`.
pub type ReadBox = Pin<Box<dyn AsyncRead + Send>>;
pub struct AdapterMeta {
/// unique short name of this adapter (a-z0-9 only)
@ -117,6 +119,7 @@ pub fn get_all_adapters(custom_adapters: Option<Vec<CustomAdapterConfig>>) -> Ad
}
let internal_adapters: Vec<Arc<dyn FileAdapter>> = vec![
Arc::new(PostprocPageBreaks::new()),
//Rc::new(ffmpeg::FFmpegAdapter::new()),
// Rc::new(zip::ZipAdapter::new()),
//Rc::new(decompress::DecompressAdapter::new()),

View File

@ -213,9 +213,6 @@ fn arg_replacer(arg: &str, filepath_hint: &Path) -> Result<String> {
}))
}
impl CustomSpawningFileAdapter {
/// Returns a borrowed string slice of this adapter's `binary` field.
fn get_exe(&self) -> &str {
&self.binary
}
fn command(
&self,
filepath_hint: &std::path::Path,
@ -266,7 +263,7 @@ impl FileAdapter for CustomSpawningFileAdapter {
inp: output,
line_prefix,
is_real_file: false,
archive_recursion_depth,
archive_recursion_depth: archive_recursion_depth + 1,
postprocess,
config,
})))

View File

@ -8,7 +8,9 @@ use async_stream::stream;
use bytes::Bytes;
use encoding_rs_io::DecodeReaderBytesBuilder;
use std::cmp::min;
use std::ffi::OsStr;
use std::io::Cursor;
use std::path::PathBuf;
use std::pin::Pin;
use tokio::io::{AsyncRead, AsyncReadExt};
use tokio_util::io::ReaderStream;
@ -145,6 +147,11 @@ pub fn postproc_prefix(line_prefix: &str, inp: impl AsyncRead + Send) -> impl As
}
/// Stateless adapter type. It is registered in the list of internal adapters
/// and receives its `GetMetadata` and `FileAdapter` implementations elsewhere
/// in this file.
///
/// `Default` is derived so that the zero-argument [`PostprocPageBreaks::new`]
/// has a matching `Default` implementation (clippy `new_without_default`).
#[derive(Default)]
pub struct PostprocPageBreaks {}

impl PostprocPageBreaks {
    /// Creates a new `PostprocPageBreaks`.
    ///
    /// The type has no fields, so this is equivalent to
    /// `PostprocPageBreaks::default()`.
    pub fn new() -> Self {
        Self {}
    }
}
impl GetMetadata for PostprocPageBreaks {
fn metadata(&self) -> &super::AdapterMeta {
lazy_static::lazy_static! {
@ -176,6 +183,13 @@ impl FileAdapter for PostprocPageBreaks {
let ai = AdaptInfo {
inp: Box::pin(read),
postprocess: false,
archive_recursion_depth: a.archive_recursion_depth + 1,
filepath_hint: a
.filepath_hint
.parent()
.map(PathBuf::from)
.unwrap_or(PathBuf::new())
.join(a.filepath_hint.file_stem().unwrap_or(OsStr::new(""))),
..a
};
Ok(Box::pin(tokio_stream::once(ai)))

View File

@ -189,8 +189,7 @@ async fn adapt_caching(
match cached {
Some(cached) => Ok(Box::pin(ZstdDecoder::new(Cursor::new(cached)))),
None => {
debug!("cache MISS, running adapter");
debug!("adapting with caching...");
debug!("cache MISS, running adapter with caching...");
let inp = loop_adapt(adapter.as_ref(), detection_reason, ai)?;
let inp = concat_read_streams(inp);
let inp = async_read_and_write_to_cache(
@ -228,16 +227,27 @@ pub fn loop_adapt(
adapter.metadata().name
)
})?;
debug!("got fph starting loop: {}", fph.to_string_lossy());
let s = stream! {
for await file in inp {
match buf_choose_adapter(file).await.expect("todo: handle") {
Ret::Recurse(ai, adapter, detection_reason, active_adapters) => {
Ret::Recurse(ai, adapter, detection_reason, _active_adapters) => {
debug!(
"Chose adapter '{}' because of matcher {:?}",
&adapter.metadata().name, &detection_reason
);
eprintln!(
"{} adapter: {}",
ai.filepath_hint.to_string_lossy(),
&adapter.metadata().name
);
for await ifile in loop_adapt(adapter.as_ref(), detection_reason, ai).expect("todo: handle") {
yield ifile;
}
}
Ret::Passthrough(ai) => {
debug!("no adapter for {}, ending recursion", ai.filepath_hint.to_string_lossy());
yield ai;
}
}