fix recursion and pdf adapter

This commit is contained in:
phiresky 2022-12-25 18:27:50 +01:00
parent 616c0a560b
commit ddeceb0ce9
4 changed files with 31 additions and 7 deletions

View File

@ -25,6 +25,8 @@ use std::path::PathBuf;
use std::pin::Pin; use std::pin::Pin;
use std::rc::Rc; use std::rc::Rc;
use self::postproc::PostprocPageBreaks;
pub type ReadBox = Pin<Box<dyn AsyncRead + Send>>; pub type ReadBox = Pin<Box<dyn AsyncRead + Send>>;
pub struct AdapterMeta { pub struct AdapterMeta {
/// unique short name of this adapter (a-z0-9 only) /// unique short name of this adapter (a-z0-9 only)
@ -117,6 +119,7 @@ pub fn get_all_adapters(custom_adapters: Option<Vec<CustomAdapterConfig>>) -> Ad
} }
let internal_adapters: Vec<Arc<dyn FileAdapter>> = vec![ let internal_adapters: Vec<Arc<dyn FileAdapter>> = vec![
Arc::new(PostprocPageBreaks::new()),
//Rc::new(ffmpeg::FFmpegAdapter::new()), //Rc::new(ffmpeg::FFmpegAdapter::new()),
// Rc::new(zip::ZipAdapter::new()), // Rc::new(zip::ZipAdapter::new()),
//Rc::new(decompress::DecompressAdapter::new()), //Rc::new(decompress::DecompressAdapter::new()),

View File

@ -213,9 +213,6 @@ fn arg_replacer(arg: &str, filepath_hint: &Path) -> Result<String> {
})) }))
} }
impl CustomSpawningFileAdapter { impl CustomSpawningFileAdapter {
fn get_exe(&self) -> &str {
&self.binary
}
fn command( fn command(
&self, &self,
filepath_hint: &std::path::Path, filepath_hint: &std::path::Path,
@ -266,7 +263,7 @@ impl FileAdapter for CustomSpawningFileAdapter {
inp: output, inp: output,
line_prefix, line_prefix,
is_real_file: false, is_real_file: false,
archive_recursion_depth, archive_recursion_depth: archive_recursion_depth + 1,
postprocess, postprocess,
config, config,
}))) })))

View File

@ -8,7 +8,9 @@ use async_stream::stream;
use bytes::Bytes; use bytes::Bytes;
use encoding_rs_io::DecodeReaderBytesBuilder; use encoding_rs_io::DecodeReaderBytesBuilder;
use std::cmp::min; use std::cmp::min;
use std::ffi::OsStr;
use std::io::Cursor; use std::io::Cursor;
use std::path::PathBuf;
use std::pin::Pin; use std::pin::Pin;
use tokio::io::{AsyncRead, AsyncReadExt}; use tokio::io::{AsyncRead, AsyncReadExt};
use tokio_util::io::ReaderStream; use tokio_util::io::ReaderStream;
@ -145,6 +147,11 @@ pub fn postproc_prefix(line_prefix: &str, inp: impl AsyncRead + Send) -> impl As
} }
pub struct PostprocPageBreaks {} pub struct PostprocPageBreaks {}
impl PostprocPageBreaks {
pub fn new() -> Self {
Self {}
}
}
impl GetMetadata for PostprocPageBreaks { impl GetMetadata for PostprocPageBreaks {
fn metadata(&self) -> &super::AdapterMeta { fn metadata(&self) -> &super::AdapterMeta {
lazy_static::lazy_static! { lazy_static::lazy_static! {
@ -176,6 +183,13 @@ impl FileAdapter for PostprocPageBreaks {
let ai = AdaptInfo { let ai = AdaptInfo {
inp: Box::pin(read), inp: Box::pin(read),
postprocess: false, postprocess: false,
archive_recursion_depth: a.archive_recursion_depth + 1,
filepath_hint: a
.filepath_hint
.parent()
.map(PathBuf::from)
.unwrap_or(PathBuf::new())
.join(a.filepath_hint.file_stem().unwrap_or(OsStr::new(""))),
..a ..a
}; };
Ok(Box::pin(tokio_stream::once(ai))) Ok(Box::pin(tokio_stream::once(ai)))

View File

@ -189,8 +189,7 @@ async fn adapt_caching(
match cached { match cached {
Some(cached) => Ok(Box::pin(ZstdDecoder::new(Cursor::new(cached)))), Some(cached) => Ok(Box::pin(ZstdDecoder::new(Cursor::new(cached)))),
None => { None => {
debug!("cache MISS, running adapter"); debug!("cache MISS, running adapter with caching...");
debug!("adapting with caching...");
let inp = loop_adapt(adapter.as_ref(), detection_reason, ai)?; let inp = loop_adapt(adapter.as_ref(), detection_reason, ai)?;
let inp = concat_read_streams(inp); let inp = concat_read_streams(inp);
let inp = async_read_and_write_to_cache( let inp = async_read_and_write_to_cache(
@ -228,16 +227,27 @@ pub fn loop_adapt(
adapter.metadata().name adapter.metadata().name
) )
})?; })?;
debug!("got fph starting loop: {}", fph.to_string_lossy());
let s = stream! { let s = stream! {
for await file in inp { for await file in inp {
match buf_choose_adapter(file).await.expect("todo: handle") { match buf_choose_adapter(file).await.expect("todo: handle") {
Ret::Recurse(ai, adapter, detection_reason, active_adapters) => { Ret::Recurse(ai, adapter, detection_reason, _active_adapters) => {
debug!(
"Chose adapter '{}' because of matcher {:?}",
&adapter.metadata().name, &detection_reason
);
eprintln!(
"{} adapter: {}",
ai.filepath_hint.to_string_lossy(),
&adapter.metadata().name
);
for await ifile in loop_adapt(adapter.as_ref(), detection_reason, ai).expect("todo: handle") { for await ifile in loop_adapt(adapter.as_ref(), detection_reason, ai).expect("todo: handle") {
yield ifile; yield ifile;
} }
} }
Ret::Passthrough(ai) => { Ret::Passthrough(ai) => {
debug!("no adapter for {}, ending recursion", ai.filepath_hint.to_string_lossy());
yield ai; yield ai;
} }
} }