diff --git a/exampledir/screenshot.png b/exampledir/screenshot.png new file mode 100644 index 0000000..4d683e5 Binary files /dev/null and b/exampledir/screenshot.png differ diff --git a/src/adapters/pdfpages.rs b/src/adapters/pdfpages.rs index 7931317..0705935 100644 --- a/src/adapters/pdfpages.rs +++ b/src/adapters/pdfpages.rs @@ -41,6 +41,71 @@ impl GetMetadata for PdfPagesAdapter { } } +impl FileAdapter for PdfPagesAdapter { + fn adapt(&self, ai: AdaptInfo) -> Fallible<()> { + let AdaptInfo { + filepath_hint, + is_real_file, + mut inp, + oup, + line_prefix, + archive_recursion_depth, + config, + .. + } = ai; + if !is_real_file { + // todo: read to memory and then use that blob if size < max + writeln!(oup, "{}[rga: skipping pdfpages in archive]", line_prefix,)?; + return Ok(()); + } + let inp_fname = filepath_hint; + let exe_name = "gm"; + let out_dir = tempfile::Builder::new().prefix("pdfpages-").tempdir()?; + let out_fname = out_dir.path().join("out%04d.png"); + eprintln!("writing to temp dir: {}", out_fname.display()); + let mut cmd = Command::new(exe_name); + cmd.arg("convert") + .arg("-density") + .arg("200") + .arg(inp_fname) + .arg("+adjoin") + .arg(out_fname); + + let mut cmd = cmd.spawn().map_err(|e| { + map_exe_error(e, exe_name, "Make sure you have graphicsmagick installed.") + })?; + let args = config.args; + // TODO: how to handle this copying better? + + let status = cmd.wait()?; + if status.success() { + } else { + return Err(format_err!("subprocess failed: {:?}", status)); + } + for (i, filename) in glob::glob( + out_dir + .path() + .join("out*.png") + .to_str() + .expect("temp path has invalid encoding"), + )? + .enumerate() + { + let mut ele = BufReader::new(File::open(filename?)?); + rga_preproc(AdaptInfo { + filepath_hint: &PathBuf::from(format!("Page {}.png", i + 1)), + is_real_file: false, + inp: &mut ele, + oup, + line_prefix: &format!("{}Page {}:", line_prefix, i + 1), + archive_recursion_depth: archive_recursion_depth + 1, + config: PreprocConfig { cache: None, args }, + })?; + } + Ok(()) + } +} + /*// todo: do this in an actually streaming fashion and less slow // IEND chunk + PDF magic // 4945 4e44 ae42 6082 8950 4e47 0d0a 1a0a @@ -72,72 +137,3 @@ fn split_by_seq<'a>( out.push(Cursor::new(Vec::from(&all[last..]))); Ok(out) }*/ - -impl FileAdapter for PdfPagesAdapter { - fn adapt(&self, ai: AdaptInfo) -> Fallible<()> { - let AdaptInfo { - filepath_hint, - is_real_file, - mut inp, - oup, - line_prefix, - archive_recursion_depth, - config, - .. - } = ai; - if !is_real_file { - // todo: read to memory and then use that blob if size < max - writeln!(oup, "{}[rga: skipping pdfpages in archive]", line_prefix,)?; - return Ok(()); - } - let inp_fname = filepath_hint; - let exe_name = "gm"; - let out_dir = tempfile::Builder::new().prefix("pdfpages-").tempdir()?; - let out_fname = out_dir.path().join("out%04d.png"); - eprintln!("writing to temp dir: {}", out_fname.display()); - let mut cmd = Command::new(exe_name); - cmd.arg("convert") - .arg("-density") - .arg("300") - .arg(inp_fname) - .arg("+adjoin") - .arg(out_fname); - - let mut cmd = cmd.spawn().map_err(|e| { - map_exe_error( - e, - exe_name, - "Could not find gm. Make sure you have graphicsmagick installed.", - ) - })?; - let args = config.args; - // TODO: how to handle this copying better? - - let status = cmd.wait()?; - if status.success() { - } else { - return Err(format_err!("subprocess failed: {:?}", status)); - } - for (i, filename) in glob::glob( - out_dir - .path() - .join("out*.png") - .to_str() - .expect("temp path has invalid encoding"), - )? - .enumerate() - { - let mut ele = BufReader::new(File::open(filename?)?); - rga_preproc(AdaptInfo { - filepath_hint: &PathBuf::from(format!("Page {}.png", i + 1)), - is_real_file: false, - inp: &mut ele, - oup, - line_prefix, - archive_recursion_depth: archive_recursion_depth + 1, - config: PreprocConfig { cache: None, args }, - })?; - } - Ok(()) - } -} diff --git a/src/adapters/tesseract.rs b/src/adapters/tesseract.rs index fc7bea9..42ebe10 100644 --- a/src/adapters/tesseract.rs +++ b/src/adapters/tesseract.rs @@ -36,7 +36,8 @@ impl SpawningFileAdapter for TesseractAdapter { "tesseract" } fn command(&self, _filepath_hint: &Path, mut cmd: Command) -> Command { - cmd.arg("-").arg("-"); + // rg already does threading + cmd.env("OMP_THREAD_LIMIT", "1").arg("-").arg("-"); cmd } }