mirror of
https://github.com/FliegendeWurst/ripgrep-all.git
synced 2024-11-24 12:24:56 +00:00
tesseract single threaded
This commit is contained in:
parent
1e9c2e45d6
commit
1f6e793a7f
BIN
exampledir/screenshot.png
Normal file
BIN
exampledir/screenshot.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 52 KiB |
@ -41,6 +41,71 @@ impl GetMetadata for PdfPagesAdapter {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl FileAdapter for PdfPagesAdapter {
|
||||||
|
fn adapt(&self, ai: AdaptInfo) -> Fallible<()> {
|
||||||
|
let AdaptInfo {
|
||||||
|
filepath_hint,
|
||||||
|
is_real_file,
|
||||||
|
mut inp,
|
||||||
|
oup,
|
||||||
|
line_prefix,
|
||||||
|
archive_recursion_depth,
|
||||||
|
config,
|
||||||
|
..
|
||||||
|
} = ai;
|
||||||
|
if !is_real_file {
|
||||||
|
// todo: read to memory and then use that blob if size < max
|
||||||
|
writeln!(oup, "{}[rga: skipping pdfpages in archive]", line_prefix,)?;
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
let inp_fname = filepath_hint;
|
||||||
|
let exe_name = "gm";
|
||||||
|
let out_dir = tempfile::Builder::new().prefix("pdfpages-").tempdir()?;
|
||||||
|
let out_fname = out_dir.path().join("out%04d.png");
|
||||||
|
eprintln!("writing to temp dir: {}", out_fname.display());
|
||||||
|
let mut cmd = Command::new(exe_name);
|
||||||
|
cmd.arg("convert")
|
||||||
|
.arg("-density")
|
||||||
|
.arg("200")
|
||||||
|
.arg(inp_fname)
|
||||||
|
.arg("+adjoin")
|
||||||
|
.arg(out_fname);
|
||||||
|
|
||||||
|
let mut cmd = cmd.spawn().map_err(|e| {
|
||||||
|
map_exe_error(e, exe_name, "Make sure you have graphicsmagick installed.")
|
||||||
|
})?;
|
||||||
|
let args = config.args;
|
||||||
|
// TODO: how to handle this copying better?
|
||||||
|
|
||||||
|
let status = cmd.wait()?;
|
||||||
|
if status.success() {
|
||||||
|
} else {
|
||||||
|
return Err(format_err!("subprocess failed: {:?}", status));
|
||||||
|
}
|
||||||
|
for (i, filename) in glob::glob(
|
||||||
|
out_dir
|
||||||
|
.path()
|
||||||
|
.join("out*.png")
|
||||||
|
.to_str()
|
||||||
|
.expect("temp path has invalid encoding"),
|
||||||
|
)?
|
||||||
|
.enumerate()
|
||||||
|
{
|
||||||
|
let mut ele = BufReader::new(File::open(filename?)?);
|
||||||
|
rga_preproc(AdaptInfo {
|
||||||
|
filepath_hint: &PathBuf::from(format!("Page {}.png", i + 1)),
|
||||||
|
is_real_file: false,
|
||||||
|
inp: &mut ele,
|
||||||
|
oup,
|
||||||
|
line_prefix: &format!("{}Page {}:", line_prefix, i + 1),
|
||||||
|
archive_recursion_depth: archive_recursion_depth + 1,
|
||||||
|
config: PreprocConfig { cache: None, args },
|
||||||
|
})?;
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/*// todo: do this in an actually streaming fashion and less slow
|
/*// todo: do this in an actually streaming fashion and less slow
|
||||||
// IEND chunk + PDF magic
|
// IEND chunk + PDF magic
|
||||||
// 4945 4e44 ae42 6082 8950 4e47 0d0a 1a0a
|
// 4945 4e44 ae42 6082 8950 4e47 0d0a 1a0a
|
||||||
@ -72,72 +137,3 @@ fn split_by_seq<'a>(
|
|||||||
out.push(Cursor::new(Vec::from(&all[last..])));
|
out.push(Cursor::new(Vec::from(&all[last..])));
|
||||||
Ok(out)
|
Ok(out)
|
||||||
}*/
|
}*/
|
||||||
|
|
||||||
impl FileAdapter for PdfPagesAdapter {
|
|
||||||
fn adapt(&self, ai: AdaptInfo) -> Fallible<()> {
|
|
||||||
let AdaptInfo {
|
|
||||||
filepath_hint,
|
|
||||||
is_real_file,
|
|
||||||
mut inp,
|
|
||||||
oup,
|
|
||||||
line_prefix,
|
|
||||||
archive_recursion_depth,
|
|
||||||
config,
|
|
||||||
..
|
|
||||||
} = ai;
|
|
||||||
if !is_real_file {
|
|
||||||
// todo: read to memory and then use that blob if size < max
|
|
||||||
writeln!(oup, "{}[rga: skipping pdfpages in archive]", line_prefix,)?;
|
|
||||||
return Ok(());
|
|
||||||
}
|
|
||||||
let inp_fname = filepath_hint;
|
|
||||||
let exe_name = "gm";
|
|
||||||
let out_dir = tempfile::Builder::new().prefix("pdfpages-").tempdir()?;
|
|
||||||
let out_fname = out_dir.path().join("out%04d.png");
|
|
||||||
eprintln!("writing to temp dir: {}", out_fname.display());
|
|
||||||
let mut cmd = Command::new(exe_name);
|
|
||||||
cmd.arg("convert")
|
|
||||||
.arg("-density")
|
|
||||||
.arg("300")
|
|
||||||
.arg(inp_fname)
|
|
||||||
.arg("+adjoin")
|
|
||||||
.arg(out_fname);
|
|
||||||
|
|
||||||
let mut cmd = cmd.spawn().map_err(|e| {
|
|
||||||
map_exe_error(
|
|
||||||
e,
|
|
||||||
exe_name,
|
|
||||||
"Could not find gm. Make sure you have graphicsmagick installed.",
|
|
||||||
)
|
|
||||||
})?;
|
|
||||||
let args = config.args;
|
|
||||||
// TODO: how to handle this copying better?
|
|
||||||
|
|
||||||
let status = cmd.wait()?;
|
|
||||||
if status.success() {
|
|
||||||
} else {
|
|
||||||
return Err(format_err!("subprocess failed: {:?}", status));
|
|
||||||
}
|
|
||||||
for (i, filename) in glob::glob(
|
|
||||||
out_dir
|
|
||||||
.path()
|
|
||||||
.join("out*.png")
|
|
||||||
.to_str()
|
|
||||||
.expect("temp path has invalid encoding"),
|
|
||||||
)?
|
|
||||||
.enumerate()
|
|
||||||
{
|
|
||||||
let mut ele = BufReader::new(File::open(filename?)?);
|
|
||||||
rga_preproc(AdaptInfo {
|
|
||||||
filepath_hint: &PathBuf::from(format!("Page {}.png", i + 1)),
|
|
||||||
is_real_file: false,
|
|
||||||
inp: &mut ele,
|
|
||||||
oup,
|
|
||||||
line_prefix,
|
|
||||||
archive_recursion_depth: archive_recursion_depth + 1,
|
|
||||||
config: PreprocConfig { cache: None, args },
|
|
||||||
})?;
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
@ -36,7 +36,8 @@ impl SpawningFileAdapter for TesseractAdapter {
|
|||||||
"tesseract"
|
"tesseract"
|
||||||
}
|
}
|
||||||
fn command(&self, _filepath_hint: &Path, mut cmd: Command) -> Command {
|
fn command(&self, _filepath_hint: &Path, mut cmd: Command) -> Command {
|
||||||
cmd.arg("-").arg("-");
|
// rg already does threading
|
||||||
|
cmd.env("OMP_THREAD_LIMIT", "1").arg("-").arg("-");
|
||||||
cmd
|
cmd
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user