fix spawning adapters

This commit is contained in:
phiresky 2021-08-26 16:00:27 +02:00
parent 020000cc77
commit a7bbd93845
6 changed files with 26 additions and 56 deletions

11
Cargo.lock generated
View File

@ -253,6 +253,16 @@ dependencies = [
"lazy_static", "lazy_static",
] ]
[[package]]
name = "ctor"
version = "0.1.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e98e2ad1a782e33928b96fc3948e7c355e5af34ba4de7670fe8bac2a3b2006d"
dependencies = [
"quote",
"syn",
]
[[package]] [[package]]
name = "derive_more" name = "derive_more"
version = "0.99.16" version = "0.99.16"
@ -968,6 +978,7 @@ dependencies = [
"clap", "clap",
"crossbeam", "crossbeam",
"crossbeam-channel", "crossbeam-channel",
"ctor",
"derive_more", "derive_more",
"directories-next", "directories-next",
"dyn-clonable", "dyn-clonable",

View File

@ -53,3 +53,6 @@ dyn-clone = "1.0.2"
dyn-clonable = "0.9.0" dyn-clonable = "0.9.0"
zip = "0.5.8" zip = "0.5.8"
owning_ref = "0.4.1" owning_ref = "0.4.1"
[dev-dependencies]
ctor = "0.1.20"

View File

@ -11,8 +11,7 @@ pub mod spawning;
pub mod zip; pub mod zip;
use crate::{adapted_iter::AdaptedFilesIterBox, config::RgaConfig, matching::*}; use crate::{adapted_iter::AdaptedFilesIterBox, config::RgaConfig, matching::*};
use anyhow::*; use anyhow::*;
// use custom::builtin_spawning_adapters; use custom::builtin_spawning_adapters;
//use custom::CustomAdapterConfig;
use custom::CustomAdapterConfig; use custom::CustomAdapterConfig;
use log::*; use log::*;
@ -123,11 +122,11 @@ pub fn get_all_adapters(custom_adapters: Option<Vec<CustomAdapterConfig>>) -> Ad
// Rc::new(pdfpages::PdfPagesAdapter::new()), // Rc::new(pdfpages::PdfPagesAdapter::new()),
// Rc::new(tesseract::TesseractAdapter::new()), // Rc::new(tesseract::TesseractAdapter::new()),
]; ];
/*adapters.extend( adapters.extend(
builtin_spawning_adapters builtin_spawning_adapters
.iter() .iter()
.map(|e| -> Rc<dyn FileAdapter> { Rc::new(e.clone().to_adapter()) }), .map(|e| -> Rc<dyn FileAdapter> { Rc::new(e.to_adapter()) }),
);*/ );
adapters.extend(internal_adapters); adapters.extend(internal_adapters);
adapters adapters

View File

@ -231,7 +231,7 @@ mod test {
let filepath = test_data_dir().join("short.pdf"); let filepath = test_data_dir().join("short.pdf");
let (a, d) = simple_adapt_info(&filepath, Box::new(File::open(&filepath)?)); let (a, d) = simple_adapt_info(&filepath, Box::new(File::open(&filepath)?));
let mut r = adapter.adapt(a, &d)?; let r = adapter.adapt(a, &d)?;
let o = adapted_to_vec(r)?; let o = adapted_to_vec(r)?;
assert_eq!( assert_eq!(
String::from_utf8(o)?, String::from_utf8(o)?,

View File

@ -2,61 +2,12 @@ use crate::adapted_iter::SingleAdaptedFileAsIter;
use super::*; use super::*;
use anyhow::*; use anyhow::*;
use encoding_rs_io::DecodeReaderBytesBuilder;
use log::*; use log::*;
use std::io::BufReader;
use std::process::Command; use std::process::Command;
use std::process::{Child, Stdio}; use std::process::{Child, Stdio};
use std::{io::prelude::*, path::Path}; use std::{io::prelude::*, path::Path};
/// Copies `inp` to `oup`, writing `line_prefix` in front of every line.
///
/// The input is wrapped in a decoder configured for BOM sniffing with
/// `utf8_passthru` enabled; no particular encoding of the output is ensured.
///
/// Binary input is detected by looking for a NUL byte, first in the initial
/// buffered chunk and then in each line as it is read. On detection, copying
/// stops, a `[rga: binary data]` marker (followed by an empty line) is written
/// and `Ok(())` is returned. This check is needed because the rg binary
/// detection does not apply to preprocessed files.
///
/// Errors from reading `inp` or writing `oup` are propagated to the caller.
pub fn postproc_line_prefix(
    line_prefix: &str,
    inp: &mut dyn Read,
    oup: &mut dyn Write,
) -> Result<()> {
    // Capacity of the buffered reader, i.e. the upper bound on how much of the
    // start of the stream is inspected up front (4 KiB).
    const SNIFF_CAPACITY: usize = 1 << 12;

    // TODO: parse these options from ripgrep's configuration
    let sniff_bom = true;
    // https://github.com/BurntSushi/ripgrep/blob/a7d26c8f144a4957b75f71087a66692d0b25759a/grep-searcher/src/searcher/mod.rs#L706
    let decoded = DecodeReaderBytesBuilder::new()
        .encoding(None) // no forced encoding: detect bom but usually assume utf8
        .utf8_passthru(true)
        .strip_bom(sniff_bom)
        .bom_override(true)
        .bom_sniffing(sniff_bom)
        .build(inp);

    let mut buffered = BufReader::with_capacity(SNIFF_CAPACITY, decoded);

    // Peek at the first chunk only. `consume` is intentionally not called, so
    // the same bytes are still handed to the line loop below.
    let head_is_binary = buffered.fill_buf()?.iter().any(|&byte| byte == 0);
    if head_is_binary {
        writeln!(oup, "{}[rga: binary data]\n", line_prefix)?;
        return Ok(());
    }

    for chunk in buffered.split(b'\n') {
        let bytes = chunk?;
        // A NUL byte can still show up after the first chunk.
        if bytes.iter().any(|&byte| byte == 0) {
            writeln!(oup, "{}[rga: binary data]\n", line_prefix)?;
            return Ok(());
        }
        oup.write_all(line_prefix.as_bytes())?;
        oup.write_all(&bytes)?;
        oup.write_all(b"\n")?;
    }
    Ok(())
}
// TODO: don't separate the trait and the struct // TODO: don't separate the trait and the struct
pub trait SpawningFileAdapterTrait: GetMetadata { pub trait SpawningFileAdapterTrait: GetMetadata {
fn get_exe(&self) -> &str; fn get_exe(&self) -> &str;
@ -161,7 +112,7 @@ impl FileAdapter for SpawningFileAdapter {
debug!("executing {:?}", cmd); debug!("executing {:?}", cmd);
let output = pipe_output(&line_prefix, cmd, &mut inp, self.inner.get_exe(), "")?; let output = pipe_output(&line_prefix, cmd, &mut inp, self.inner.get_exe(), "")?;
Ok(Box::new(SingleAdaptedFileAsIter::new(AdaptInfo { Ok(Box::new(SingleAdaptedFileAsIter::new(AdaptInfo {
filepath_hint, filepath_hint: PathBuf::from(format!("{}.txt", filepath_hint.to_string_lossy())), // TODO: customizable
inp: output, inp: output,
line_prefix, line_prefix,
is_real_file: false, is_real_file: false,

View File

@ -69,3 +69,9 @@ pub fn print_dur(start: Instant) -> String {
pub fn print_bytes(bytes: impl Into<f64>) -> String { pub fn print_bytes(bytes: impl Into<f64>) -> String {
return pretty_bytes::converter::convert(bytes.into()); return pretty_bytes::converter::convert(bytes.into());
} }
/// Test-only constructor that installs `env_logger` as the global logger.
///
/// Registered through `#[ctor::ctor]`, so it is expected to run once when the
/// test binary is loaded, before any test executes.
#[cfg(test)]
#[ctor::ctor]
fn init() {
    // `env_logger::init()` panics when a global logger has already been set,
    // and a panic inside a constructor cannot be handled by the test harness.
    // `try_init` reports that situation as an `Err` instead; ignoring it is
    // fine because a logger is installed either way.
    let _ = env_logger::try_init();
}