fix spawning adapters

This commit is contained in:
phiresky 2021-08-26 16:00:27 +02:00
parent 020000cc77
commit a7bbd93845
6 changed files with 26 additions and 56 deletions

11
Cargo.lock generated
View File

@ -253,6 +253,16 @@ dependencies = [
"lazy_static",
]
[[package]]
name = "ctor"
version = "0.1.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e98e2ad1a782e33928b96fc3948e7c355e5af34ba4de7670fe8bac2a3b2006d"
dependencies = [
"quote",
"syn",
]
[[package]]
name = "derive_more"
version = "0.99.16"
@ -968,6 +978,7 @@ dependencies = [
"clap",
"crossbeam",
"crossbeam-channel",
"ctor",
"derive_more",
"directories-next",
"dyn-clonable",

View File

@ -53,3 +53,6 @@ dyn-clone = "1.0.2"
dyn-clonable = "0.9.0"
zip = "0.5.8"
owning_ref = "0.4.1"
[dev-dependencies]
ctor = "0.1.20"

View File

@ -11,8 +11,7 @@ pub mod spawning;
pub mod zip;
use crate::{adapted_iter::AdaptedFilesIterBox, config::RgaConfig, matching::*};
use anyhow::*;
// use custom::builtin_spawning_adapters;
//use custom::CustomAdapterConfig;
use custom::builtin_spawning_adapters;
use custom::CustomAdapterConfig;
use log::*;
@ -123,11 +122,11 @@ pub fn get_all_adapters(custom_adapters: Option<Vec<CustomAdapterConfig>>) -> Ad
// Rc::new(pdfpages::PdfPagesAdapter::new()),
// Rc::new(tesseract::TesseractAdapter::new()),
];
/*adapters.extend(
adapters.extend(
builtin_spawning_adapters
.iter()
.map(|e| -> Rc<dyn FileAdapter> { Rc::new(e.clone().to_adapter()) }),
);*/
.map(|e| -> Rc<dyn FileAdapter> { Rc::new(e.to_adapter()) }),
);
adapters.extend(internal_adapters);
adapters

View File

@ -231,7 +231,7 @@ mod test {
let filepath = test_data_dir().join("short.pdf");
let (a, d) = simple_adapt_info(&filepath, Box::new(File::open(&filepath)?));
let mut r = adapter.adapt(a, &d)?;
let r = adapter.adapt(a, &d)?;
let o = adapted_to_vec(r)?;
assert_eq!(
String::from_utf8(o)?,

View File

@ -2,61 +2,12 @@ use crate::adapted_iter::SingleAdaptedFileAsIter;
use super::*;
use anyhow::*;
use encoding_rs_io::DecodeReaderBytesBuilder;
use log::*;
use std::io::BufReader;
use std::process::Command;
use std::process::{Child, Stdio};
use std::{io::prelude::*, path::Path};
/**
 * Stream `inp` into `oup`, putting `line_prefix` in front of every line.
 *
 * The input is passed through a BOM-sniffing decoder with UTF-8 passthrough enabled, so no
 * particular encoding is guaranteed for the output.
 *
 * Binary input is detected by looking for a NUL byte, first in the initially buffered chunk
 * and then in every line. When one is found, a single `[rga: binary data]` marker is written
 * and copying stops. This is needed because the rg binary detection does not apply to
 * preprocessed files.
 *
 * Returns an error if reading from `inp` or writing to `oup` fails.
 */
pub fn postproc_line_prefix(
    line_prefix: &str,
    inp: &mut dyn Read,
    oup: &mut dyn Write,
) -> Result<()> {
    // TODO: parse these options from ripgrep's configuration
    let sniff_bom = true;
    // Decoder setup follows ripgrep's searcher:
    // https://github.com/BurntSushi/ripgrep/blob/a7d26c8f144a4957b75f71087a66692d0b25759a/grep-searcher/src/searcher/mod.rs#L706
    let decoded = DecodeReaderBytesBuilder::new()
        .encoding(None) // no forced encoding: detect bom but usually assume utf8
        .utf8_passthru(true)
        .strip_bom(sniff_bom)
        .bom_override(true)
        .bom_sniffing(sniff_bom)
        .build(inp);

    // The buffer holds at most 4 KiB (1 << 12), so that is how much gets inspected up front.
    let mut buffered = BufReader::with_capacity(1 << 12, decoded);

    // Peek only: `consume` is intentionally not called, so the line loop below still sees
    // these bytes.
    let starts_binary = buffered.fill_buf()?.contains(&0u8);
    if starts_binary {
        writeln!(oup, "{}[rga: binary data]\n", line_prefix)?;
        return Ok(());
    }

    for chunk in buffered.split(b'\n') {
        let bytes = chunk?;
        // A NUL byte further into the stream still marks the remainder as binary.
        if bytes.contains(&0u8) {
            writeln!(oup, "{}[rga: binary data]\n", line_prefix)?;
            return Ok(());
        }
        oup.write_all(line_prefix.as_bytes())?;
        oup.write_all(&bytes)?;
        oup.write_all(b"\n")?;
    }
    Ok(())
}
// TODO: don't separate the trait and the struct
pub trait SpawningFileAdapterTrait: GetMetadata {
fn get_exe(&self) -> &str;
@ -161,7 +112,7 @@ impl FileAdapter for SpawningFileAdapter {
debug!("executing {:?}", cmd);
let output = pipe_output(&line_prefix, cmd, &mut inp, self.inner.get_exe(), "")?;
Ok(Box::new(SingleAdaptedFileAsIter::new(AdaptInfo {
filepath_hint,
filepath_hint: PathBuf::from(format!("{}.txt", filepath_hint.to_string_lossy())), // TODO: customizable
inp: output,
line_prefix,
is_real_file: false,

View File

@ -69,3 +69,9 @@ pub fn print_dur(start: Instant) -> String {
/// Render a byte count as a string.
///
/// Accepts any value convertible into `f64` and delegates the formatting to
/// `pretty_bytes::converter::convert`.
pub fn print_bytes(bytes: impl Into<f64>) -> String {
    let byte_count: f64 = bytes.into();
    pretty_bytes::converter::convert(byte_count)
}
/// Installs `env_logger` as the global logger for test builds.
///
/// Registered through `ctor`, so it runs when the test binary is loaded rather than being
/// called from individual tests.
#[cfg(test)]
#[ctor::ctor]
fn init() {
    // `env_logger::init()` panics when a global logger is already installed. Use the
    // fallible variant and ignore the error: an existing logger is fine here, and a panic
    // in a constructor function would abort the test binary before any test runs.
    let _ = env_logger::try_init();
}