ripgrep-all/src/preproc.rs

168 lines
6.2 KiB
Rust
Raw Normal View History

2019-06-06 09:00:13 +00:00
use crate::adapters::*;
2019-06-07 21:04:18 +00:00
use crate::args::RgaArgs;
2019-06-12 10:25:02 +00:00
use crate::matching::*;
2019-06-06 09:00:13 +00:00
use crate::CachingWriter;
2019-06-07 17:00:24 +00:00
use failure::Fallible;
2019-06-06 09:00:13 +00:00
use failure::{format_err, Error};
2019-06-18 10:14:09 +00:00
use log::*;
2019-06-06 09:00:13 +00:00
use path_clean::PathClean;
2019-06-07 21:04:18 +00:00
use std::convert::TryInto;
use std::io::BufRead;
use std::io::BufReader;
2019-06-06 23:17:55 +00:00
use std::io::BufWriter;
2019-06-07 18:12:24 +00:00
use std::sync::{Arc, RwLock};
2019-06-06 09:00:13 +00:00
2019-06-07 18:12:24 +00:00
#[derive(Clone)]
2019-06-07 21:04:18 +00:00
pub struct PreprocConfig<'a> {
2019-06-07 18:12:24 +00:00
pub cache: Option<Arc<RwLock<dyn crate::preproc_cache::PreprocCache>>>,
2019-06-07 21:04:18 +00:00
pub args: &'a RgaArgs,
2019-06-06 09:00:13 +00:00
}
2019-06-06 21:50:58 +00:00
/**
* preprocess a file as defined in `ai`.
*
* If a cache is passed, read/write to it.
*
*/
2019-06-18 10:14:09 +00:00
pub fn rga_preproc(ai: AdaptInfo) -> Fallible<()> {
2019-06-06 09:00:13 +00:00
let AdaptInfo {
filepath_hint,
2019-06-06 21:43:30 +00:00
is_real_file,
2019-06-06 09:00:13 +00:00
inp,
oup,
line_prefix,
2019-06-07 17:00:24 +00:00
config,
2019-06-07 18:12:24 +00:00
archive_recursion_depth,
2019-06-06 09:00:13 +00:00
..
} = ai;
2019-06-07 21:04:18 +00:00
let PreprocConfig { mut cache, args } = config;
2019-06-11 11:34:04 +00:00
let adapters = adapter_matcher(&args.adapters[..], args.accurate)?;
2019-06-06 09:00:13 +00:00
let filename = filepath_hint
.file_name()
2019-06-06 21:43:30 +00:00
.ok_or_else(|| format_err!("Empty filename"))?;
2019-06-18 10:14:09 +00:00
debug!("depth: {}", archive_recursion_depth);
2019-06-11 11:34:04 +00:00
if archive_recursion_depth >= args.max_archive_recursion {
2019-06-07 18:12:24 +00:00
writeln!(oup, "{}[rga: max archive recursion reached]", line_prefix)?;
return Ok(());
}
2019-06-06 09:00:13 +00:00
2019-06-18 10:14:09 +00:00
debug!("path_hint: {:?}", filepath_hint);
2019-06-06 09:00:13 +00:00
// todo: figure out when using a bufreader is a good idea and when it is not
2019-06-18 10:14:09 +00:00
// seems to be good for File::open() reads, but not sure about within archives (tar, zip)
let inp = &mut BufReader::with_capacity(1 << 13, inp);
let mimetype = if args.accurate {
let buf = inp.fill_buf()?; // fill but do not consume!
let mimetype = tree_magic::from_u8(buf);
2019-06-18 10:14:09 +00:00
debug!("mimetype: {:?}", mimetype);
Some(mimetype)
} else {
None
};
2019-06-06 09:00:13 +00:00
let adapter = adapters(FileMeta {
mimetype,
2019-06-06 09:00:13 +00:00
lossy_filename: filename.to_string_lossy().to_string(),
});
match adapter {
2019-06-16 09:37:27 +00:00
Some((adapter, detection_reason)) => {
let meta = adapter.metadata();
2019-06-06 09:00:13 +00:00
eprintln!("adapter: {}", &meta.name);
let db_name = format!("{}.v{}", meta.name, meta.version);
2019-06-07 18:12:24 +00:00
if let Some(cache) = cache.as_mut() {
2019-06-06 09:00:13 +00:00
let cache_key: Vec<u8> = {
let clean_path = filepath_hint.to_owned().clean();
let meta = std::fs::metadata(&filepath_hint)?;
2019-06-16 10:19:01 +00:00
if adapter.metadata().recurses {
let key = (
clean_path,
meta.modified().expect("weird OS that can't into mtime"),
&args.adapters[..],
);
2019-06-18 10:14:09 +00:00
debug!("cache key: {:?}", key);
2019-06-16 10:19:01 +00:00
bincode::serialize(&key).expect("could not serialize path") // key in the cache database
} else {
let key = (
clean_path,
meta.modified().expect("weird OS that can't into mtime"),
);
2019-06-18 10:14:09 +00:00
debug!("cache key: {:?}", key);
2019-06-16 10:19:01 +00:00
bincode::serialize(&key).expect("could not serialize path") // key in the cache database
}
2019-06-06 09:00:13 +00:00
};
2019-06-07 18:12:24 +00:00
cache.write().unwrap().get_or_run(
2019-06-07 17:00:24 +00:00
&db_name,
&cache_key,
Box::new(|| -> Fallible<Option<Vec<u8>>> {
2019-06-06 23:17:55 +00:00
// wrapping BufWriter here gives ~10% perf boost
2019-06-07 21:04:18 +00:00
let mut compbuf = BufWriter::new(CachingWriter::new(
oup,
2019-06-11 11:34:04 +00:00
args.cache_max_blob_len.try_into().unwrap(),
args.cache_compression_level.try_into().unwrap(),
2019-06-07 21:04:18 +00:00
)?);
2019-06-18 10:14:09 +00:00
debug!("adapting...");
2019-06-16 09:37:27 +00:00
adapter.adapt(
AdaptInfo {
line_prefix,
filepath_hint,
is_real_file,
inp,
oup: &mut compbuf,
archive_recursion_depth,
config: PreprocConfig { cache: None, args },
},
2019-06-16 10:05:27 +00:00
&detection_reason,
2019-06-16 09:37:27 +00:00
)?;
2019-06-06 23:17:55 +00:00
let compressed = compbuf
.into_inner()
.map_err(|_| "could not finish zstd")
.unwrap()
.finish()?;
2019-06-06 09:00:13 +00:00
if let Some(cached) = compressed {
2019-06-18 10:14:09 +00:00
debug!("compressed len: {}", cached.len());
2019-06-07 21:17:33 +00:00
Ok(Some(cached))
} else {
Ok(None)
}
2019-06-07 17:00:24 +00:00
}),
Box::new(|cached| {
let stdouti = std::io::stdout();
zstd::stream::copy_decode(cached, stdouti.lock())?;
2019-06-06 09:00:13 +00:00
Ok(())
2019-06-07 17:00:24 +00:00
}),
)?;
Ok(())
2019-06-06 09:00:13 +00:00
} else {
2019-06-18 10:14:09 +00:00
debug!("adapting...");
2019-06-16 09:37:27 +00:00
adapter.adapt(
AdaptInfo {
line_prefix,
filepath_hint,
is_real_file,
inp,
oup,
archive_recursion_depth,
config: PreprocConfig { cache: None, args },
},
2019-06-16 10:05:27 +00:00
&detection_reason,
2019-06-16 09:37:27 +00:00
)?;
2019-06-06 09:00:13 +00:00
Ok(())
}
}
None => {
2019-06-16 09:07:29 +00:00
// allow passthrough if the file is in an archive or accurate matching is enabled
2019-06-06 22:57:53 +00:00
// otherwise it should have been filtered out by rg pre-glob since rg can handle those better than us
2019-06-16 09:07:29 +00:00
let allow_cat = !is_real_file || args.accurate;
2019-06-06 09:00:13 +00:00
if allow_cat {
2019-06-06 22:18:04 +00:00
spawning::postproc_line_prefix(line_prefix, inp, oup)?;
2019-06-06 09:00:13 +00:00
Ok(())
} else {
2019-06-16 09:07:29 +00:00
Err(format_err!(
"No adapter found for file {:?}, passthrough disabled.",
filename
))
2019-06-06 09:00:13 +00:00
}
}
}
}