ripgrep-all/src/adapters.rs

183 lines
6.3 KiB
Rust
Raw Normal View History

2020-06-08 23:45:52 +00:00
pub mod custom;
2019-06-16 09:07:29 +00:00
pub mod decompress;
2020-06-11 22:09:37 +00:00
pub mod ffmpeg;
2020-06-11 12:38:50 +00:00
pub mod fns;
2020-06-11 21:09:31 +00:00
//pub mod pdfpages;
2019-06-04 18:08:26 +00:00
pub mod poppler;
pub mod spawning;
2019-06-07 14:57:11 +00:00
pub mod sqlite;
2020-06-11 21:09:31 +00:00
//pub mod tar;
//pub mod tesseract;
pub mod writing;
// pub mod zip;
2019-06-12 10:25:02 +00:00
use crate::matching::*;
2019-06-07 17:00:24 +00:00
use crate::preproc::PreprocConfig;
use anyhow::*;
use custom::builtin_spawning_adapters;
2020-06-08 23:45:52 +00:00
use custom::CustomAdapterConfig;
2019-06-07 22:04:48 +00:00
use log::*;
2020-06-11 21:09:31 +00:00
2019-06-11 11:34:04 +00:00
use std::borrow::Cow;
2019-06-07 22:04:48 +00:00
use std::collections::HashMap;
2019-06-06 09:00:13 +00:00
use std::io::prelude::*;
2019-06-11 11:34:04 +00:00
use std::iter::Iterator;
2020-06-11 21:09:31 +00:00
use std::path::{Path, PathBuf};
2019-06-04 18:08:26 +00:00
use std::rc::Rc;
2020-06-11 21:09:31 +00:00
/// Boxed, `Send`-able byte reader. This is the type of the input stream handed
/// to an adapter (`AdaptInfo::inp`) and of the stream `FileAdapter::adapt` returns.
pub type ReadBox = Box<dyn Read + Send>;
2019-06-04 18:08:26 +00:00
pub struct AdapterMeta {
2019-06-07 22:04:48 +00:00
/// unique short name of this adapter (a-z0-9 only)
2019-06-04 18:08:26 +00:00
pub name: String,
2019-06-07 22:04:48 +00:00
/// version identifier. used to key cache entries, change if your output format changes
2019-06-04 18:08:26 +00:00
pub version: i32,
2019-06-07 22:04:48 +00:00
pub description: String,
2019-06-16 10:19:01 +00:00
/// indicates whether this adapter can descend (=call rga_preproc again). if true, the cache key needs to include the list of active adapters
pub recurses: bool,
/// list of matchers (interpreted as a OR b OR ...)
2019-06-11 11:34:04 +00:00
pub fast_matchers: Vec<FastMatcher>,
/// list of matchers when we have mime type detection active (interpreted as ORed)
/// warning: this *overrides* the fast matchers
pub slow_matchers: Option<Vec<SlowMatcher>>,
// if true, adapter is only used when user lists it in `--rga-adapters`
pub disabled_by_default: bool,
2019-06-11 11:34:04 +00:00
}
impl AdapterMeta {
// todo: this is pretty ugly
2019-06-12 10:25:02 +00:00
pub fn get_matchers<'a>(
&'a self,
slow: bool,
) -> Box<dyn Iterator<Item = Cow<SlowMatcher>> + 'a> {
2019-06-11 11:34:04 +00:00
match (slow, &self.slow_matchers) {
(true, Some(ref sm)) => Box::new(sm.iter().map(|e| Cow::Borrowed(e))),
(_, _) => Box::new(
self.fast_matchers
.iter()
.map(|e| Cow::Owned(SlowMatcher::Fast(e.clone()))),
),
}
}
2019-06-04 18:08:26 +00:00
}
pub trait GetMetadata {
2019-06-06 21:43:30 +00:00
fn metadata(&self) -> &AdapterMeta;
2019-06-04 18:08:26 +00:00
}
pub trait FileAdapter: GetMetadata {
2019-06-16 09:37:27 +00:00
/// adapt a file.
///
/// detection_reason is the Matcher that was used to identify this file. Unless --rga-accurate was given, it is always a FastMatcher
2020-06-11 21:09:31 +00:00
fn adapt(&self, a: AdaptInfo, detection_reason: &SlowMatcher) -> Result<ReadBox>;
2019-06-06 09:00:13 +00:00
}
2020-06-11 21:09:31 +00:00
pub struct AdaptInfo {
2019-06-06 21:43:30 +00:00
/// file path. May not be an actual file on the file system (e.g. in an archive). Used for matching file extensions.
2020-06-11 21:09:31 +00:00
pub filepath_hint: PathBuf,
2019-06-06 21:43:30 +00:00
/// true if filepath_hint is an actual file on the file system
pub is_real_file: bool,
2019-06-07 13:43:19 +00:00
/// depth at which this file is in archives. 0 for real filesystem
pub archive_recursion_depth: i32,
2019-06-06 21:50:58 +00:00
/// stream to read the file from. can be from a file or from some decoder
2020-06-11 21:09:31 +00:00
pub inp: ReadBox,
2019-06-06 21:50:58 +00:00
/// prefix every output line with this string to better indicate the file's location if it is in some archive
2020-06-11 21:09:31 +00:00
pub line_prefix: String,
pub config: PreprocConfig,
2019-06-04 18:08:26 +00:00
}
2019-06-16 09:37:27 +00:00
/// Pair of adapter lists in the form `(enabledAdapters, disabledAdapters)`,
/// as returned by `get_all_adapters`.
type AdaptersTuple = (Vec<Rc<dyn FileAdapter>>, Vec<Rc<dyn FileAdapter>>);
2020-06-08 23:45:52 +00:00
pub fn get_all_adapters(custom_adapters: Option<Vec<CustomAdapterConfig>>) -> AdaptersTuple {
// order in descending priority
let mut adapters: Vec<Rc<dyn FileAdapter>> = vec![];
2020-06-08 23:45:52 +00:00
if let Some(custom_adapters) = custom_adapters {
for adapter_config in custom_adapters {
adapters.push(Rc::new(adapter_config.to_adapter()));
2020-06-08 23:45:52 +00:00
}
}
let internal_adapters: Vec<Rc<dyn FileAdapter>> = vec![
2020-06-11 22:09:37 +00:00
Rc::new(ffmpeg::FFmpegAdapter::new()),
2020-06-11 21:09:31 +00:00
//Rc::new(zip::ZipAdapter::new()),
2019-06-16 09:07:29 +00:00
Rc::new(decompress::DecompressAdapter::new()),
2020-06-11 21:09:31 +00:00
// Rc::new(tar::TarAdapter::new()),
2019-06-12 15:23:30 +00:00
Rc::new(sqlite::SqliteAdapter::new()),
2020-06-11 21:09:31 +00:00
// Rc::new(pdfpages::PdfPagesAdapter::new()),
//Rc::new(tesseract::TesseractAdapter::new()),
];
adapters.extend(
builtin_spawning_adapters
.iter()
.map(|e| -> Rc<dyn FileAdapter> { Rc::new(e.clone().to_adapter()) }),
);
adapters.extend(internal_adapters);
adapters
.into_iter()
.partition(|e| !e.metadata().disabled_by_default)
2019-06-05 19:28:35 +00:00
}
2019-06-04 18:08:26 +00:00
/**
* filter adapters by given names:
*
* - "" means use default enabled adapter list
2019-06-12 15:23:30 +00:00
* - "a,b" means use adapters a,b
* - "-a,b" means use default list except for a and b
2019-06-16 09:37:27 +00:00
* - "+a,b" means use default list but also a and b (a,b will be prepended to the list so given higher priority)
*/
2019-06-11 11:34:04 +00:00
pub fn get_adapters_filtered<T: AsRef<str>>(
2020-06-08 23:45:52 +00:00
custom_adapters: Option<Vec<CustomAdapterConfig>>,
adapter_names: &Vec<T>,
) -> Result<Vec<Rc<dyn FileAdapter>>> {
2020-06-08 23:45:52 +00:00
let (def_enabled_adapters, def_disabled_adapters) = get_all_adapters(custom_adapters);
2019-06-07 22:04:48 +00:00
let adapters = if !adapter_names.is_empty() {
let adapters_map: HashMap<_, _> = def_enabled_adapters
2019-06-07 22:04:48 +00:00
.iter()
.chain(def_disabled_adapters.iter())
2019-06-07 22:04:48 +00:00
.map(|e| (e.metadata().name.clone(), e.clone()))
.collect();
let mut adapters = vec![];
let mut subtractive = false;
2019-06-12 15:23:30 +00:00
let mut additive = false;
2019-06-07 22:04:48 +00:00
for (i, name) in adapter_names.iter().enumerate() {
2019-06-11 11:34:04 +00:00
let mut name = name.as_ref();
if i == 0 && (name.starts_with('-')) {
2019-06-07 22:04:48 +00:00
subtractive = true;
name = &name[1..];
adapters = def_enabled_adapters.clone();
} else if i == 0 && (name.starts_with('+')) {
name = &name[1..];
adapters = def_enabled_adapters.clone();
2019-06-12 15:23:30 +00:00
additive = true;
2019-06-07 22:04:48 +00:00
}
if subtractive {
let inx = adapters
.iter()
2019-06-11 11:34:04 +00:00
.position(|a| a.metadata().name == name)
2019-06-07 22:04:48 +00:00
.ok_or_else(|| format_err!("Could not remove {}: Not in list", name))?;
adapters.remove(inx);
} else {
2019-06-12 15:23:30 +00:00
let adapter = adapters_map
.get(name)
.ok_or_else(|| format_err!("Unknown adapter: \"{}\"", name))?
.clone();
if additive {
adapters.insert(0, adapter);
} else {
adapters.push(adapter);
}
2019-06-07 22:04:48 +00:00
}
}
adapters
} else {
def_enabled_adapters
2019-06-07 22:04:48 +00:00
};
debug!(
2020-06-09 10:47:34 +00:00
"Chosen available adapters: {}",
2019-06-07 22:04:48 +00:00
adapters
.iter()
.map(|a| a.metadata().name.clone())
.collect::<Vec<String>>()
.join(",")
);
Ok(adapters)
}