ripgrep-all/src/adapters/mod.rs

158 lines
5.3 KiB
Rust
Raw Normal View History

2019-06-04 18:08:26 +00:00
pub mod ffmpeg;
pub mod pandoc;
pub mod poppler;
pub mod spawning;
2019-06-07 14:57:11 +00:00
pub mod sqlite;
2019-06-06 15:59:15 +00:00
pub mod tar;
2019-06-06 09:00:13 +00:00
pub mod zip;
2019-06-07 17:00:24 +00:00
use crate::preproc::PreprocConfig;
2019-06-06 09:00:13 +00:00
use failure::*;
2019-06-07 22:04:48 +00:00
use log::*;
2019-06-04 18:08:26 +00:00
use regex::{Regex, RegexSet};
2019-06-07 22:04:48 +00:00
use std::collections::HashMap;
2019-06-06 09:00:13 +00:00
use std::io::prelude::*;
2019-06-05 19:28:35 +00:00
use std::path::Path;
2019-06-04 18:08:26 +00:00
use std::rc::Rc;
2019-06-05 19:28:35 +00:00
//pub use ffmpeg::FffmpegAdapter;
2019-06-04 18:08:26 +00:00
/// Rule describing which files an adapter applies to.
pub enum Matcher {
    // MimeType(Regex),
    /// File extension without the leading dot, e.g. "jpg" or "tar.gz".
    ///
    /// Intended to be matched against the end of the file name, i.e. as `/.*\.ext$/`.
    FileExtension(String),
}
pub struct AdapterMeta {
2019-06-07 22:04:48 +00:00
/// unique short name of this adapter (a-z0-9 only)
2019-06-04 18:08:26 +00:00
pub name: String,
2019-06-07 22:04:48 +00:00
/// version identifier. used to key cache entries, change if your output format changes
2019-06-04 18:08:26 +00:00
pub version: i32,
2019-06-07 22:04:48 +00:00
pub description: String,
2019-06-04 18:08:26 +00:00
pub matchers: Vec<Matcher>,
}
/// Information about a file that is used to pick an adapter for it.
pub struct FileMeta {
    /// The file name as a string with non-valid bytes removed.
    ///
    /// A file name is not actually a UTF-8 string, but since we can't do regex on `OsStr` and
    /// can't get a `&[u8]` from `OsStr` either, and since we probably only want to match on
    /// ASCII stuff anyway, the lossy string form is used.
    pub lossy_filename: String,
    // pub mimetype: String,
}
pub trait GetMetadata {
2019-06-06 21:43:30 +00:00
fn metadata(&self) -> &AdapterMeta;
2019-06-04 18:08:26 +00:00
}
pub trait FileAdapter: GetMetadata {
2019-06-06 09:00:13 +00:00
fn adapt(&self, a: AdaptInfo) -> Fallible<()>;
}
pub struct AdaptInfo<'a> {
2019-06-06 21:43:30 +00:00
/// file path. May not be an actual file on the file system (e.g. in an archive). Used for matching file extensions.
2019-06-06 09:00:13 +00:00
pub filepath_hint: &'a Path,
2019-06-06 21:43:30 +00:00
/// true if filepath_hint is an actual file on the file system
pub is_real_file: bool,
2019-06-07 13:43:19 +00:00
/// depth at which this file is in archives. 0 for real filesystem
pub archive_recursion_depth: i32,
2019-06-06 21:50:58 +00:00
/// stream to read the file from. can be from a file or from some decoder
2019-06-06 09:00:13 +00:00
pub inp: &'a mut dyn Read,
2019-06-06 21:50:58 +00:00
/// stream to write to. will be written to from a different thread
2019-06-06 09:00:13 +00:00
pub oup: &'a mut (dyn Write + Send),
2019-06-06 21:50:58 +00:00
/// prefix every output line with this string to better indicate the file's location if it is in some archive
2019-06-06 09:00:13 +00:00
pub line_prefix: &'a str,
// pub adapt_subobject: &'a dyn Fn(AdaptInfo) -> Fallible<()>,
2019-06-07 21:04:18 +00:00
pub config: PreprocConfig<'a>,
2019-06-04 18:08:26 +00:00
}
2019-06-05 19:28:35 +00:00
pub fn extension_to_regex(extension: &str) -> Regex {
Regex::new(&format!(".*\\.{}", &regex::escape(extension))).expect("we know this regex compiles")
2019-06-04 18:08:26 +00:00
}
2019-06-05 19:28:35 +00:00
pub fn get_adapters() -> Vec<Rc<dyn FileAdapter>> {
2019-06-04 18:08:26 +00:00
let adapters: Vec<Rc<dyn FileAdapter>> = vec![
2019-06-06 21:43:30 +00:00
Rc::new(ffmpeg::FFmpegAdapter),
Rc::new(pandoc::PandocAdapter),
Rc::new(poppler::PopplerAdapter),
Rc::new(zip::ZipAdapter),
Rc::new(tar::TarAdapter),
2019-06-07 14:57:11 +00:00
Rc::new(sqlite::SqliteAdapter),
2019-06-04 18:08:26 +00:00
];
2019-06-05 19:28:35 +00:00
adapters
}
2019-06-04 18:08:26 +00:00
2019-06-07 22:04:48 +00:00
pub fn get_adapters_filtered(adapter_names: &Vec<String>) -> Fallible<Vec<Rc<dyn FileAdapter>>> {
let all_adapters = get_adapters();
let adapters = if !adapter_names.is_empty() {
let adapters_map: HashMap<_, _> = all_adapters
.iter()
.map(|e| (e.metadata().name.clone(), e.clone()))
.collect();
let mut adapters = vec![];
let mut subtractive = false;
for (i, name) in adapter_names.iter().enumerate() {
let mut name = &name[..];
if i == 0 && name.starts_with("-") {
subtractive = true;
name = &name[1..];
adapters = all_adapters.clone();
}
if subtractive {
let inx = adapters
.iter()
.position(|a| &a.metadata().name == name)
.ok_or_else(|| format_err!("Could not remove {}: Not in list", name))?;
adapters.remove(inx);
} else {
adapters.push(
adapters_map
.get(name)
.ok_or_else(|| format_err!("Unknown adapter: \"{}\"", name))?
.clone(),
);
}
}
adapters
} else {
all_adapters
};
debug!(
"Chosen adapters: {}",
adapters
.iter()
.map(|a| a.metadata().name.clone())
.collect::<Vec<String>>()
.join(",")
);
Ok(adapters)
}
pub fn adapter_matcher(
adapter_names: &Vec<String>,
) -> Fallible<impl Fn(FileMeta) -> Option<Rc<dyn FileAdapter>>> {
let adapters = get_adapters_filtered(adapter_names)?;
2019-06-04 18:08:26 +00:00
let mut fname_regexes = vec![];
2019-06-05 14:43:40 +00:00
//let mut mime_regexes = vec![];
2019-06-04 18:08:26 +00:00
for adapter in adapters.into_iter() {
let metadata = adapter.metadata();
for matcher in &metadata.matchers {
match matcher {
2019-06-05 14:43:40 +00:00
//Matcher::MimeType(re) => mime_regexes.push((re.clone(), adapter.clone())),
2019-06-05 19:28:35 +00:00
Matcher::FileExtension(re) => {
fname_regexes.push((extension_to_regex(re), adapter.clone()))
}
2019-06-04 18:08:26 +00:00
};
}
}
let fname_regex_set = RegexSet::new(fname_regexes.iter().map(|p| p.0.as_str()))?;
2019-06-05 14:43:40 +00:00
//let mime_regex_set = RegexSet::new(mime_regexes.iter().map(|p| p.0.as_str()))?;
2019-06-06 21:43:30 +00:00
Ok(move |meta: FileMeta| {
2019-06-05 19:28:35 +00:00
// todo: handle multiple conflicting matches
2019-06-06 21:43:30 +00:00
let matches = fname_regex_set.matches(&meta.lossy_filename);
match matches.iter().next() {
Some(m) => Some(fname_regexes[m].1.clone()),
None => None,
2019-06-04 18:08:26 +00:00
}
2019-06-05 14:43:40 +00:00
/*for m in mime_regex_set.matches(&meta.mimetype) {
2019-06-04 18:08:26 +00:00
return Some(mime_regexes[m].1.clone());
2019-06-05 14:43:40 +00:00
}*/
2019-06-06 21:43:30 +00:00
})
2019-06-04 18:08:26 +00:00
}