ripgrep-all/src/adapters.rs

80 lines
2.5 KiB
Rust
Raw Normal View History

2019-06-04 18:08:26 +00:00
pub mod ffmpeg;
pub mod pandoc;
pub mod poppler;
pub mod spawning;
use regex::{Regex, RegexSet};
use std::io::BufRead;
use std::io::Write;
2019-06-05 19:28:35 +00:00
use std::path::Path;
2019-06-04 18:08:26 +00:00
use std::rc::Rc;
2019-06-05 19:28:35 +00:00
use failure::*;
//pub use ffmpeg::FffmpegAdapter;
2019-06-04 18:08:26 +00:00
/// A rule describing which files an adapter applies to.
#[derive(Debug, Clone)]
pub enum Matcher {
    // MimeType(Regex),
    /// Matches on the file extension. The string is the bare extension without
    /// the leading dot (e.g. `"pdf"`); `extension_to_regex` adds the dot and
    /// escapes the text, so it is matched literally.
    FileExtension(String),
}
/// Static description of an adapter: how it is called, its version, and the
/// matchers that decide which files it handles.
pub struct AdapterMeta {
    /// Name of the adapter.
    pub name: String,
    /// Version number of the adapter.
    // NOTE(review): what a version bump signifies is not visible here — confirm with the callers.
    pub version: i32,
    /// Matchers for this adapter; `adapter_matcher` registers one filename
    /// regex per entry.
    pub matchers: Vec<Matcher>,
}
/// Information about a candidate file, used to pick an adapter for it.
#[derive(Debug, Clone)]
pub struct FileMeta {
    /// The file name as a `String` with the non-valid bytes removed.
    ///
    /// A file name is not actually guaranteed to be UTF-8, but we can't run a
    /// regex on an `OsStr` and can't get a `&[u8]` out of an `OsStr` either.
    /// Since we probably only want to match on ASCII parts anyway, a lossy
    /// string is good enough.
    pub lossy_filename: String,
    // pub mimetype: String,
}
/// Gives access to the [`AdapterMeta`] that describes an adapter.
pub trait GetMetadata {
    /// Borrows this adapter's metadata; the reference lives as long as the
    /// borrow of `self`.
    fn metadata(&self) -> &AdapterMeta;
}
pub trait FileAdapter: GetMetadata {
2019-06-05 19:28:35 +00:00
fn adapt(&self, inp_fname: &Path, oup: &mut dyn Write) -> Fallible<()>;
2019-06-04 18:08:26 +00:00
}
2019-06-05 19:28:35 +00:00
pub fn extension_to_regex(extension: &str) -> Regex {
Regex::new(&format!(".*\\.{}", &regex::escape(extension))).expect("we know this regex compiles")
2019-06-04 18:08:26 +00:00
}
2019-06-05 19:28:35 +00:00
pub fn get_adapters() -> Vec<Rc<dyn FileAdapter>> {
2019-06-04 18:08:26 +00:00
let adapters: Vec<Rc<dyn FileAdapter>> = vec![
Rc::new(crate::adapters::ffmpeg::FFmpegAdapter::new()),
Rc::new(crate::adapters::pandoc::PandocAdapter::new()),
Rc::new(crate::adapters::poppler::PopplerAdapter::new()),
];
2019-06-05 19:28:35 +00:00
adapters
}
2019-06-04 18:08:26 +00:00
2019-06-05 19:28:35 +00:00
pub fn adapter_matcher() -> Result<impl Fn(FileMeta) -> Option<Rc<dyn FileAdapter>>, regex::Error> {
let adapters = get_adapters();
2019-06-04 18:08:26 +00:00
let mut fname_regexes = vec![];
2019-06-05 14:43:40 +00:00
//let mut mime_regexes = vec![];
2019-06-04 18:08:26 +00:00
for adapter in adapters.into_iter() {
let metadata = adapter.metadata();
for matcher in &metadata.matchers {
match matcher {
2019-06-05 14:43:40 +00:00
//Matcher::MimeType(re) => mime_regexes.push((re.clone(), adapter.clone())),
2019-06-05 19:28:35 +00:00
Matcher::FileExtension(re) => {
fname_regexes.push((extension_to_regex(re), adapter.clone()))
}
2019-06-04 18:08:26 +00:00
};
}
}
let fname_regex_set = RegexSet::new(fname_regexes.iter().map(|p| p.0.as_str()))?;
2019-06-05 14:43:40 +00:00
//let mime_regex_set = RegexSet::new(mime_regexes.iter().map(|p| p.0.as_str()))?;
2019-06-04 18:08:26 +00:00
return Ok(move |meta: FileMeta| {
2019-06-05 19:28:35 +00:00
// todo: handle multiple conflicting matches
2019-06-04 18:08:26 +00:00
for m in fname_regex_set.matches(&meta.lossy_filename) {
return Some(fname_regexes[m].1.clone());
}
2019-06-05 14:43:40 +00:00
/*for m in mime_regex_set.matches(&meta.mimetype) {
2019-06-04 18:08:26 +00:00
return Some(mime_regexes[m].1.clone());
2019-06-05 14:43:40 +00:00
}*/
2019-06-04 18:08:26 +00:00
return None;
});
}