2019-06-12 10:25:02 +00:00
|
|
|
/**
|
|
|
|
* Module for matching adapters to files based on file name or mime type
|
|
|
|
*/
|
|
|
|
use crate::adapters::*;
|
|
|
|
|
2020-06-06 10:57:43 +00:00
|
|
|
use anyhow::*;
|
2019-06-12 10:25:02 +00:00
|
|
|
|
|
|
|
use regex::{Regex, RegexSet};
|
|
|
|
|
|
|
|
use std::iter::Iterator;
|
|
|
|
|
|
|
|
use std::rc::Rc;
|
|
|
|
|
2020-05-20 13:55:12 +00:00
|
|
|
#[derive(Clone, Debug)]
|
2019-06-12 10:25:02 +00:00
|
|
|
pub enum FastMatcher {
|
2020-05-19 09:10:11 +00:00
|
|
|
// MimeType(Regex),
|
|
|
|
/**
|
|
|
|
* without the leading dot, e.g. "jpg" or "tar.gz". Matched as /.*\.ext$/
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
FileExtension(String),
|
|
|
|
// todo: maybe add others, e.g. regex on whole filename or even paths
|
|
|
|
// todo: maybe allow matching a directory (e.g. /var/lib/postgres)
|
2019-06-12 10:25:02 +00:00
|
|
|
}
|
|
|
|
|
2020-05-20 13:55:12 +00:00
|
|
|
#[derive(Clone, Debug)]
|
2019-06-12 10:25:02 +00:00
|
|
|
pub enum SlowMatcher {
|
2020-05-19 09:10:11 +00:00
|
|
|
/// any type of fast matcher
|
|
|
|
Fast(FastMatcher),
|
|
|
|
///
|
|
|
|
/// match by exact mime type extracted using tree_magic
|
|
|
|
/// TODO: allow match ignoring suffix etc?
|
|
|
|
MimeType(String),
|
2019-06-12 10:25:02 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
pub struct FileMeta {
|
2020-05-19 09:10:11 +00:00
|
|
|
// filename is not actually a utf8 string, but since we can't do regex on OsStr and can't get a &[u8] from OsStr either,
|
|
|
|
// and since we probably only want to do only matching on ascii stuff anyways, this is the filename as a string with non-valid bytes removed
|
|
|
|
pub lossy_filename: String,
|
|
|
|
// only given when slow matching is enabled
|
|
|
|
pub mimetype: Option<String>,
|
2019-06-12 10:25:02 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
pub fn extension_to_regex(extension: &str) -> Regex {
|
2020-05-19 10:00:20 +00:00
|
|
|
Regex::new(&format!("(?i)\\.{}$", ®ex::escape(extension)))
|
|
|
|
.expect("we know this regex compiles")
|
2019-06-12 10:25:02 +00:00
|
|
|
}
|
|
|
|
|
2020-06-08 23:45:52 +00:00
|
|
|
pub fn adapter_matcher(
|
2020-06-09 10:47:34 +00:00
|
|
|
adapters: &Vec<Rc<dyn FileAdapter>>,
|
2020-05-19 09:10:11 +00:00
|
|
|
slow: bool,
|
2020-06-06 10:57:43 +00:00
|
|
|
) -> Result<impl Fn(FileMeta) -> Option<(Rc<dyn FileAdapter>, SlowMatcher)>> {
|
2020-05-19 09:10:11 +00:00
|
|
|
// need order later
|
|
|
|
let adapter_names: Vec<String> = adapters.iter().map(|e| e.metadata().name.clone()).collect();
|
|
|
|
let mut fname_regexes = vec![];
|
|
|
|
let mut mime_regexes = vec![];
|
|
|
|
for adapter in adapters.into_iter() {
|
|
|
|
let metadata = adapter.metadata();
|
|
|
|
use SlowMatcher::*;
|
|
|
|
for matcher in metadata.get_matchers(slow) {
|
|
|
|
match matcher.as_ref() {
|
|
|
|
MimeType(re) => {
|
|
|
|
mime_regexes.push((re.clone(), adapter.clone(), MimeType(re.clone())))
|
|
|
|
}
|
|
|
|
Fast(FastMatcher::FileExtension(re)) => fname_regexes.push((
|
|
|
|
extension_to_regex(re),
|
|
|
|
adapter.clone(),
|
|
|
|
Fast(FastMatcher::FileExtension(re.clone())),
|
|
|
|
)),
|
|
|
|
};
|
|
|
|
}
|
|
|
|
}
|
|
|
|
let fname_regex_set = RegexSet::new(fname_regexes.iter().map(|p| p.0.as_str()))?;
|
|
|
|
let mime_regex_set = RegexSet::new(mime_regexes.iter().map(|p| p.0.as_str()))?;
|
|
|
|
Ok(move |meta: FileMeta| {
|
|
|
|
let fname_matches: Vec<_> = fname_regex_set
|
|
|
|
.matches(&meta.lossy_filename)
|
|
|
|
.into_iter()
|
|
|
|
.collect();
|
|
|
|
let mime_matches: Vec<_> = if slow {
|
|
|
|
mime_regex_set
|
|
|
|
.matches(&meta.mimetype.expect("No mimetype?"))
|
|
|
|
.into_iter()
|
|
|
|
.collect()
|
|
|
|
} else {
|
|
|
|
vec![]
|
|
|
|
};
|
|
|
|
if fname_matches.len() + mime_matches.len() > 1 {
|
|
|
|
// get first according to original priority list...
|
|
|
|
// todo: kinda ugly
|
|
|
|
let fa = fname_matches
|
|
|
|
.iter()
|
|
|
|
.map(|e| (fname_regexes[*e].1.clone(), fname_regexes[*e].2.clone()));
|
|
|
|
let fb = mime_matches
|
|
|
|
.iter()
|
|
|
|
.map(|e| (mime_regexes[*e].1.clone(), mime_regexes[*e].2.clone()));
|
|
|
|
let mut v = vec![];
|
|
|
|
v.extend(fa);
|
|
|
|
v.extend(fb);
|
|
|
|
v.sort_by_key(|e| {
|
|
|
|
adapter_names
|
|
|
|
.iter()
|
|
|
|
.position(|r| r == &e.0.metadata().name)
|
|
|
|
.expect("impossib7")
|
|
|
|
});
|
|
|
|
eprintln!(
|
|
|
|
"Warning: found multiple adapters for {}:",
|
|
|
|
meta.lossy_filename
|
|
|
|
);
|
|
|
|
for mmatch in v.iter() {
|
|
|
|
eprintln!(" - {}", mmatch.0.metadata().name);
|
|
|
|
}
|
|
|
|
return Some(v[0].clone());
|
|
|
|
}
|
|
|
|
if mime_matches.is_empty() {
|
|
|
|
if fname_matches.is_empty() {
|
|
|
|
None
|
|
|
|
} else {
|
|
|
|
let (_, adapter, matcher) = &fname_regexes[fname_matches[0]];
|
|
|
|
Some((adapter.clone(), matcher.clone()))
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
let (_, adapter, matcher) = &mime_regexes[mime_matches[0]];
|
|
|
|
Some((adapter.clone(), matcher.clone()))
|
|
|
|
}
|
|
|
|
})
|
2019-06-12 10:25:02 +00:00
|
|
|
}
|