restructure

This commit is contained in:
phiresky 2019-06-12 12:25:02 +02:00
parent 8353c68f79
commit c8f346c4dd
6 changed files with 129 additions and 105 deletions

View File

@ -5,11 +5,11 @@ pub mod spawning;
pub mod sqlite; pub mod sqlite;
pub mod tar; pub mod tar;
pub mod zip; pub mod zip;
use crate::matching::*;
use crate::preproc::PreprocConfig; use crate::preproc::PreprocConfig;
use failure::*; use failure::*;
use log::*; use log::*;
use regex::{Regex, RegexSet}; use regex::{Regex};
use std::borrow::Cow; use std::borrow::Cow;
use std::collections::HashMap; use std::collections::HashMap;
use std::io::prelude::*; use std::io::prelude::*;
@ -17,28 +17,6 @@ use std::iter::Iterator;
use std::path::Path; use std::path::Path;
use std::rc::Rc; use std::rc::Rc;
/// Matchers of the "fast" kind; at the moment the only one is matching on the file extension.
#[derive(Clone)]
pub enum FastMatcher {
    // MimeType(Regex),
    /// File extension without the leading dot, e.g. "jpg" or "tar.gz".
    /// Intended to be matched as /.*\.ext$/
    FileExtension(String),
    // todo: maybe add others, e.g. regex on whole filename or even paths
    // todo: maybe allow matching a directory (e.g. /var/lib/postgres)
}
/// Matchers of the "slow" kind: every fast matcher, plus matching on the mime type.
#[derive(Clone)]
pub enum SlowMatcher {
    /// Wraps any type of fast matcher.
    Fast(FastMatcher),
    /// Match by exact mime type extracted using tree_magic.
    /// TODO: allow match ignoring suffix etc?
    MimeType(String),
}
pub struct AdapterMeta { pub struct AdapterMeta {
/// unique short name of this adapter (a-z0-9 only) /// unique short name of this adapter (a-z0-9 only)
pub name: String, pub name: String,
@ -53,7 +31,10 @@ pub struct AdapterMeta {
} }
impl AdapterMeta { impl AdapterMeta {
// todo: this is pretty ugly // todo: this is pretty ugly
fn get_matchers<'a>(&'a self, slow: bool) -> Box<dyn Iterator<Item = Cow<SlowMatcher>> + 'a> { pub fn get_matchers<'a>(
&'a self,
slow: bool,
) -> Box<dyn Iterator<Item = Cow<SlowMatcher>> + 'a> {
match (slow, &self.slow_matchers) { match (slow, &self.slow_matchers) {
(true, Some(ref sm)) => Box::new(sm.iter().map(|e| Cow::Borrowed(e))), (true, Some(ref sm)) => Box::new(sm.iter().map(|e| Cow::Borrowed(e))),
(_, _) => Box::new( (_, _) => Box::new(
@ -65,14 +46,6 @@ impl AdapterMeta {
} }
} }
/// Information about a file that adapters are matched against.
pub struct FileMeta {
    /// The filename as a string with non-valid bytes removed.
    ///
    /// A filename is not actually a utf8 string, but since we can't do regex on OsStr and
    /// can't get a &[u8] from OsStr either, and since we probably only want to do matching
    /// on ascii stuff anyways, the lossy string form is used.
    pub lossy_filename: String,
    /// Mime type of the file; only given when slow matching is enabled.
    pub mimetype: Option<String>,
}
pub trait GetMetadata { pub trait GetMetadata {
fn metadata(&self) -> &AdapterMeta; fn metadata(&self) -> &AdapterMeta;
} }
@ -160,73 +133,3 @@ pub fn get_adapters_filtered<T: AsRef<str>>(
); );
Ok(adapters) Ok(adapters)
} }
pub fn adapter_matcher<T: AsRef<str>>(
adapter_names: &[T],
slow: bool,
) -> Fallible<impl Fn(FileMeta) -> Option<Rc<dyn FileAdapter>>> {
let adapters = get_adapters_filtered(adapter_names)?;
// need order later
let adapter_names: Vec<String> = adapters.iter().map(|e| e.metadata().name.clone()).collect();
let mut fname_regexes = vec![];
let mut mime_regexes = vec![];
for adapter in adapters.into_iter() {
let metadata = adapter.metadata();
use SlowMatcher::*;
for matcher in metadata.get_matchers(slow) {
match matcher.as_ref() {
MimeType(re) => mime_regexes.push((re.clone(), adapter.clone())),
Fast(FastMatcher::FileExtension(re)) => {
fname_regexes.push((extension_to_regex(re), adapter.clone()))
}
};
}
}
let fname_regex_set = RegexSet::new(fname_regexes.iter().map(|p| p.0.as_str()))?;
let mime_regex_set = RegexSet::new(mime_regexes.iter().map(|p| p.0.as_str()))?;
Ok(move |meta: FileMeta| {
let fname_matches: Vec<_> = fname_regex_set
.matches(&meta.lossy_filename)
.into_iter()
.collect();
let mime_matches: Vec<_> = if slow {
mime_regex_set
.matches(&meta.mimetype.expect("No mimetype?"))
.into_iter()
.collect()
} else {
vec![]
};
if fname_matches.len() + mime_matches.len() > 1 {
// get first according to original priority list...
let fa = fname_matches.iter().map(|e| fname_regexes[*e].1.clone());
let fb = mime_matches.iter().map(|e| mime_regexes[*e].1.clone());
let mut v = vec![];
v.extend(fa);
v.extend(fb);
v.sort_by_key(|e| {
(adapter_names
.iter()
.position(|r| r == &e.metadata().name)
.expect("impossib7"))
});
eprintln!(
"Warning: found multiple adapters for {}:",
meta.lossy_filename
);
for mmatch in v.iter() {
eprintln!(" - {}", mmatch.metadata().name);
}
return Some(v[0].clone());
}
if mime_matches.is_empty() {
if fname_matches.is_empty() {
None
} else {
Some(fname_regexes[fname_matches[0]].1.clone())
}
} else {
Some(mime_regexes[mime_matches[0]].1.clone())
}
})
}

View File

@ -41,8 +41,8 @@ where
let extension = filename.extension().map(|e| e.to_string_lossy().to_owned()); let extension = filename.extension().map(|e| e.to_string_lossy().to_owned());
match extension { match extension {
Some(e) => Ok(match e.to_owned().as_ref() { Some(e) => Ok(match e.to_owned().as_ref() {
"gz" => Box::new(flate2::read::MultiGzDecoder::new(inp)), "tgz" | "gz" => Box::new(flate2::read::MultiGzDecoder::new(inp)),
"bz2" => Box::new(bzip2::read::BzDecoder::new(inp)), "tbz" | "tbz2" | "bz2" => Box::new(bzip2::read::BzDecoder::new(inp)),
"xz" => Box::new(xz2::read::XzDecoder::new_multi_decoder(inp)), "xz" => Box::new(xz2::read::XzDecoder::new_multi_decoder(inp)),
"zst" => Box::new(zstd::stream::read::Decoder::new(inp)?), "zst" => Box::new(zstd::stream::read::Decoder::new(inp)?),
"tar" => Box::new(inp), "tar" => Box::new(inp),

View File

@ -2,6 +2,7 @@ use failure::Fallible;
use rga::adapters::spawning::map_exe_error; use rga::adapters::spawning::map_exe_error;
use rga::adapters::*; use rga::adapters::*;
use rga::args::*; use rga::args::*;
use rga::matching::*;
use ripgrep_all as rga; use ripgrep_all as rga;
use std::process::Command; use std::process::Command;

View File

@ -3,6 +3,7 @@
pub mod adapters; pub mod adapters;
pub mod args; pub mod args;
mod caching_writer; mod caching_writer;
pub mod matching;
pub mod preproc; pub mod preproc;
pub mod preproc_cache; pub mod preproc_cache;
pub use caching_writer::CachingWriter; pub use caching_writer::CachingWriter;

118
src/matching.rs Normal file
View File

@ -0,0 +1,118 @@
//! Module for matching adapters to files based on file name or mime type.
use crate::adapters::*;
use failure::*;
use regex::{Regex, RegexSet};
use std::iter::Iterator;
use std::rc::Rc;
/// Matchers of the "fast" kind; at the moment the only one is matching on the file extension.
#[derive(Clone)]
pub enum FastMatcher {
    // MimeType(Regex),
    /// File extension without the leading dot, e.g. "jpg" or "tar.gz".
    /// Intended to be matched as /.*\.ext$/
    FileExtension(String),
    // todo: maybe add others, e.g. regex on whole filename or even paths
    // todo: maybe allow matching a directory (e.g. /var/lib/postgres)
}
/// Matchers of the "slow" kind: every fast matcher, plus matching on the mime type.
#[derive(Clone)]
pub enum SlowMatcher {
    /// Wraps any type of fast matcher.
    Fast(FastMatcher),
    /// Match by exact mime type extracted using tree_magic.
    /// TODO: allow match ignoring suffix etc?
    MimeType(String),
}
/// Information about a file that adapters are matched against.
pub struct FileMeta {
    /// The filename as a string with non-valid bytes removed.
    ///
    /// A filename is not actually a utf8 string, but since we can't do regex on OsStr and
    /// can't get a &[u8] from OsStr either, and since we probably only want to do matching
    /// on ascii stuff anyways, the lossy string form is used.
    pub lossy_filename: String,
    /// Mime type of the file; only given when slow matching is enabled.
    pub mimetype: Option<String>,
}
/// Builds the regex that matches file names ending in `.<extension>`.
///
/// `extension` is given without the leading dot (e.g. "jpg" or "tar.gz"). It is escaped, so
/// any regex metacharacters in it are matched literally. The pattern is anchored at the end
/// of the input, so that e.g. "pdf" matches "a.pdf" but not "a.pdf.txt".
pub fn extension_to_regex(extension: &str) -> Regex {
    Regex::new(&format!(".*\\.{}$", &regex::escape(extension)))
        .expect("we know this regex compiles")
}
pub fn adapter_matcher<T: AsRef<str>>(
adapter_names: &[T],
slow: bool,
) -> Fallible<impl Fn(FileMeta) -> Option<Rc<dyn FileAdapter>>> {
let adapters = get_adapters_filtered(adapter_names)?;
// need order later
let adapter_names: Vec<String> = adapters.iter().map(|e| e.metadata().name.clone()).collect();
let mut fname_regexes = vec![];
let mut mime_regexes = vec![];
for adapter in adapters.into_iter() {
let metadata = adapter.metadata();
use SlowMatcher::*;
for matcher in metadata.get_matchers(slow) {
match matcher.as_ref() {
MimeType(re) => mime_regexes.push((re.clone(), adapter.clone())),
Fast(FastMatcher::FileExtension(re)) => {
fname_regexes.push((extension_to_regex(re), adapter.clone()))
}
};
}
}
let fname_regex_set = RegexSet::new(fname_regexes.iter().map(|p| p.0.as_str()))?;
let mime_regex_set = RegexSet::new(mime_regexes.iter().map(|p| p.0.as_str()))?;
Ok(move |meta: FileMeta| {
let fname_matches: Vec<_> = fname_regex_set
.matches(&meta.lossy_filename)
.into_iter()
.collect();
let mime_matches: Vec<_> = if slow {
mime_regex_set
.matches(&meta.mimetype.expect("No mimetype?"))
.into_iter()
.collect()
} else {
vec![]
};
if fname_matches.len() + mime_matches.len() > 1 {
// get first according to original priority list...
let fa = fname_matches.iter().map(|e| fname_regexes[*e].1.clone());
let fb = mime_matches.iter().map(|e| mime_regexes[*e].1.clone());
let mut v = vec![];
v.extend(fa);
v.extend(fb);
v.sort_by_key(|e| {
(adapter_names
.iter()
.position(|r| r == &e.metadata().name)
.expect("impossib7"))
});
eprintln!(
"Warning: found multiple adapters for {}:",
meta.lossy_filename
);
for mmatch in v.iter() {
eprintln!(" - {}", mmatch.metadata().name);
}
return Some(v[0].clone());
}
if mime_matches.is_empty() {
if fname_matches.is_empty() {
None
} else {
Some(fname_regexes[fname_matches[0]].1.clone())
}
} else {
Some(mime_regexes[mime_matches[0]].1.clone())
}
})
}

View File

@ -1,5 +1,6 @@
use crate::adapters::*; use crate::adapters::*;
use crate::args::RgaArgs; use crate::args::RgaArgs;
use crate::matching::*;
use crate::CachingWriter; use crate::CachingWriter;
use failure::Fallible; use failure::Fallible;
use failure::{format_err, Error}; use failure::{format_err, Error};