rename FileMatcher

This commit is contained in:
phiresky 2020-06-17 11:45:06 +02:00
parent 0895e7a6cf
commit 977074c6e6
11 changed files with 46 additions and 43 deletions

View File

@ -34,10 +34,10 @@ pub struct AdapterMeta {
/// indicates whether this adapter can descend (=call rga_preproc again). if true, the cache key needs to include the list of active adapters /// indicates whether this adapter can descend (=call rga_preproc again). if true, the cache key needs to include the list of active adapters
pub recurses: bool, pub recurses: bool,
/// list of matchers (interpreted as a OR b OR ...) /// list of matchers (interpreted as a OR b OR ...)
pub fast_matchers: Vec<FastMatcher>, pub fast_matchers: Vec<FastFileMatcher>,
/// list of matchers when we have mime type detection active (interpreted as ORed) /// list of matchers when we have mime type detection active (interpreted as ORed)
/// warning: this *overrides* the fast matchers /// warning: this *overrides* the fast matchers
pub slow_matchers: Option<Vec<SlowMatcher>>, pub slow_matchers: Option<Vec<FileMatcher>>,
// if true, adapter is only used when user lists it in `--rga-adapters` // if true, adapter is only used when user lists it in `--rga-adapters`
pub disabled_by_default: bool, pub disabled_by_default: bool,
} }
@ -46,13 +46,13 @@ impl AdapterMeta {
pub fn get_matchers<'a>( pub fn get_matchers<'a>(
&'a self, &'a self,
slow: bool, slow: bool,
) -> Box<dyn Iterator<Item = Cow<SlowMatcher>> + 'a> { ) -> Box<dyn Iterator<Item = Cow<FileMatcher>> + 'a> {
match (slow, &self.slow_matchers) { match (slow, &self.slow_matchers) {
(true, Some(ref sm)) => Box::new(sm.iter().map(|e| Cow::Borrowed(e))), (true, Some(ref sm)) => Box::new(sm.iter().map(|e| Cow::Borrowed(e))),
(_, _) => Box::new( (_, _) => Box::new(
self.fast_matchers self.fast_matchers
.iter() .iter()
.map(|e| Cow::Owned(SlowMatcher::Fast(e.clone()))), .map(|e| Cow::Owned(FileMatcher::Fast(e.clone()))),
), ),
} }
} }
@ -65,7 +65,7 @@ pub trait FileAdapter: GetMetadata {
/// adapt a file. /// adapt a file.
/// ///
/// detection_reason is the Matcher that was used to identify this file. Unless --rga-accurate was given, it is always a FastMatcher /// detection_reason is the Matcher that was used to identify this file. Unless --rga-accurate was given, it is always a FastMatcher
fn adapt(&self, a: AdaptInfo, detection_reason: &SlowMatcher) -> Result<ReadBox>; fn adapt(&self, a: AdaptInfo, detection_reason: &FileMatcher) -> Result<ReadBox>;
} }
pub struct AdaptInfo { pub struct AdaptInfo {
/// file path. May not be an actual file on the file system (e.g. in an archive). Used for matching file extensions. /// file path. May not be an actual file on the file system (e.g. in an archive). Used for matching file extensions.

View File

@ -2,7 +2,7 @@ use super::{
spawning::{SpawningFileAdapter, SpawningFileAdapterTrait}, spawning::{SpawningFileAdapter, SpawningFileAdapterTrait},
AdapterMeta, GetMetadata, AdapterMeta, GetMetadata,
}; };
use crate::matching::{FastMatcher, SlowMatcher}; use crate::matching::{FastFileMatcher, FileMatcher};
use anyhow::{Context, Result}; use anyhow::{Context, Result};
use lazy_static::lazy_static; use lazy_static::lazy_static;
use regex::{Captures, Regex}; use regex::{Captures, Regex};
@ -191,12 +191,12 @@ impl CustomAdapterConfig {
fast_matchers: self fast_matchers: self
.extensions .extensions
.iter() .iter()
.map(|s| FastMatcher::FileExtension(s.to_string())) .map(|s| FastFileMatcher::FileExtension(s.to_string()))
.collect(), .collect(),
slow_matchers: self.mimetypes.as_ref().map(|mimetypes| { slow_matchers: self.mimetypes.as_ref().map(|mimetypes| {
mimetypes mimetypes
.iter() .iter()
.map(|s| SlowMatcher::MimeType(s.to_string())) .map(|s| FileMatcher::MimeType(s.to_string()))
.collect() .collect()
}), }),
disabled_by_default: self.disabled_by_default.unwrap_or(false), disabled_by_default: self.disabled_by_default.unwrap_or(false),

View File

@ -22,12 +22,12 @@ lazy_static! {
recurses: true, recurses: true,
fast_matchers: EXTENSIONS fast_matchers: EXTENSIONS
.iter() .iter()
.map(|s| FastMatcher::FileExtension(s.to_string())) .map(|s| FastFileMatcher::FileExtension(s.to_string()))
.collect(), .collect(),
slow_matchers: Some( slow_matchers: Some(
MIME_TYPES MIME_TYPES
.iter() .iter()
.map(|s| SlowMatcher::MimeType(s.to_string())) .map(|s| FileMatcher::MimeType(s.to_string()))
.collect() .collect()
), ),
disabled_by_default: false disabled_by_default: false
@ -47,9 +47,9 @@ impl GetMetadata for DecompressAdapter {
} }
} }
fn decompress_any(reason: &SlowMatcher, inp: ReadBox) -> Result<ReadBox> { fn decompress_any(reason: &FileMatcher, inp: ReadBox) -> Result<ReadBox> {
use FastMatcher::*; use FastFileMatcher::*;
use SlowMatcher::*; use FileMatcher::*;
let gz = |inp: ReadBox| Box::new(flate2::read::MultiGzDecoder::new(inp)); let gz = |inp: ReadBox| Box::new(flate2::read::MultiGzDecoder::new(inp));
let bz2 = |inp: ReadBox| Box::new(bzip2::read::BzDecoder::new(inp)); let bz2 = |inp: ReadBox| Box::new(bzip2::read::BzDecoder::new(inp));
let xz = |inp: ReadBox| Box::new(xz2::read::XzDecoder::new_multi_decoder(inp)); let xz = |inp: ReadBox| Box::new(xz2::read::XzDecoder::new_multi_decoder(inp));
@ -89,7 +89,7 @@ fn get_inner_filename(filename: &Path) -> PathBuf {
} }
impl FileAdapter for DecompressAdapter { impl FileAdapter for DecompressAdapter {
fn adapt(&self, ai: AdaptInfo, detection_reason: &SlowMatcher) -> Result<ReadBox> { fn adapt(&self, ai: AdaptInfo, detection_reason: &FileMatcher) -> Result<ReadBox> {
let AdaptInfo { let AdaptInfo {
filepath_hint, filepath_hint,
inp, inp,

View File

@ -21,7 +21,7 @@ lazy_static! {
recurses: false, recurses: false,
fast_matchers: EXTENSIONS fast_matchers: EXTENSIONS
.iter() .iter()
.map(|s| FastMatcher::FileExtension(s.to_string())) .map(|s| FastFileMatcher::FileExtension(s.to_string()))
.collect(), .collect(),
slow_matchers: None, slow_matchers: None,
disabled_by_default: false disabled_by_default: false
@ -54,7 +54,7 @@ impl WritingFileAdapterTrait for FFmpegAdapter {
fn adapt_write( fn adapt_write(
&self, &self,
ai: AdaptInfo, ai: AdaptInfo,
_detection_reason: &SlowMatcher, _detection_reason: &FileMatcher,
oup: &mut dyn Write, oup: &mut dyn Write,
) -> Result<()> { ) -> Result<()> {
let AdaptInfo { let AdaptInfo {

View File

@ -138,7 +138,7 @@ pub fn pipe_output(
} }
impl FileAdapter for SpawningFileAdapter { impl FileAdapter for SpawningFileAdapter {
fn adapt(&self, ai: AdaptInfo, _detection_reason: &SlowMatcher) -> Result<ReadBox> { fn adapt(&self, ai: AdaptInfo, _detection_reason: &FileMatcher) -> Result<ReadBox> {
let AdaptInfo { let AdaptInfo {
filepath_hint, filepath_hint,
mut inp, mut inp,

View File

@ -19,9 +19,9 @@ lazy_static! {
recurses: false, // set to true if we decide to make sqlite blobs searchable (gz blob in db is kinda common I think) recurses: false, // set to true if we decide to make sqlite blobs searchable (gz blob in db is kinda common I think)
fast_matchers: EXTENSIONS fast_matchers: EXTENSIONS
.iter() .iter()
.map(|s| FastMatcher::FileExtension(s.to_string())) .map(|s| FastFileMatcher::FileExtension(s.to_string()))
.collect(), .collect(),
slow_matchers: Some(vec![SlowMatcher::MimeType( slow_matchers: Some(vec![FileMatcher::MimeType(
"application/x-sqlite3".to_owned() "application/x-sqlite3".to_owned()
)]), )]),
disabled_by_default: false disabled_by_default: false
@ -63,7 +63,7 @@ impl WritingFileAdapterTrait for SqliteAdapter {
fn adapt_write( fn adapt_write(
&self, &self,
ai: AdaptInfo, ai: AdaptInfo,
_detection_reason: &SlowMatcher, _detection_reason: &FileMatcher,
oup: &mut dyn Write, oup: &mut dyn Write,
) -> Result<()> { ) -> Result<()> {
let AdaptInfo { let AdaptInfo {
@ -125,8 +125,8 @@ impl WritingFileAdapterTrait for SqliteAdapter {
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use super::*; use super::*;
use crate::{test_utils::*}; use crate::test_utils::*;
use std::{fs::File}; use std::fs::File;
#[test] #[test]
fn simple() -> Result<()> { fn simple() -> Result<()> {

View File

@ -8,7 +8,7 @@ pub trait WritingFileAdapterTrait: GetMetadata + Send + Clone {
fn adapt_write( fn adapt_write(
&self, &self,
a: super::AdaptInfo, a: super::AdaptInfo,
detection_reason: &crate::matching::SlowMatcher, detection_reason: &crate::matching::FileMatcher,
oup: &mut dyn Write, oup: &mut dyn Write,
) -> Result<()>; ) -> Result<()>;
} }
@ -32,7 +32,7 @@ impl FileAdapter for WritingFileAdapter {
fn adapt( fn adapt(
&self, &self,
a: super::AdaptInfo, a: super::AdaptInfo,
detection_reason: &crate::matching::SlowMatcher, detection_reason: &crate::matching::FileMatcher,
) -> anyhow::Result<ReadBox> { ) -> anyhow::Result<ReadBox> {
let (r, w) = crate::pipe::pipe(); let (r, w) = crate::pipe::pipe();
let cc = self.inner.clone(); let cc = self.inner.clone();

View File

@ -21,7 +21,7 @@ fn list_adapters(args: RgaConfig) -> Result<()> {
.fast_matchers .fast_matchers
.iter() .iter()
.map(|m| match m { .map(|m| match m {
FastMatcher::FileExtension(ext) => format!(".{}", ext), FastFileMatcher::FileExtension(ext) => format!(".{}", ext),
}) })
.collect::<Vec<_>>() .collect::<Vec<_>>()
.join(", "); .join(", ");
@ -31,8 +31,8 @@ fn list_adapters(args: RgaConfig) -> Result<()> {
.unwrap_or(&vec![]) .unwrap_or(&vec![])
.iter() .iter()
.filter_map(|m| match m { .filter_map(|m| match m {
SlowMatcher::MimeType(x) => Some(format!("{}", x)), FileMatcher::MimeType(x) => Some(format!("{}", x)),
SlowMatcher::Fast(_) => None, FileMatcher::Fast(_) => None,
}) })
.collect::<Vec<_>>() .collect::<Vec<_>>()
.join(", "); .join(", ");
@ -99,7 +99,7 @@ fn main() -> anyhow::Result<()> {
.iter() .iter()
.flat_map(|a| &a.metadata().fast_matchers) .flat_map(|a| &a.metadata().fast_matchers)
.flat_map(|m| match m { .flat_map(|m| match m {
FastMatcher::FileExtension(ext) => vec![ext.clone(), ext.to_ascii_uppercase()], FastFileMatcher::FileExtension(ext) => vec![ext.clone(), ext.to_ascii_uppercase()],
}) })
.collect::<Vec<_>>() .collect::<Vec<_>>()
.join(","); .join(",");

View File

@ -11,8 +11,9 @@ use std::iter::Iterator;
use std::rc::Rc; use std::rc::Rc;
// match only based on file path
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub enum FastMatcher { pub enum FastFileMatcher {
// MimeType(Regex), // MimeType(Regex),
/** /**
* without the leading dot, e.g. "jpg" or "tar.gz". Matched as /.*\.ext$/ * without the leading dot, e.g. "jpg" or "tar.gz". Matched as /.*\.ext$/
@ -24,18 +25,18 @@ pub enum FastMatcher {
} }
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub enum SlowMatcher { pub enum FileMatcher {
/// any type of fast matcher /// any type of fast matcher
Fast(FastMatcher), Fast(FastFileMatcher),
/// ///
/// match by exact mime type extracted using tree_magic /// match by exact mime type extracted using tree_magic
/// TODO: allow match ignoring suffix etc? /// TODO: allow match ignoring suffix etc?
MimeType(String), MimeType(String),
} }
impl From<FastMatcher> for SlowMatcher { impl From<FastFileMatcher> for FileMatcher {
fn from(t: FastMatcher) -> Self { fn from(t: FastFileMatcher) -> Self {
SlowMatcher::Fast(t) FileMatcher::Fast(t)
} }
} }
@ -55,23 +56,23 @@ pub fn extension_to_regex(extension: &str) -> Regex {
pub fn adapter_matcher( pub fn adapter_matcher(
adapters: &Vec<Rc<dyn FileAdapter>>, adapters: &Vec<Rc<dyn FileAdapter>>,
slow: bool, slow: bool,
) -> Result<impl Fn(FileMeta) -> Option<(Rc<dyn FileAdapter>, SlowMatcher)>> { ) -> Result<impl Fn(FileMeta) -> Option<(Rc<dyn FileAdapter>, FileMatcher)>> {
// need order later // need order later
let adapter_names: Vec<String> = adapters.iter().map(|e| e.metadata().name.clone()).collect(); let adapter_names: Vec<String> = adapters.iter().map(|e| e.metadata().name.clone()).collect();
let mut fname_regexes = vec![]; let mut fname_regexes = vec![];
let mut mime_regexes = vec![]; let mut mime_regexes = vec![];
for adapter in adapters.into_iter() { for adapter in adapters.into_iter() {
let metadata = adapter.metadata(); let metadata = adapter.metadata();
use SlowMatcher::*; use FileMatcher::*;
for matcher in metadata.get_matchers(slow) { for matcher in metadata.get_matchers(slow) {
match matcher.as_ref() { match matcher.as_ref() {
MimeType(re) => { MimeType(re) => {
mime_regexes.push((re.clone(), adapter.clone(), MimeType(re.clone()))) mime_regexes.push((re.clone(), adapter.clone(), MimeType(re.clone())))
} }
Fast(FastMatcher::FileExtension(re)) => fname_regexes.push(( Fast(FastFileMatcher::FileExtension(re)) => fname_regexes.push((
extension_to_regex(re), extension_to_regex(re),
adapter.clone(), adapter.clone(),
Fast(FastMatcher::FileExtension(re.clone())), Fast(FastFileMatcher::FileExtension(re.clone())),
)), )),
}; };
} }

View File

@ -91,7 +91,7 @@ pub fn rga_preproc(ai: AdaptInfo) -> Result<ReadBox> {
fn run_adapter( fn run_adapter(
ai: AdaptInfo, ai: AdaptInfo,
adapter: Rc<dyn FileAdapter>, adapter: Rc<dyn FileAdapter>,
detection_reason: SlowMatcher, detection_reason: FileMatcher,
filtered_adapters: &Vec<Rc<dyn FileAdapter>>, filtered_adapters: &Vec<Rc<dyn FileAdapter>>,
) -> Result<ReadBox> { ) -> Result<ReadBox> {
let AdaptInfo { let AdaptInfo {

View File

@ -1,7 +1,7 @@
use crate::{ use crate::{
adapters::{AdaptInfo, ReadBox}, adapters::{AdaptInfo, ReadBox},
config::RgaConfig, config::RgaConfig,
matching::{FastMatcher, SlowMatcher}, matching::{FastFileMatcher, FileMatcher},
}; };
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
@ -11,7 +11,7 @@ pub fn test_data_dir() -> PathBuf {
d d
} }
pub fn simple_adapt_info(filepath: &Path, inp: ReadBox) -> (AdaptInfo, SlowMatcher) { pub fn simple_adapt_info(filepath: &Path, inp: ReadBox) -> (AdaptInfo, FileMatcher) {
( (
AdaptInfo { AdaptInfo {
filepath_hint: filepath.to_owned(), filepath_hint: filepath.to_owned(),
@ -21,7 +21,9 @@ pub fn simple_adapt_info(filepath: &Path, inp: ReadBox) -> (AdaptInfo, SlowMatch
line_prefix: "PREFIX:".to_string(), line_prefix: "PREFIX:".to_string(),
config: RgaConfig::default(), config: RgaConfig::default(),
}, },
FastMatcher::FileExtension(filepath.extension().unwrap().to_string_lossy().into_owned()) FastFileMatcher::FileExtension(
.into(), filepath.extension().unwrap().to_string_lossy().into_owned(),
)
.into(),
) )
} }