rename FileMatcher

This commit is contained in:
phiresky 2020-06-17 11:45:06 +02:00
parent 0895e7a6cf
commit 977074c6e6
11 changed files with 46 additions and 43 deletions

View File

@ -34,10 +34,10 @@ pub struct AdapterMeta {
/// indicates whether this adapter can descend (=call rga_preproc again). if true, the cache key needs to include the list of active adapters
pub recurses: bool,
/// list of matchers (interpreted as a OR b OR ...)
pub fast_matchers: Vec<FastMatcher>,
pub fast_matchers: Vec<FastFileMatcher>,
/// list of matchers when we have mime type detection active (interpreted as ORed)
/// warning: this *overrides* the fast matchers
pub slow_matchers: Option<Vec<SlowMatcher>>,
pub slow_matchers: Option<Vec<FileMatcher>>,
/// if true, the adapter is only used when the user explicitly lists it in `--rga-adapters`
pub disabled_by_default: bool,
}
@ -46,13 +46,13 @@ impl AdapterMeta {
pub fn get_matchers<'a>(
&'a self,
slow: bool,
) -> Box<dyn Iterator<Item = Cow<SlowMatcher>> + 'a> {
) -> Box<dyn Iterator<Item = Cow<FileMatcher>> + 'a> {
match (slow, &self.slow_matchers) {
(true, Some(ref sm)) => Box::new(sm.iter().map(|e| Cow::Borrowed(e))),
(_, _) => Box::new(
self.fast_matchers
.iter()
.map(|e| Cow::Owned(SlowMatcher::Fast(e.clone()))),
.map(|e| Cow::Owned(FileMatcher::Fast(e.clone()))),
),
}
}
@ -65,7 +65,7 @@ pub trait FileAdapter: GetMetadata {
/// adapt a file.
///
/// detection_reason is the matcher that was used to identify this file. Unless --rga-accurate was given, it is always the `Fast` variant (i.e. a matcher based only on the file path)
fn adapt(&self, a: AdaptInfo, detection_reason: &SlowMatcher) -> Result<ReadBox>;
fn adapt(&self, a: AdaptInfo, detection_reason: &FileMatcher) -> Result<ReadBox>;
}
pub struct AdaptInfo {
/// file path. May not be an actual file on the file system (e.g. in an archive). Used for matching file extensions.

View File

@ -2,7 +2,7 @@ use super::{
spawning::{SpawningFileAdapter, SpawningFileAdapterTrait},
AdapterMeta, GetMetadata,
};
use crate::matching::{FastMatcher, SlowMatcher};
use crate::matching::{FastFileMatcher, FileMatcher};
use anyhow::{Context, Result};
use lazy_static::lazy_static;
use regex::{Captures, Regex};
@ -191,12 +191,12 @@ impl CustomAdapterConfig {
fast_matchers: self
.extensions
.iter()
.map(|s| FastMatcher::FileExtension(s.to_string()))
.map(|s| FastFileMatcher::FileExtension(s.to_string()))
.collect(),
slow_matchers: self.mimetypes.as_ref().map(|mimetypes| {
mimetypes
.iter()
.map(|s| SlowMatcher::MimeType(s.to_string()))
.map(|s| FileMatcher::MimeType(s.to_string()))
.collect()
}),
disabled_by_default: self.disabled_by_default.unwrap_or(false),

View File

@ -22,12 +22,12 @@ lazy_static! {
recurses: true,
fast_matchers: EXTENSIONS
.iter()
.map(|s| FastMatcher::FileExtension(s.to_string()))
.map(|s| FastFileMatcher::FileExtension(s.to_string()))
.collect(),
slow_matchers: Some(
MIME_TYPES
.iter()
.map(|s| SlowMatcher::MimeType(s.to_string()))
.map(|s| FileMatcher::MimeType(s.to_string()))
.collect()
),
disabled_by_default: false
@ -47,9 +47,9 @@ impl GetMetadata for DecompressAdapter {
}
}
fn decompress_any(reason: &SlowMatcher, inp: ReadBox) -> Result<ReadBox> {
use FastMatcher::*;
use SlowMatcher::*;
fn decompress_any(reason: &FileMatcher, inp: ReadBox) -> Result<ReadBox> {
use FastFileMatcher::*;
use FileMatcher::*;
let gz = |inp: ReadBox| Box::new(flate2::read::MultiGzDecoder::new(inp));
let bz2 = |inp: ReadBox| Box::new(bzip2::read::BzDecoder::new(inp));
let xz = |inp: ReadBox| Box::new(xz2::read::XzDecoder::new_multi_decoder(inp));
@ -89,7 +89,7 @@ fn get_inner_filename(filename: &Path) -> PathBuf {
}
impl FileAdapter for DecompressAdapter {
fn adapt(&self, ai: AdaptInfo, detection_reason: &SlowMatcher) -> Result<ReadBox> {
fn adapt(&self, ai: AdaptInfo, detection_reason: &FileMatcher) -> Result<ReadBox> {
let AdaptInfo {
filepath_hint,
inp,

View File

@ -21,7 +21,7 @@ lazy_static! {
recurses: false,
fast_matchers: EXTENSIONS
.iter()
.map(|s| FastMatcher::FileExtension(s.to_string()))
.map(|s| FastFileMatcher::FileExtension(s.to_string()))
.collect(),
slow_matchers: None,
disabled_by_default: false
@ -54,7 +54,7 @@ impl WritingFileAdapterTrait for FFmpegAdapter {
fn adapt_write(
&self,
ai: AdaptInfo,
_detection_reason: &SlowMatcher,
_detection_reason: &FileMatcher,
oup: &mut dyn Write,
) -> Result<()> {
let AdaptInfo {

View File

@ -138,7 +138,7 @@ pub fn pipe_output(
}
impl FileAdapter for SpawningFileAdapter {
fn adapt(&self, ai: AdaptInfo, _detection_reason: &SlowMatcher) -> Result<ReadBox> {
fn adapt(&self, ai: AdaptInfo, _detection_reason: &FileMatcher) -> Result<ReadBox> {
let AdaptInfo {
filepath_hint,
mut inp,

View File

@ -19,9 +19,9 @@ lazy_static! {
recurses: false, // set to true if we decide to make sqlite blobs searchable (gz blob in db is kinda common I think)
fast_matchers: EXTENSIONS
.iter()
.map(|s| FastMatcher::FileExtension(s.to_string()))
.map(|s| FastFileMatcher::FileExtension(s.to_string()))
.collect(),
slow_matchers: Some(vec![SlowMatcher::MimeType(
slow_matchers: Some(vec![FileMatcher::MimeType(
"application/x-sqlite3".to_owned()
)]),
disabled_by_default: false
@ -63,7 +63,7 @@ impl WritingFileAdapterTrait for SqliteAdapter {
fn adapt_write(
&self,
ai: AdaptInfo,
_detection_reason: &SlowMatcher,
_detection_reason: &FileMatcher,
oup: &mut dyn Write,
) -> Result<()> {
let AdaptInfo {
@ -125,8 +125,8 @@ impl WritingFileAdapterTrait for SqliteAdapter {
#[cfg(test)]
mod test {
use super::*;
use crate::{test_utils::*};
use std::{fs::File};
use crate::test_utils::*;
use std::fs::File;
#[test]
fn simple() -> Result<()> {

View File

@ -8,7 +8,7 @@ pub trait WritingFileAdapterTrait: GetMetadata + Send + Clone {
fn adapt_write(
&self,
a: super::AdaptInfo,
detection_reason: &crate::matching::SlowMatcher,
detection_reason: &crate::matching::FileMatcher,
oup: &mut dyn Write,
) -> Result<()>;
}
@ -32,7 +32,7 @@ impl FileAdapter for WritingFileAdapter {
fn adapt(
&self,
a: super::AdaptInfo,
detection_reason: &crate::matching::SlowMatcher,
detection_reason: &crate::matching::FileMatcher,
) -> anyhow::Result<ReadBox> {
let (r, w) = crate::pipe::pipe();
let cc = self.inner.clone();

View File

@ -21,7 +21,7 @@ fn list_adapters(args: RgaConfig) -> Result<()> {
.fast_matchers
.iter()
.map(|m| match m {
FastMatcher::FileExtension(ext) => format!(".{}", ext),
FastFileMatcher::FileExtension(ext) => format!(".{}", ext),
})
.collect::<Vec<_>>()
.join(", ");
@ -31,8 +31,8 @@ fn list_adapters(args: RgaConfig) -> Result<()> {
.unwrap_or(&vec![])
.iter()
.filter_map(|m| match m {
SlowMatcher::MimeType(x) => Some(format!("{}", x)),
SlowMatcher::Fast(_) => None,
FileMatcher::MimeType(x) => Some(format!("{}", x)),
FileMatcher::Fast(_) => None,
})
.collect::<Vec<_>>()
.join(", ");
@ -99,7 +99,7 @@ fn main() -> anyhow::Result<()> {
.iter()
.flat_map(|a| &a.metadata().fast_matchers)
.flat_map(|m| match m {
FastMatcher::FileExtension(ext) => vec![ext.clone(), ext.to_ascii_uppercase()],
FastFileMatcher::FileExtension(ext) => vec![ext.clone(), ext.to_ascii_uppercase()],
})
.collect::<Vec<_>>()
.join(",");

View File

@ -11,8 +11,9 @@ use std::iter::Iterator;
use std::rc::Rc;
/// matches a file based only on its path (no content inspection)
#[derive(Clone, Debug)]
pub enum FastMatcher {
pub enum FastFileMatcher {
// MimeType(Regex),
/**
* without the leading dot, e.g. "jpg" or "tar.gz". Matched as /.*\.ext$/
@ -24,18 +25,18 @@ pub enum FastMatcher {
}
#[derive(Clone, Debug)]
pub enum SlowMatcher {
pub enum FileMatcher {
/// any type of fast matcher
Fast(FastMatcher),
Fast(FastFileMatcher),
///
/// match by exact mime type extracted using tree_magic
/// TODO: allow match ignoring suffix etc?
MimeType(String),
}
impl From<FastMatcher> for SlowMatcher {
fn from(t: FastMatcher) -> Self {
SlowMatcher::Fast(t)
impl From<FastFileMatcher> for FileMatcher {
fn from(t: FastFileMatcher) -> Self {
FileMatcher::Fast(t)
}
}
@ -55,23 +56,23 @@ pub fn extension_to_regex(extension: &str) -> Regex {
pub fn adapter_matcher(
adapters: &Vec<Rc<dyn FileAdapter>>,
slow: bool,
) -> Result<impl Fn(FileMeta) -> Option<(Rc<dyn FileAdapter>, SlowMatcher)>> {
) -> Result<impl Fn(FileMeta) -> Option<(Rc<dyn FileAdapter>, FileMatcher)>> {
// need order later
let adapter_names: Vec<String> = adapters.iter().map(|e| e.metadata().name.clone()).collect();
let mut fname_regexes = vec![];
let mut mime_regexes = vec![];
for adapter in adapters.into_iter() {
let metadata = adapter.metadata();
use SlowMatcher::*;
use FileMatcher::*;
for matcher in metadata.get_matchers(slow) {
match matcher.as_ref() {
MimeType(re) => {
mime_regexes.push((re.clone(), adapter.clone(), MimeType(re.clone())))
}
Fast(FastMatcher::FileExtension(re)) => fname_regexes.push((
Fast(FastFileMatcher::FileExtension(re)) => fname_regexes.push((
extension_to_regex(re),
adapter.clone(),
Fast(FastMatcher::FileExtension(re.clone())),
Fast(FastFileMatcher::FileExtension(re.clone())),
)),
};
}

View File

@ -91,7 +91,7 @@ pub fn rga_preproc(ai: AdaptInfo) -> Result<ReadBox> {
fn run_adapter(
ai: AdaptInfo,
adapter: Rc<dyn FileAdapter>,
detection_reason: SlowMatcher,
detection_reason: FileMatcher,
filtered_adapters: &Vec<Rc<dyn FileAdapter>>,
) -> Result<ReadBox> {
let AdaptInfo {

View File

@ -1,7 +1,7 @@
use crate::{
adapters::{AdaptInfo, ReadBox},
config::RgaConfig,
matching::{FastMatcher, SlowMatcher},
matching::{FastFileMatcher, FileMatcher},
};
use std::path::{Path, PathBuf};
@ -11,7 +11,7 @@ pub fn test_data_dir() -> PathBuf {
d
}
pub fn simple_adapt_info(filepath: &Path, inp: ReadBox) -> (AdaptInfo, SlowMatcher) {
pub fn simple_adapt_info(filepath: &Path, inp: ReadBox) -> (AdaptInfo, FileMatcher) {
(
AdaptInfo {
filepath_hint: filepath.to_owned(),
@ -21,7 +21,9 @@ pub fn simple_adapt_info(filepath: &Path, inp: ReadBox) -> (AdaptInfo, SlowMatch
line_prefix: "PREFIX:".to_string(),
config: RgaConfig::default(),
},
FastMatcher::FileExtension(filepath.extension().unwrap().to_string_lossy().into_owned())
FastFileMatcher::FileExtension(
filepath.extension().unwrap().to_string_lossy().into_owned(),
)
.into(),
)
}