mirror of
https://github.com/FliegendeWurst/ripgrep-all.git
synced 2024-11-24 12:24:56 +00:00
restructure
This commit is contained in:
parent
8353c68f79
commit
c8f346c4dd
@ -5,11 +5,11 @@ pub mod spawning;
|
|||||||
pub mod sqlite;
|
pub mod sqlite;
|
||||||
pub mod tar;
|
pub mod tar;
|
||||||
pub mod zip;
|
pub mod zip;
|
||||||
|
use crate::matching::*;
|
||||||
use crate::preproc::PreprocConfig;
|
use crate::preproc::PreprocConfig;
|
||||||
use failure::*;
|
use failure::*;
|
||||||
use log::*;
|
use log::*;
|
||||||
use regex::{Regex, RegexSet};
|
use regex::{Regex};
|
||||||
|
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::io::prelude::*;
|
use std::io::prelude::*;
|
||||||
@ -17,28 +17,6 @@ use std::iter::Iterator;
|
|||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
use std::rc::Rc;
|
use std::rc::Rc;
|
||||||
|
|
||||||
#[derive(Clone)]
|
|
||||||
pub enum FastMatcher {
|
|
||||||
// MimeType(Regex),
|
|
||||||
/**
|
|
||||||
* without the leading dot, e.g. "jpg" or "tar.gz". Matched as /.*\.ext$/
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
FileExtension(String),
|
|
||||||
// todo: maybe add others, e.g. regex on whole filename or even paths
|
|
||||||
// todo: maybe allow matching a directory (e.g. /var/lib/postgres)
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Clone)]
|
|
||||||
pub enum SlowMatcher {
|
|
||||||
/// any type of fast matcher
|
|
||||||
Fast(FastMatcher),
|
|
||||||
///
|
|
||||||
/// match by exact mime type extracted using tree_magic
|
|
||||||
/// TODO: allow match ignoring suffix etc?
|
|
||||||
MimeType(String),
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct AdapterMeta {
|
pub struct AdapterMeta {
|
||||||
/// unique short name of this adapter (a-z0-9 only)
|
/// unique short name of this adapter (a-z0-9 only)
|
||||||
pub name: String,
|
pub name: String,
|
||||||
@ -53,7 +31,10 @@ pub struct AdapterMeta {
|
|||||||
}
|
}
|
||||||
impl AdapterMeta {
|
impl AdapterMeta {
|
||||||
// todo: this is pretty ugly
|
// todo: this is pretty ugly
|
||||||
fn get_matchers<'a>(&'a self, slow: bool) -> Box<dyn Iterator<Item = Cow<SlowMatcher>> + 'a> {
|
pub fn get_matchers<'a>(
|
||||||
|
&'a self,
|
||||||
|
slow: bool,
|
||||||
|
) -> Box<dyn Iterator<Item = Cow<SlowMatcher>> + 'a> {
|
||||||
match (slow, &self.slow_matchers) {
|
match (slow, &self.slow_matchers) {
|
||||||
(true, Some(ref sm)) => Box::new(sm.iter().map(|e| Cow::Borrowed(e))),
|
(true, Some(ref sm)) => Box::new(sm.iter().map(|e| Cow::Borrowed(e))),
|
||||||
(_, _) => Box::new(
|
(_, _) => Box::new(
|
||||||
@ -65,14 +46,6 @@ impl AdapterMeta {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct FileMeta {
|
|
||||||
// filename is not actually a utf8 string, but since we can't do regex on OsStr and can't get a &[u8] from OsStr either,
|
|
||||||
// and since we probably only want to do only matching on ascii stuff anyways, this is the filename as a string with non-valid bytes removed
|
|
||||||
pub lossy_filename: String,
|
|
||||||
// only given when slow matching is enabled
|
|
||||||
pub mimetype: Option<String>,
|
|
||||||
}
|
|
||||||
|
|
||||||
pub trait GetMetadata {
|
pub trait GetMetadata {
|
||||||
fn metadata(&self) -> &AdapterMeta;
|
fn metadata(&self) -> &AdapterMeta;
|
||||||
}
|
}
|
||||||
@ -160,73 +133,3 @@ pub fn get_adapters_filtered<T: AsRef<str>>(
|
|||||||
);
|
);
|
||||||
Ok(adapters)
|
Ok(adapters)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn adapter_matcher<T: AsRef<str>>(
|
|
||||||
adapter_names: &[T],
|
|
||||||
slow: bool,
|
|
||||||
) -> Fallible<impl Fn(FileMeta) -> Option<Rc<dyn FileAdapter>>> {
|
|
||||||
let adapters = get_adapters_filtered(adapter_names)?;
|
|
||||||
// need order later
|
|
||||||
let adapter_names: Vec<String> = adapters.iter().map(|e| e.metadata().name.clone()).collect();
|
|
||||||
let mut fname_regexes = vec![];
|
|
||||||
let mut mime_regexes = vec![];
|
|
||||||
for adapter in adapters.into_iter() {
|
|
||||||
let metadata = adapter.metadata();
|
|
||||||
use SlowMatcher::*;
|
|
||||||
for matcher in metadata.get_matchers(slow) {
|
|
||||||
match matcher.as_ref() {
|
|
||||||
MimeType(re) => mime_regexes.push((re.clone(), adapter.clone())),
|
|
||||||
Fast(FastMatcher::FileExtension(re)) => {
|
|
||||||
fname_regexes.push((extension_to_regex(re), adapter.clone()))
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
|
||||||
}
|
|
||||||
let fname_regex_set = RegexSet::new(fname_regexes.iter().map(|p| p.0.as_str()))?;
|
|
||||||
let mime_regex_set = RegexSet::new(mime_regexes.iter().map(|p| p.0.as_str()))?;
|
|
||||||
Ok(move |meta: FileMeta| {
|
|
||||||
let fname_matches: Vec<_> = fname_regex_set
|
|
||||||
.matches(&meta.lossy_filename)
|
|
||||||
.into_iter()
|
|
||||||
.collect();
|
|
||||||
let mime_matches: Vec<_> = if slow {
|
|
||||||
mime_regex_set
|
|
||||||
.matches(&meta.mimetype.expect("No mimetype?"))
|
|
||||||
.into_iter()
|
|
||||||
.collect()
|
|
||||||
} else {
|
|
||||||
vec![]
|
|
||||||
};
|
|
||||||
if fname_matches.len() + mime_matches.len() > 1 {
|
|
||||||
// get first according to original priority list...
|
|
||||||
let fa = fname_matches.iter().map(|e| fname_regexes[*e].1.clone());
|
|
||||||
let fb = mime_matches.iter().map(|e| mime_regexes[*e].1.clone());
|
|
||||||
let mut v = vec![];
|
|
||||||
v.extend(fa);
|
|
||||||
v.extend(fb);
|
|
||||||
v.sort_by_key(|e| {
|
|
||||||
(adapter_names
|
|
||||||
.iter()
|
|
||||||
.position(|r| r == &e.metadata().name)
|
|
||||||
.expect("impossib7"))
|
|
||||||
});
|
|
||||||
eprintln!(
|
|
||||||
"Warning: found multiple adapters for {}:",
|
|
||||||
meta.lossy_filename
|
|
||||||
);
|
|
||||||
for mmatch in v.iter() {
|
|
||||||
eprintln!(" - {}", mmatch.metadata().name);
|
|
||||||
}
|
|
||||||
return Some(v[0].clone());
|
|
||||||
}
|
|
||||||
if mime_matches.is_empty() {
|
|
||||||
if fname_matches.is_empty() {
|
|
||||||
None
|
|
||||||
} else {
|
|
||||||
Some(fname_regexes[fname_matches[0]].1.clone())
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
Some(mime_regexes[mime_matches[0]].1.clone())
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
@ -41,8 +41,8 @@ where
|
|||||||
let extension = filename.extension().map(|e| e.to_string_lossy().to_owned());
|
let extension = filename.extension().map(|e| e.to_string_lossy().to_owned());
|
||||||
match extension {
|
match extension {
|
||||||
Some(e) => Ok(match e.to_owned().as_ref() {
|
Some(e) => Ok(match e.to_owned().as_ref() {
|
||||||
"gz" => Box::new(flate2::read::MultiGzDecoder::new(inp)),
|
"tgz" | "gz" => Box::new(flate2::read::MultiGzDecoder::new(inp)),
|
||||||
"bz2" => Box::new(bzip2::read::BzDecoder::new(inp)),
|
"tbz" | "tbz2" | "bz2" => Box::new(bzip2::read::BzDecoder::new(inp)),
|
||||||
"xz" => Box::new(xz2::read::XzDecoder::new_multi_decoder(inp)),
|
"xz" => Box::new(xz2::read::XzDecoder::new_multi_decoder(inp)),
|
||||||
"zst" => Box::new(zstd::stream::read::Decoder::new(inp)?),
|
"zst" => Box::new(zstd::stream::read::Decoder::new(inp)?),
|
||||||
"tar" => Box::new(inp),
|
"tar" => Box::new(inp),
|
||||||
|
@ -2,6 +2,7 @@ use failure::Fallible;
|
|||||||
use rga::adapters::spawning::map_exe_error;
|
use rga::adapters::spawning::map_exe_error;
|
||||||
use rga::adapters::*;
|
use rga::adapters::*;
|
||||||
use rga::args::*;
|
use rga::args::*;
|
||||||
|
use rga::matching::*;
|
||||||
use ripgrep_all as rga;
|
use ripgrep_all as rga;
|
||||||
|
|
||||||
use std::process::Command;
|
use std::process::Command;
|
||||||
|
@ -3,6 +3,7 @@
|
|||||||
pub mod adapters;
|
pub mod adapters;
|
||||||
pub mod args;
|
pub mod args;
|
||||||
mod caching_writer;
|
mod caching_writer;
|
||||||
|
pub mod matching;
|
||||||
pub mod preproc;
|
pub mod preproc;
|
||||||
pub mod preproc_cache;
|
pub mod preproc_cache;
|
||||||
pub use caching_writer::CachingWriter;
|
pub use caching_writer::CachingWriter;
|
||||||
|
118
src/matching.rs
Normal file
118
src/matching.rs
Normal file
@ -0,0 +1,118 @@
|
|||||||
|
/**
|
||||||
|
* Module for matching adapters to files based on file name or mime type
|
||||||
|
*/
|
||||||
|
use crate::adapters::*;
|
||||||
|
|
||||||
|
use failure::*;
|
||||||
|
|
||||||
|
use regex::{Regex, RegexSet};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
use std::iter::Iterator;
|
||||||
|
|
||||||
|
use std::rc::Rc;
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub enum FastMatcher {
|
||||||
|
// MimeType(Regex),
|
||||||
|
/**
|
||||||
|
* without the leading dot, e.g. "jpg" or "tar.gz". Matched as /.*\.ext$/
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
FileExtension(String),
|
||||||
|
// todo: maybe add others, e.g. regex on whole filename or even paths
|
||||||
|
// todo: maybe allow matching a directory (e.g. /var/lib/postgres)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub enum SlowMatcher {
|
||||||
|
/// any type of fast matcher
|
||||||
|
Fast(FastMatcher),
|
||||||
|
///
|
||||||
|
/// match by exact mime type extracted using tree_magic
|
||||||
|
/// TODO: allow match ignoring suffix etc?
|
||||||
|
MimeType(String),
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct FileMeta {
|
||||||
|
// filename is not actually a utf8 string, but since we can't do regex on OsStr and can't get a &[u8] from OsStr either,
|
||||||
|
// and since we probably only want to do only matching on ascii stuff anyways, this is the filename as a string with non-valid bytes removed
|
||||||
|
pub lossy_filename: String,
|
||||||
|
// only given when slow matching is enabled
|
||||||
|
pub mimetype: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn extension_to_regex(extension: &str) -> Regex {
|
||||||
|
Regex::new(&format!(".*\\.{}", &regex::escape(extension))).expect("we know this regex compiles")
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn adapter_matcher<T: AsRef<str>>(
|
||||||
|
adapter_names: &[T],
|
||||||
|
slow: bool,
|
||||||
|
) -> Fallible<impl Fn(FileMeta) -> Option<Rc<dyn FileAdapter>>> {
|
||||||
|
let adapters = get_adapters_filtered(adapter_names)?;
|
||||||
|
// need order later
|
||||||
|
let adapter_names: Vec<String> = adapters.iter().map(|e| e.metadata().name.clone()).collect();
|
||||||
|
let mut fname_regexes = vec![];
|
||||||
|
let mut mime_regexes = vec![];
|
||||||
|
for adapter in adapters.into_iter() {
|
||||||
|
let metadata = adapter.metadata();
|
||||||
|
use SlowMatcher::*;
|
||||||
|
for matcher in metadata.get_matchers(slow) {
|
||||||
|
match matcher.as_ref() {
|
||||||
|
MimeType(re) => mime_regexes.push((re.clone(), adapter.clone())),
|
||||||
|
Fast(FastMatcher::FileExtension(re)) => {
|
||||||
|
fname_regexes.push((extension_to_regex(re), adapter.clone()))
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let fname_regex_set = RegexSet::new(fname_regexes.iter().map(|p| p.0.as_str()))?;
|
||||||
|
let mime_regex_set = RegexSet::new(mime_regexes.iter().map(|p| p.0.as_str()))?;
|
||||||
|
Ok(move |meta: FileMeta| {
|
||||||
|
let fname_matches: Vec<_> = fname_regex_set
|
||||||
|
.matches(&meta.lossy_filename)
|
||||||
|
.into_iter()
|
||||||
|
.collect();
|
||||||
|
let mime_matches: Vec<_> = if slow {
|
||||||
|
mime_regex_set
|
||||||
|
.matches(&meta.mimetype.expect("No mimetype?"))
|
||||||
|
.into_iter()
|
||||||
|
.collect()
|
||||||
|
} else {
|
||||||
|
vec![]
|
||||||
|
};
|
||||||
|
if fname_matches.len() + mime_matches.len() > 1 {
|
||||||
|
// get first according to original priority list...
|
||||||
|
let fa = fname_matches.iter().map(|e| fname_regexes[*e].1.clone());
|
||||||
|
let fb = mime_matches.iter().map(|e| mime_regexes[*e].1.clone());
|
||||||
|
let mut v = vec![];
|
||||||
|
v.extend(fa);
|
||||||
|
v.extend(fb);
|
||||||
|
v.sort_by_key(|e| {
|
||||||
|
(adapter_names
|
||||||
|
.iter()
|
||||||
|
.position(|r| r == &e.metadata().name)
|
||||||
|
.expect("impossib7"))
|
||||||
|
});
|
||||||
|
eprintln!(
|
||||||
|
"Warning: found multiple adapters for {}:",
|
||||||
|
meta.lossy_filename
|
||||||
|
);
|
||||||
|
for mmatch in v.iter() {
|
||||||
|
eprintln!(" - {}", mmatch.metadata().name);
|
||||||
|
}
|
||||||
|
return Some(v[0].clone());
|
||||||
|
}
|
||||||
|
if mime_matches.is_empty() {
|
||||||
|
if fname_matches.is_empty() {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
Some(fname_regexes[fname_matches[0]].1.clone())
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
Some(mime_regexes[mime_matches[0]].1.clone())
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
@ -1,5 +1,6 @@
|
|||||||
use crate::adapters::*;
|
use crate::adapters::*;
|
||||||
use crate::args::RgaArgs;
|
use crate::args::RgaArgs;
|
||||||
|
use crate::matching::*;
|
||||||
use crate::CachingWriter;
|
use crate::CachingWriter;
|
||||||
use failure::Fallible;
|
use failure::Fallible;
|
||||||
use failure::{format_err, Error};
|
use failure::{format_err, Error};
|
||||||
|
Loading…
Reference in New Issue
Block a user