mirror of
https://github.com/FliegendeWurst/ripgrep-all.git
synced 2024-11-24 12:24:56 +00:00
add slow matching (base)
This commit is contained in:
parent
9a036fdd4e
commit
0489a49d66
@ -15,10 +15,11 @@ lazy_static! {
|
|||||||
name: "ffmpeg".to_owned(),
|
name: "ffmpeg".to_owned(),
|
||||||
version: 1,
|
version: 1,
|
||||||
description: "Uses ffmpeg to extract video metadata and subtitles".to_owned(),
|
description: "Uses ffmpeg to extract video metadata and subtitles".to_owned(),
|
||||||
matchers: EXTENSIONS
|
fast_matchers: EXTENSIONS
|
||||||
.iter()
|
.iter()
|
||||||
.map(|s| Matcher::FileExtension(s.to_string()))
|
.map(|s| FastMatcher::FileExtension(s.to_string()))
|
||||||
.collect(),
|
.collect(),
|
||||||
|
slow_matchers: None
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -9,19 +9,34 @@ use crate::preproc::PreprocConfig;
|
|||||||
use failure::*;
|
use failure::*;
|
||||||
use log::*;
|
use log::*;
|
||||||
use regex::{Regex, RegexSet};
|
use regex::{Regex, RegexSet};
|
||||||
|
use std::borrow::Borrow;
|
||||||
|
use std::borrow::Cow;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::io::prelude::*;
|
use std::io::prelude::*;
|
||||||
|
use std::iter::Iterator;
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
use std::rc::Rc;
|
use std::rc::Rc;
|
||||||
//pub use ffmpeg::FffmpegAdapter;
|
|
||||||
|
|
||||||
pub enum Matcher {
|
#[derive(Clone)]
|
||||||
|
pub enum FastMatcher {
|
||||||
// MimeType(Regex),
|
// MimeType(Regex),
|
||||||
/**
|
/**
|
||||||
* without the dot. e.g. "jpg" or "tar.gz" matched as /.*\.ext$/
|
* without the leading dot, e.g. "jpg" or "tar.gz". Matched as /.*\.ext$/
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
FileExtension(String),
|
FileExtension(String),
|
||||||
|
// todo: maybe add others, e.g. regex on whole filename or even paths
|
||||||
|
// todo: maybe allow matching a directory (e.g. /var/lib/postgres)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub enum SlowMatcher {
|
||||||
|
/// any type of fast matcher
|
||||||
|
Fast(FastMatcher),
|
||||||
|
///
|
||||||
|
/// match by exact mime type extracted using tree_magic
|
||||||
|
/// TODO: allow match ignoring suffix etc?
|
||||||
|
MimeType(String),
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct AdapterMeta {
|
pub struct AdapterMeta {
|
||||||
@ -30,14 +45,32 @@ pub struct AdapterMeta {
|
|||||||
/// version identifier. used to key cache entries, change if your output format changes
|
/// version identifier. used to key cache entries, change if your output format changes
|
||||||
pub version: i32,
|
pub version: i32,
|
||||||
pub description: String,
|
pub description: String,
|
||||||
pub matchers: Vec<Matcher>,
|
/// list of matchers (interpreted as ORed)
|
||||||
|
pub fast_matchers: Vec<FastMatcher>,
|
||||||
|
/// list of matchers when we have mime type detection active (interpreted as ORed)
|
||||||
|
/// warning: this *overrides* the fast matchers
|
||||||
|
pub slow_matchers: Option<Vec<SlowMatcher>>,
|
||||||
|
}
|
||||||
|
impl AdapterMeta {
|
||||||
|
// todo: this is pretty ugly
|
||||||
|
fn get_matchers<'a>(&'a self, slow: bool) -> Box<dyn Iterator<Item = Cow<SlowMatcher>> + 'a> {
|
||||||
|
match (slow, &self.slow_matchers) {
|
||||||
|
(true, Some(ref sm)) => Box::new(sm.iter().map(|e| Cow::Borrowed(e))),
|
||||||
|
(_, _) => Box::new(
|
||||||
|
self.fast_matchers
|
||||||
|
.iter()
|
||||||
|
.map(|e| Cow::Owned(SlowMatcher::Fast(e.clone()))),
|
||||||
|
),
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct FileMeta {
|
pub struct FileMeta {
|
||||||
// filename is not actually a utf8 string, but since we can't do regex on OsStr and can't get a &[u8] from OsStr either,
|
// filename is not actually a utf8 string, but since we can't do regex on OsStr and can't get a &[u8] from OsStr either,
|
||||||
// and since we probably only want to do only matching on ascii stuff anyways, this is the filename as a string with non-valid bytes removed
|
// and since we probably only want to do only matching on ascii stuff anyways, this is the filename as a string with non-valid bytes removed
|
||||||
pub lossy_filename: String,
|
pub lossy_filename: String,
|
||||||
// pub mimetype: String,
|
// only given when slow matching is enabled
|
||||||
|
pub mimetype: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub trait GetMetadata {
|
pub trait GetMetadata {
|
||||||
@ -79,7 +112,9 @@ pub fn get_adapters() -> Vec<Rc<dyn FileAdapter>> {
|
|||||||
adapters
|
adapters
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get_adapters_filtered(adapter_names: &Vec<String>) -> Fallible<Vec<Rc<dyn FileAdapter>>> {
|
pub fn get_adapters_filtered<T: AsRef<str>>(
|
||||||
|
adapter_names: &[T],
|
||||||
|
) -> Fallible<Vec<Rc<dyn FileAdapter>>> {
|
||||||
let all_adapters = get_adapters();
|
let all_adapters = get_adapters();
|
||||||
let adapters = if !adapter_names.is_empty() {
|
let adapters = if !adapter_names.is_empty() {
|
||||||
let adapters_map: HashMap<_, _> = all_adapters
|
let adapters_map: HashMap<_, _> = all_adapters
|
||||||
@ -89,8 +124,8 @@ pub fn get_adapters_filtered(adapter_names: &Vec<String>) -> Fallible<Vec<Rc<dyn
|
|||||||
let mut adapters = vec![];
|
let mut adapters = vec![];
|
||||||
let mut subtractive = false;
|
let mut subtractive = false;
|
||||||
for (i, name) in adapter_names.iter().enumerate() {
|
for (i, name) in adapter_names.iter().enumerate() {
|
||||||
let mut name = &name[..];
|
let mut name = name.as_ref();
|
||||||
if i == 0 && name.starts_with("-") {
|
if i == 0 && (name.starts_with('-')) {
|
||||||
subtractive = true;
|
subtractive = true;
|
||||||
name = &name[1..];
|
name = &name[1..];
|
||||||
adapters = all_adapters.clone();
|
adapters = all_adapters.clone();
|
||||||
@ -98,7 +133,7 @@ pub fn get_adapters_filtered(adapter_names: &Vec<String>) -> Fallible<Vec<Rc<dyn
|
|||||||
if subtractive {
|
if subtractive {
|
||||||
let inx = adapters
|
let inx = adapters
|
||||||
.iter()
|
.iter()
|
||||||
.position(|a| &a.metadata().name == name)
|
.position(|a| a.metadata().name == name)
|
||||||
.ok_or_else(|| format_err!("Could not remove {}: Not in list", name))?;
|
.ok_or_else(|| format_err!("Could not remove {}: Not in list", name))?;
|
||||||
adapters.remove(inx);
|
adapters.remove(inx);
|
||||||
} else {
|
} else {
|
||||||
@ -124,34 +159,58 @@ pub fn get_adapters_filtered(adapter_names: &Vec<String>) -> Fallible<Vec<Rc<dyn
|
|||||||
);
|
);
|
||||||
Ok(adapters)
|
Ok(adapters)
|
||||||
}
|
}
|
||||||
pub fn adapter_matcher(
|
|
||||||
adapter_names: &Vec<String>,
|
pub fn adapter_matcher<T: AsRef<str>>(
|
||||||
|
adapter_names: &[T],
|
||||||
|
slow: bool,
|
||||||
) -> Fallible<impl Fn(FileMeta) -> Option<Rc<dyn FileAdapter>>> {
|
) -> Fallible<impl Fn(FileMeta) -> Option<Rc<dyn FileAdapter>>> {
|
||||||
let adapters = get_adapters_filtered(adapter_names)?;
|
let adapters = get_adapters_filtered(adapter_names)?;
|
||||||
let mut fname_regexes = vec![];
|
let mut fname_regexes = vec![];
|
||||||
//let mut mime_regexes = vec![];
|
let mut mime_regexes = vec![];
|
||||||
for adapter in adapters.into_iter() {
|
for adapter in adapters.into_iter() {
|
||||||
let metadata = adapter.metadata();
|
let metadata = adapter.metadata();
|
||||||
for matcher in &metadata.matchers {
|
use SlowMatcher::*;
|
||||||
match matcher {
|
for matcher in metadata.get_matchers(slow) {
|
||||||
//Matcher::MimeType(re) => mime_regexes.push((re.clone(), adapter.clone())),
|
match matcher.as_ref() {
|
||||||
Matcher::FileExtension(re) => {
|
MimeType(re) => mime_regexes.push((re.clone(), adapter.clone())),
|
||||||
|
Fast(FastMatcher::FileExtension(re)) => {
|
||||||
fname_regexes.push((extension_to_regex(re), adapter.clone()))
|
fname_regexes.push((extension_to_regex(re), adapter.clone()))
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let fname_regex_set = RegexSet::new(fname_regexes.iter().map(|p| p.0.as_str()))?;
|
let fname_regex_set = RegexSet::new(fname_regexes.iter().map(|p| p.0.as_str()))?;
|
||||||
//let mime_regex_set = RegexSet::new(mime_regexes.iter().map(|p| p.0.as_str()))?;
|
let mime_regex_set = RegexSet::new(mime_regexes.iter().map(|p| p.0.as_str()))?;
|
||||||
Ok(move |meta: FileMeta| {
|
Ok(move |meta: FileMeta| {
|
||||||
// todo: handle multiple conflicting matches
|
let fname_matches: Vec<_> = fname_regex_set
|
||||||
let matches = fname_regex_set.matches(&meta.lossy_filename);
|
.matches(&meta.lossy_filename)
|
||||||
match matches.iter().next() {
|
.into_iter()
|
||||||
Some(m) => Some(fname_regexes[m].1.clone()),
|
.collect();
|
||||||
None => None,
|
let mime_matches: Vec<_> = if slow {
|
||||||
|
mime_regex_set
|
||||||
|
.matches(&meta.mimetype.expect("No mimetype?"))
|
||||||
|
.into_iter()
|
||||||
|
.collect()
|
||||||
|
} else {
|
||||||
|
vec![]
|
||||||
|
};
|
||||||
|
if fname_matches.len() + mime_matches.len() > 1 {
|
||||||
|
eprintln!("Found multiple adapters for {}:", meta.lossy_filename);
|
||||||
|
for mmatch in mime_matches.iter() {
|
||||||
|
eprintln!(" - {}", mime_regexes[*mmatch].1.metadata().name);
|
||||||
|
}
|
||||||
|
for fmatch in fname_matches.iter() {
|
||||||
|
eprintln!(" - {}", fname_regexes[*fmatch].1.metadata().name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if mime_matches.len() == 0 {
|
||||||
|
if fname_matches.len() == 0 {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
Some(fname_regexes[fname_matches[0]].1.clone())
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
Some(mime_regexes[mime_matches[0]].1.clone())
|
||||||
}
|
}
|
||||||
/*for m in mime_regex_set.matches(&meta.mimetype) {
|
|
||||||
return Some(mime_regexes[m].1.clone());
|
|
||||||
}*/
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
@ -4,7 +4,7 @@ use spawning::SpawningFileAdapter;
|
|||||||
use std::process::Command;
|
use std::process::Command;
|
||||||
|
|
||||||
// from https://github.com/jgm/pandoc/blob/master/src/Text/Pandoc/App/FormatHeuristics.hs
|
// from https://github.com/jgm/pandoc/blob/master/src/Text/Pandoc/App/FormatHeuristics.hs
|
||||||
// excluding formats that could cause problems (db = sqlite) or that are already text formats (e.g. xml-based)
|
// excluding formats that could cause problems (.db ?= sqlite) or that are already text formats (e.g. xml-based)
|
||||||
//"db" -> Just "docbook"
|
//"db" -> Just "docbook"
|
||||||
//"adoc" -> Just "asciidoc"
|
//"adoc" -> Just "asciidoc"
|
||||||
//"asciidoc" -> Just "asciidoc"
|
//"asciidoc" -> Just "asciidoc"
|
||||||
@ -46,10 +46,11 @@ lazy_static! {
|
|||||||
name: "pandoc".to_owned(),
|
name: "pandoc".to_owned(),
|
||||||
version: 1,
|
version: 1,
|
||||||
description: "Uses pandoc to convert binary/unreadable text documents to plain text markdown-like text".to_owned(),
|
description: "Uses pandoc to convert binary/unreadable text documents to plain text markdown-like text".to_owned(),
|
||||||
matchers: EXTENSIONS
|
fast_matchers: EXTENSIONS
|
||||||
.iter()
|
.iter()
|
||||||
.map(|s| Matcher::FileExtension(s.to_string()))
|
.map(|s| FastMatcher::FileExtension(s.to_string()))
|
||||||
.collect(),
|
.collect(),
|
||||||
|
slow_matchers: None
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
|
@ -12,10 +12,11 @@ lazy_static! {
|
|||||||
version: 1,
|
version: 1,
|
||||||
description: "Uses pdftotext (from poppler-utils) to extract plain text from PDF files"
|
description: "Uses pdftotext (from poppler-utils) to extract plain text from PDF files"
|
||||||
.to_owned(),
|
.to_owned(),
|
||||||
matchers: EXTENSIONS
|
fast_matchers: EXTENSIONS
|
||||||
.iter()
|
.iter()
|
||||||
.map(|s| Matcher::FileExtension(s.to_string()))
|
.map(|s| FastMatcher::FileExtension(s.to_string()))
|
||||||
.collect(),
|
.collect(),
|
||||||
|
slow_matchers: None
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
|
@ -14,10 +14,13 @@ lazy_static! {
|
|||||||
description:
|
description:
|
||||||
"Uses sqlite bindings to convert sqlite databases into a simple plain text format"
|
"Uses sqlite bindings to convert sqlite databases into a simple plain text format"
|
||||||
.to_owned(),
|
.to_owned(),
|
||||||
matchers: EXTENSIONS
|
fast_matchers: EXTENSIONS
|
||||||
.iter()
|
.iter()
|
||||||
.map(|s| Matcher::FileExtension(s.to_string()))
|
.map(|s| FastMatcher::FileExtension(s.to_string()))
|
||||||
.collect(),
|
.collect(),
|
||||||
|
slow_matchers: Some(vec![SlowMatcher::MimeType(
|
||||||
|
"application/x-sqlite3".to_owned()
|
||||||
|
)])
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -13,10 +13,11 @@ lazy_static! {
|
|||||||
name: "tar".to_owned(),
|
name: "tar".to_owned(),
|
||||||
version: 1,
|
version: 1,
|
||||||
description: "Reads a tar file as a stream and recurses down into its contents".to_owned(),
|
description: "Reads a tar file as a stream and recurses down into its contents".to_owned(),
|
||||||
matchers: EXTENSIONS
|
fast_matchers: EXTENSIONS
|
||||||
.iter()
|
.iter()
|
||||||
.map(|s| Matcher::FileExtension(s.to_string()))
|
.map(|s| FastMatcher::FileExtension(s.to_string()))
|
||||||
.collect(),
|
.collect(),
|
||||||
|
slow_matchers: None
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
|
@ -14,10 +14,11 @@ lazy_static! {
|
|||||||
name: "zip".to_owned(),
|
name: "zip".to_owned(),
|
||||||
version: 1,
|
version: 1,
|
||||||
description: "Reads a zip file as a stream and recurses down into its contents".to_owned(),
|
description: "Reads a zip file as a stream and recurses down into its contents".to_owned(),
|
||||||
matchers: EXTENSIONS
|
fast_matchers: EXTENSIONS
|
||||||
.iter()
|
.iter()
|
||||||
.map(|s| Matcher::FileExtension(s.to_string()))
|
.map(|s| FastMatcher::FileExtension(s.to_string()))
|
||||||
.collect(),
|
.collect(),
|
||||||
|
slow_matchers: None
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
|
31
src/args.rs
31
src/args.rs
@ -32,58 +32,65 @@ set_default!(max_archive_recursion, 4, i32);
|
|||||||
#[structopt(rename_all = "kebab-case", set_term_width = 80)]
|
#[structopt(rename_all = "kebab-case", set_term_width = 80)]
|
||||||
pub struct RgaArgs {
|
pub struct RgaArgs {
|
||||||
#[serde(default, skip_serializing_if = "is_default")]
|
#[serde(default, skip_serializing_if = "is_default")]
|
||||||
#[structopt(long, help = "Disable caching of results")]
|
#[structopt(long = "--rga-no-cache", help = "Disable caching of results")]
|
||||||
pub rga_no_cache: bool,
|
pub no_cache: bool,
|
||||||
|
|
||||||
#[serde(default, skip_serializing_if = "is_default")]
|
#[serde(default, skip_serializing_if = "is_default")]
|
||||||
#[structopt(
|
#[structopt(
|
||||||
long,
|
long = "--rga-accurate",
|
||||||
|
help = "Use more accurate but slower matching by mime type"
|
||||||
|
)]
|
||||||
|
pub accurate: bool,
|
||||||
|
|
||||||
|
#[serde(default, skip_serializing_if = "is_default")]
|
||||||
|
#[structopt(
|
||||||
|
long = "--rga-adapters",
|
||||||
require_equals = true,
|
require_equals = true,
|
||||||
require_delimiter = true,
|
require_delimiter = true,
|
||||||
help = "Change which adapters to use and in which priority order (descending)"
|
help = "Change which adapters to use and in which priority order (descending)"
|
||||||
)]
|
)]
|
||||||
pub rga_adapters: Vec<String>,
|
pub adapters: Vec<String>,
|
||||||
|
|
||||||
#[serde(
|
#[serde(
|
||||||
default = "def_cache_max_blob_len",
|
default = "def_cache_max_blob_len",
|
||||||
skip_serializing_if = "def_cache_max_blob_len_if"
|
skip_serializing_if = "def_cache_max_blob_len_if"
|
||||||
)]
|
)]
|
||||||
#[structopt(
|
#[structopt(
|
||||||
long,
|
long = "--rga-cache-max-blob-len",
|
||||||
default_value = "2000000",
|
default_value = "2000000",
|
||||||
help = "Max compressed size to cache",
|
help = "Max compressed size to cache",
|
||||||
long_help = "Longest byte length (after compression) to store in cache. Longer adapter outputs will not be cached and recomputed every time."
|
long_help = "Longest byte length (after compression) to store in cache. Longer adapter outputs will not be cached and recomputed every time."
|
||||||
)]
|
)]
|
||||||
pub rga_cache_max_blob_len: u32,
|
pub cache_max_blob_len: u32,
|
||||||
|
|
||||||
#[serde(
|
#[serde(
|
||||||
default = "def_cache_compression_level",
|
default = "def_cache_compression_level",
|
||||||
skip_serializing_if = "def_cache_compression_level_if"
|
skip_serializing_if = "def_cache_compression_level_if"
|
||||||
)]
|
)]
|
||||||
#[structopt(
|
#[structopt(
|
||||||
long,
|
long = "--rga-cache-compression-level",
|
||||||
default_value = "12",
|
default_value = "12",
|
||||||
require_equals = true,
|
require_equals = true,
|
||||||
help = "ZSTD compression level to apply to adapter outputs before storing in cache db"
|
help = "ZSTD compression level to apply to adapter outputs before storing in cache db"
|
||||||
)]
|
)]
|
||||||
pub rga_cache_compression_level: u32,
|
pub cache_compression_level: u32,
|
||||||
|
|
||||||
#[serde(
|
#[serde(
|
||||||
default = "def_max_archive_recursion",
|
default = "def_max_archive_recursion",
|
||||||
skip_serializing_if = "def_max_archive_recursion_if"
|
skip_serializing_if = "def_max_archive_recursion_if"
|
||||||
)]
|
)]
|
||||||
#[structopt(
|
#[structopt(
|
||||||
long,
|
long = "--rga-max-archive-recursion",
|
||||||
default_value = "4",
|
default_value = "4",
|
||||||
require_equals = true,
|
require_equals = true,
|
||||||
help = "Maximum nestedness of archives to recurse into"
|
help = "Maximum nestedness of archives to recurse into"
|
||||||
)]
|
)]
|
||||||
pub rga_max_archive_recursion: i32,
|
pub max_archive_recursion: i32,
|
||||||
|
|
||||||
// these arguments stop the process, so don't serialize them
|
// these arguments stop the process, so don't serialize them
|
||||||
#[serde(skip)]
|
#[serde(skip)]
|
||||||
#[structopt(long, help = "List all known adapters")]
|
#[structopt(long = "--rga-list-adapters", help = "List all known adapters")]
|
||||||
pub rga_list_adapters: bool,
|
pub list_adapters: bool,
|
||||||
|
|
||||||
#[serde(skip)]
|
#[serde(skip)]
|
||||||
#[structopt(long, help = "Show help for ripgrep itself")]
|
#[structopt(long, help = "Show help for ripgrep itself")]
|
||||||
|
@ -21,7 +21,7 @@ fn main() -> Fallible<()> {
|
|||||||
|
|
||||||
let i = File::open(&path)?;
|
let i = File::open(&path)?;
|
||||||
let mut o = std::io::stdout();
|
let mut o = std::io::stdout();
|
||||||
let cache = if args.rga_no_cache {
|
let cache = if args.no_cache {
|
||||||
None
|
None
|
||||||
} else {
|
} else {
|
||||||
Some(rga::preproc_cache::open()?)
|
Some(rga::preproc_cache::open()?)
|
||||||
|
@ -62,17 +62,17 @@ fn main() -> Fallible<()> {
|
|||||||
env_logger::init();
|
env_logger::init();
|
||||||
|
|
||||||
let (args, passthrough_args) = split_args()?;
|
let (args, passthrough_args) = split_args()?;
|
||||||
let adapters = get_adapters_filtered(&args.rga_adapters)?;
|
let adapters = get_adapters_filtered(&args.adapters)?;
|
||||||
|
|
||||||
if args.rga_list_adapters {
|
if args.list_adapters {
|
||||||
println!("Adapters:\n");
|
println!("Adapters:\n");
|
||||||
for adapter in adapters {
|
for adapter in adapters {
|
||||||
let meta = adapter.metadata();
|
let meta = adapter.metadata();
|
||||||
let matchers = meta
|
let matchers = meta
|
||||||
.matchers
|
.fast_matchers
|
||||||
.iter()
|
.iter()
|
||||||
.map(|m| match m {
|
.map(|m| match m {
|
||||||
Matcher::FileExtension(ext) => format!(".{}", ext),
|
FastMatcher::FileExtension(ext) => format!(".{}", ext),
|
||||||
})
|
})
|
||||||
.collect::<Vec<_>>()
|
.collect::<Vec<_>>()
|
||||||
.join(", ");
|
.join(", ");
|
||||||
@ -87,9 +87,9 @@ fn main() -> Fallible<()> {
|
|||||||
|
|
||||||
let extensions = adapters
|
let extensions = adapters
|
||||||
.iter()
|
.iter()
|
||||||
.flat_map(|a| &a.metadata().matchers)
|
.flat_map(|a| &a.metadata().fast_matchers)
|
||||||
.filter_map(|m| match m {
|
.filter_map(|m| match m {
|
||||||
Matcher::FileExtension(ext) => Some(ext as &str),
|
FastMatcher::FileExtension(ext) => Some(ext as &str),
|
||||||
})
|
})
|
||||||
.collect::<Vec<_>>()
|
.collect::<Vec<_>>()
|
||||||
.join(",");
|
.join(",");
|
||||||
|
@ -47,7 +47,7 @@ impl<W: Write> Write for CachingWriter<W> {
|
|||||||
Some(writer) => {
|
Some(writer) => {
|
||||||
let wrote = writer.write(buf)?;
|
let wrote = writer.write(buf)?;
|
||||||
let compressed_len = writer.get_ref().len();
|
let compressed_len = writer.get_ref().len();
|
||||||
//eprintln!("wrote {} to zstd, len now {}", wrote, compressed_len);
|
trace!("wrote {} to zstd, len now {}", wrote, compressed_len);
|
||||||
if compressed_len > self.max_cache_size {
|
if compressed_len > self.max_cache_size {
|
||||||
eprintln!("cache longer than max, dropping");
|
eprintln!("cache longer than max, dropping");
|
||||||
//writer.finish();
|
//writer.finish();
|
||||||
|
@ -20,7 +20,6 @@ pub struct PreprocConfig<'a> {
|
|||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
pub fn rga_preproc(ai: AdaptInfo) -> Result<(), Error> {
|
pub fn rga_preproc(ai: AdaptInfo) -> Result<(), Error> {
|
||||||
let adapters = adapter_matcher(&ai.config.args.rga_adapters)?;
|
|
||||||
let AdaptInfo {
|
let AdaptInfo {
|
||||||
filepath_hint,
|
filepath_hint,
|
||||||
is_real_file,
|
is_real_file,
|
||||||
@ -32,11 +31,12 @@ pub fn rga_preproc(ai: AdaptInfo) -> Result<(), Error> {
|
|||||||
..
|
..
|
||||||
} = ai;
|
} = ai;
|
||||||
let PreprocConfig { mut cache, args } = config;
|
let PreprocConfig { mut cache, args } = config;
|
||||||
|
let adapters = adapter_matcher(&args.adapters[..], args.accurate)?;
|
||||||
let filename = filepath_hint
|
let filename = filepath_hint
|
||||||
.file_name()
|
.file_name()
|
||||||
.ok_or_else(|| format_err!("Empty filename"))?;
|
.ok_or_else(|| format_err!("Empty filename"))?;
|
||||||
eprintln!("depth: {}", archive_recursion_depth);
|
eprintln!("depth: {}", archive_recursion_depth);
|
||||||
if archive_recursion_depth >= args.rga_max_archive_recursion {
|
if archive_recursion_depth >= args.max_archive_recursion {
|
||||||
writeln!(oup, "{}[rga: max archive recursion reached]", line_prefix)?;
|
writeln!(oup, "{}[rga: max archive recursion reached]", line_prefix)?;
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
@ -49,7 +49,7 @@ pub fn rga_preproc(ai: AdaptInfo) -> Result<(), Error> {
|
|||||||
)))?;
|
)))?;
|
||||||
println!("mimetype: {:?}", mimetype);*/
|
println!("mimetype: {:?}", mimetype);*/
|
||||||
let adapter = adapters(FileMeta {
|
let adapter = adapters(FileMeta {
|
||||||
// mimetype,
|
mimetype: None,
|
||||||
lossy_filename: filename.to_string_lossy().to_string(),
|
lossy_filename: filename.to_string_lossy().to_string(),
|
||||||
});
|
});
|
||||||
match adapter {
|
match adapter {
|
||||||
@ -77,8 +77,8 @@ pub fn rga_preproc(ai: AdaptInfo) -> Result<(), Error> {
|
|||||||
// wrapping BufWriter here gives ~10% perf boost
|
// wrapping BufWriter here gives ~10% perf boost
|
||||||
let mut compbuf = BufWriter::new(CachingWriter::new(
|
let mut compbuf = BufWriter::new(CachingWriter::new(
|
||||||
oup,
|
oup,
|
||||||
args.rga_cache_max_blob_len.try_into().unwrap(),
|
args.cache_max_blob_len.try_into().unwrap(),
|
||||||
args.rga_cache_compression_level.try_into().unwrap(),
|
args.cache_compression_level.try_into().unwrap(),
|
||||||
)?);
|
)?);
|
||||||
eprintln!("adapting...");
|
eprintln!("adapting...");
|
||||||
ad.adapt(AdaptInfo {
|
ad.adapt(AdaptInfo {
|
||||||
|
Loading…
Reference in New Issue
Block a user