diff --git a/Cargo.lock b/Cargo.lock index 00254cf..4b7e35e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -664,6 +664,26 @@ dependencies = [ "winapi 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "paste" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "paste-impl 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", + "proc-macro-hack 0.5.7 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "paste-impl" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "proc-macro-hack 0.5.7 (registry+https://github.com/rust-lang/crates.io-index)", + "proc-macro2 0.4.30 (registry+https://github.com/rust-lang/crates.io-index)", + "quote 0.6.12 (registry+https://github.com/rust-lang/crates.io-index)", + "syn 0.15.34 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "path-clean" version = "0.1.0" @@ -693,6 +713,16 @@ name = "podio" version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "proc-macro-hack" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "proc-macro2 0.4.30 (registry+https://github.com/rust-lang/crates.io-index)", + "quote 0.6.12 (registry+https://github.com/rust-lang/crates.io-index)", + "syn 0.15.34 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "proc-macro2" version = "0.4.30" @@ -889,6 +919,7 @@ dependencies = [ "flate2 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)", "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", + "paste 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", "path-clean 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "regex 1.1.6 (registry+https://github.com/rust-lang/crates.io-index)", "rkv 0.9.6 (registry+https://github.com/rust-lang/crates.io-index)", @@ -1380,11 +1411,14 @@ dependencies = [ "checksum ordermap 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "a86ed3f5f244b372d6b1a00b72ef7f8876d0bc6a78a4c9985c53614041512063" "checksum parking_lot 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fa7767817701cce701d5585b9c4db3cdd02086398322c1d7e8bf5094a96a2ce7" "checksum parking_lot_core 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "cb88cb1cb3790baa6776844f968fea3be44956cf184fa1be5a03341f5491278c" +"checksum paste 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "1f4a4a1c555c6505821f9d58b8779d0f630a6b7e4e1be24ba718610acf01fa79" +"checksum paste-impl 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "26e796e623b8b257215f27e6c80a5478856cae305f5b59810ff9acdaa34570e6" "checksum path-clean 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ecba01bf2678719532c5e3059e0b5f0811273d94b397088b82e3bd0a78c78fdd" "checksum percent-encoding 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "31010dd2e1ac33d5b46a5b413495239882813e0369f8ed8a5e266f173602f831" "checksum petgraph 0.4.13 (registry+https://github.com/rust-lang/crates.io-index)" = "9c3659d1ee90221741f65dd128d9998311b0e40c5d3c23a62445938214abce4f" "checksum pkg-config 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)" = "676e8eb2b1b4c9043511a9b7bea0915320d7e502b0a079fb03f9635a5252b18c" "checksum podio 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "780fb4b6698bbf9cf2444ea5d22411cef2953f0824b98f33cf454ec5615645bd" +"checksum proc-macro-hack 0.5.7 (registry+https://github.com/rust-lang/crates.io-index)" = "0c1dd4172a1e1f96f709341418f49b11ea6c2d95d53dca08c0f74cbd332d9cf3" "checksum proc-macro2 0.4.30 (registry+https://github.com/rust-lang/crates.io-index)" = "cf3d2011ab5c909338f7887f4fc896d35932e29146c12c8d01da6b22a80ba759" "checksum quick-error 1.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "9274b940887ce9addde99c4eee6b5c44cc494b182b97e73dc8ffdcb3397fd3f0" "checksum quote 0.6.12 (registry+https://github.com/rust-lang/crates.io-index)" = "faf4799c5d274f3868a4aae320a0a182cbd2baee377b378f080e16a23e9d80db" diff --git a/Cargo.toml b/Cargo.toml index 77e86e5..3dc114c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -40,3 +40,4 @@ encoding_rs_io = "0.1.6" rusqlite = { version = "0.18.0", features=["vtab"] } # "bundled" size_format = "1.0.2" structopt = "0.2.17" +paste = "0.1.5" diff --git a/README.md b/README.md index 79300b9..d62e879 100644 --- a/README.md +++ b/README.md @@ -10,10 +10,13 @@ similar: # considerations - matching on mime (magic bytes) instead of filename +- allow per-adapter configuration options # Development To enable debug logging: +```bash export RUST_LOG=rga=debug export RUST_BACKTRACE=1 +``` diff --git a/src/adapters/mod.rs b/src/adapters/mod.rs index 055e064..fa05f57 100644 --- a/src/adapters/mod.rs +++ b/src/adapters/mod.rs @@ -57,7 +57,7 @@ pub struct AdaptInfo<'a> { /// prefix every output line with this string to better indicate the file's location if it is in some archive pub line_prefix: &'a str, // pub adapt_subobject: &'a dyn Fn(AdaptInfo) -> Fallible<()>, - pub config: PreprocConfig, + pub config: PreprocConfig<'a>, } pub fn extension_to_regex(extension: &str) -> Regex { diff --git a/src/args.rs b/src/args.rs index 46f10c7..b08dd09 100644 --- a/src/args.rs +++ b/src/args.rs @@ -1,4 +1,3 @@ - use failure::Fallible; use log::*; use serde::{Deserialize, Serialize}; @@ -11,12 +10,30 @@ fn is_default(t: &T) -> bool { t == &T::default() } +// ugly, but serde and structopt use different methods to define defaults +macro_rules! set_default { + ($name:ident, $val:expr, $type:ty) => { + paste::item! { + fn []() -> $type { + $val + } + fn [](e: &$type) -> bool { + e == &[]() + } + } + }; +} + +set_default!(cache_compression_level, 12, u32); +set_default!(cache_max_blob_len, 2000000, u32); +set_default!(max_archive_recursion, 4, i32); + #[derive(StructOpt, Debug, Deserialize, Serialize)] -#[structopt(rename_all = "kebab-case")] -pub struct RgaOptions { +#[structopt(rename_all = "kebab-case", set_term_width = 80)] +pub struct RgaArgs { #[serde(default, skip_serializing_if = "is_default")] #[structopt(long, help = "Disable caching of results")] - pub no_cache: bool, + pub rga_no_cache: bool, #[serde(default, skip_serializing_if = "is_default")] #[structopt( @@ -25,7 +42,48 @@ pub struct RgaOptions { require_delimiter = true, help = "Change which adapters to use and in which priority order (descending)" )] - pub adapters: Vec, + pub rga_adapters: Vec, + + #[serde( + default = "def_cache_max_blob_len", + skip_serializing_if = "def_cache_max_blob_len_if" + )] + #[structopt( + long, + default_value = "2000000", + help = "Max compressed size to cache", + long_help = "Longest byte length (after compression) to store in cache. Longer adapter outputs will not be cached and recomputed every time." + )] + pub rga_cache_max_blob_len: u32, + + #[serde( + default = "def_cache_compression_level", + skip_serializing_if = "def_cache_compression_level_if" + )] + #[structopt( + long, + default_value = "12", + require_equals = true, + help = "ZSTD compression level to apply to adapter outputs before storing in cache db" + )] + pub rga_cache_compression_level: u32, + + #[serde( + default = "def_max_archive_recursion", + skip_serializing_if = "def_max_archive_recursion_if" + )] + #[structopt( + long, + default_value = "4", + require_equals = true, + help = "Maximum nestedness of archives to recurse into" + )] + pub rga_max_archive_recursion: i32, + + // these arguments stop the process, so don't serialize them + #[serde(skip)] + #[structopt(long, help = "List all known adapters")] + pub rga_list_adapters: bool, #[serde(skip)] #[structopt(long, help = "Show help for ripgrep itself")] @@ -34,15 +92,11 @@ pub struct RgaOptions { #[serde(skip)] #[structopt(long, help = "Show version of ripgrep itself")] pub rg_version: bool, - - #[serde(skip)] - #[structopt(long, help = "List all known adapters")] - pub list_adapters: bool, } static RGA_CONFIG: &str = "RGA_CONFIG"; -pub fn parse_args(args: I) -> Fallible +pub fn parse_args(args: I) -> Fallible where I: IntoIterator, I::Item: Into + Clone, @@ -53,7 +107,7 @@ where Ok(serde_json::from_str(&val)?) } Err(_) => { - let matches = RgaOptions::from_iter(args); + let matches = RgaArgs::from_iter(args); let serialized_config = serde_json::to_string(&matches)?; std::env::set_var(RGA_CONFIG, &serialized_config); debug!("{}={}", RGA_CONFIG, serialized_config); diff --git a/src/bin/rga-preproc.rs b/src/bin/rga-preproc.rs index 38448ce..90add0c 100644 --- a/src/bin/rga-preproc.rs +++ b/src/bin/rga-preproc.rs @@ -1,5 +1,3 @@ - - use failure::{format_err, Fallible}; use rga::adapters::*; @@ -23,7 +21,7 @@ fn main() -> Fallible<()> { let i = File::open(&path)?; let mut o = std::io::stdout(); - let cache = if args.no_cache { + let cache = if args.rga_no_cache { None } else { Some(rga::preproc_cache::open()?) @@ -35,10 +33,7 @@ fn main() -> Fallible<()> { oup: &mut o, line_prefix: "", archive_recursion_depth: 0, - config: PreprocConfig { - cache, - max_archive_recursion: 3, - }, + config: PreprocConfig { cache, args: &args }, }; rga_preproc(ai) diff --git a/src/bin/rga.rs b/src/bin/rga.rs index d006970..0e94a56 100644 --- a/src/bin/rga.rs +++ b/src/bin/rga.rs @@ -1,4 +1,3 @@ - use failure::Fallible; use log::*; use rga::adapters::spawning::map_exe_error; @@ -9,8 +8,8 @@ use std::ffi::OsString; use std::process::Command; use structopt::StructOpt; -fn split_args() -> Fallible<(RgaOptions, Vec)> { - let mut app = RgaOptions::clap(); +fn split_args() -> Fallible<(RgaArgs, Vec)> { + let mut app = RgaArgs::clap(); app.p.create_help_and_version(); let mut firstarg = true; @@ -65,7 +64,7 @@ fn main() -> Fallible<()> { let (args, passthrough_args) = split_args()?; let adapters = get_adapters(); - if args.list_adapters { + if args.rga_list_adapters { println!("Adapters:"); for adapter in adapters { let meta = adapter.metadata(); diff --git a/src/preproc.rs b/src/preproc.rs index 423e79d..058337b 100644 --- a/src/preproc.rs +++ b/src/preproc.rs @@ -1,18 +1,17 @@ use crate::adapters::*; +use crate::args::RgaArgs; use crate::CachingWriter; use failure::Fallible; use failure::{format_err, Error}; use path_clean::PathClean; +use std::convert::TryInto; use std::io::BufWriter; -// longest compressed conversion output to save in cache -const MAX_DB_BLOB_LEN: usize = 2_000_000; -const ZSTD_LEVEL: i32 = 12; use std::sync::{Arc, RwLock}; #[derive(Clone)] -pub struct PreprocConfig { +pub struct PreprocConfig<'a> { pub cache: Option>>, - pub max_archive_recursion: i32, + pub args: &'a RgaArgs, } /** * preprocess a file as defined in `ai`. @@ -32,15 +31,12 @@ pub fn rga_preproc(ai: AdaptInfo) -> Result<(), Error> { archive_recursion_depth, .. } = ai; - let PreprocConfig { - mut cache, - max_archive_recursion, - } = config; + let PreprocConfig { mut cache, args } = config; let filename = filepath_hint .file_name() .ok_or_else(|| format_err!("Empty filename"))?; eprintln!("depth: {}", archive_recursion_depth); - if archive_recursion_depth >= config.max_archive_recursion { + if archive_recursion_depth >= args.rga_max_archive_recursion { writeln!(oup, "{}[rga: max archive recursion reached]", line_prefix)?; return Ok(()); } @@ -79,8 +75,11 @@ pub fn rga_preproc(ai: AdaptInfo) -> Result<(), Error> { &cache_key, Box::new(|| -> Fallible>> { // wrapping BufWriter here gives ~10% perf boost - let mut compbuf = - BufWriter::new(CachingWriter::new(oup, MAX_DB_BLOB_LEN, ZSTD_LEVEL)?); + let mut compbuf = BufWriter::new(CachingWriter::new( + oup, + args.rga_cache_max_blob_len.try_into().unwrap(), + args.rga_cache_compression_level.try_into().unwrap(), + )?); eprintln!("adapting..."); ad.adapt(AdaptInfo { line_prefix, @@ -89,10 +88,7 @@ pub fn rga_preproc(ai: AdaptInfo) -> Result<(), Error> { inp, oup: &mut compbuf, archive_recursion_depth, - config: PreprocConfig { - cache: None, - max_archive_recursion, - }, + config: PreprocConfig { cache: None, args }, })?; let compressed = compbuf .into_inner() @@ -120,10 +116,7 @@ pub fn rga_preproc(ai: AdaptInfo) -> Result<(), Error> { inp, oup, archive_recursion_depth, - config: PreprocConfig { - cache: None, - max_archive_recursion, - }, + config: PreprocConfig { cache: None, args }, })?; Ok(()) }