More options, fewer constants

This commit is contained in:
phiresky 2019-06-07 23:04:18 +02:00
parent 0cedf72de6
commit d06d27709b
8 changed files with 122 additions and 43 deletions

34
Cargo.lock generated
View File

@ -664,6 +664,26 @@ dependencies = [
"winapi 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "paste"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"paste-impl 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
"proc-macro-hack 0.5.7 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "paste-impl"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"proc-macro-hack 0.5.7 (registry+https://github.com/rust-lang/crates.io-index)",
"proc-macro2 0.4.30 (registry+https://github.com/rust-lang/crates.io-index)",
"quote 0.6.12 (registry+https://github.com/rust-lang/crates.io-index)",
"syn 0.15.34 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "path-clean"
version = "0.1.0"
@ -693,6 +713,16 @@ name = "podio"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "proc-macro-hack"
version = "0.5.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"proc-macro2 0.4.30 (registry+https://github.com/rust-lang/crates.io-index)",
"quote 0.6.12 (registry+https://github.com/rust-lang/crates.io-index)",
"syn 0.15.34 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "proc-macro2"
version = "0.4.30"
@ -889,6 +919,7 @@ dependencies = [
"flate2 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
"paste 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
"path-clean 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 1.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
"rkv 0.9.6 (registry+https://github.com/rust-lang/crates.io-index)",
@ -1380,11 +1411,14 @@ dependencies = [
"checksum ordermap 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "a86ed3f5f244b372d6b1a00b72ef7f8876d0bc6a78a4c9985c53614041512063"
"checksum parking_lot 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fa7767817701cce701d5585b9c4db3cdd02086398322c1d7e8bf5094a96a2ce7"
"checksum parking_lot_core 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "cb88cb1cb3790baa6776844f968fea3be44956cf184fa1be5a03341f5491278c"
"checksum paste 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "1f4a4a1c555c6505821f9d58b8779d0f630a6b7e4e1be24ba718610acf01fa79"
"checksum paste-impl 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "26e796e623b8b257215f27e6c80a5478856cae305f5b59810ff9acdaa34570e6"
"checksum path-clean 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ecba01bf2678719532c5e3059e0b5f0811273d94b397088b82e3bd0a78c78fdd"
"checksum percent-encoding 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "31010dd2e1ac33d5b46a5b413495239882813e0369f8ed8a5e266f173602f831"
"checksum petgraph 0.4.13 (registry+https://github.com/rust-lang/crates.io-index)" = "9c3659d1ee90221741f65dd128d9998311b0e40c5d3c23a62445938214abce4f"
"checksum pkg-config 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)" = "676e8eb2b1b4c9043511a9b7bea0915320d7e502b0a079fb03f9635a5252b18c"
"checksum podio 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "780fb4b6698bbf9cf2444ea5d22411cef2953f0824b98f33cf454ec5615645bd"
"checksum proc-macro-hack 0.5.7 (registry+https://github.com/rust-lang/crates.io-index)" = "0c1dd4172a1e1f96f709341418f49b11ea6c2d95d53dca08c0f74cbd332d9cf3"
"checksum proc-macro2 0.4.30 (registry+https://github.com/rust-lang/crates.io-index)" = "cf3d2011ab5c909338f7887f4fc896d35932e29146c12c8d01da6b22a80ba759"
"checksum quick-error 1.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "9274b940887ce9addde99c4eee6b5c44cc494b182b97e73dc8ffdcb3397fd3f0"
"checksum quote 0.6.12 (registry+https://github.com/rust-lang/crates.io-index)" = "faf4799c5d274f3868a4aae320a0a182cbd2baee377b378f080e16a23e9d80db"

View File

@ -40,3 +40,4 @@ encoding_rs_io = "0.1.6"
rusqlite = { version = "0.18.0", features=["vtab"] } # "bundled"
size_format = "1.0.2"
structopt = "0.2.17"
paste = "0.1.5"

View File

@ -10,10 +10,13 @@ similar:
# considerations
- matching on mime (magic bytes) instead of filename
- allow per-adapter configuration options
# Development
To enable debug logging:
```bash
export RUST_LOG=rga=debug
export RUST_BACKTRACE=1
```

View File

@ -57,7 +57,7 @@ pub struct AdaptInfo<'a> {
/// prefix every output line with this string to better indicate the file's location if it is in some archive
pub line_prefix: &'a str,
// pub adapt_subobject: &'a dyn Fn(AdaptInfo) -> Fallible<()>,
pub config: PreprocConfig,
pub config: PreprocConfig<'a>,
}
pub fn extension_to_regex(extension: &str) -> Regex {

View File

@ -1,4 +1,3 @@
use failure::Fallible;
use log::*;
use serde::{Deserialize, Serialize};
@ -11,12 +10,30 @@ fn is_default<T: Default + PartialEq>(t: &T) -> bool {
t == &T::default()
}
// serde takes a field default as the *name of a function*, while structopt takes it as a
// string literal, so the two cannot share one constant. This macro generates the
// function-style helpers that the serde attributes refer to.
//
// `set_default!(name, value, Type)` expands to:
// - `fn def_<name>() -> Type`            returns `value`
// - `fn def_<name>_if(&Type) -> bool`    true when the argument equals `def_<name>()`
macro_rules! set_default {
    ($field:ident, $default:expr, $ty:ty) => {
        paste::item! {
            // Default-value provider, referenced by `#[serde(default = "...")]`.
            fn [<def_ $field>]() -> $ty {
                $default
            }
            // Equality check against the default, referenced by
            // `#[serde(skip_serializing_if = "...")]`.
            fn [<def_ $field _if>](candidate: &$ty) -> bool {
                *candidate == [<def_ $field>]()
            }
        }
    };
}
// Generate the `def_<name>` / `def_<name>_if` helper functions for each tunable option.
// NOTE: the structopt `default_value = "..."` strings on the matching `rga_*` fields repeat
// these values and must be kept in sync with them by hand.
set_default!(cache_compression_level, 12, u32);
set_default!(cache_max_blob_len, 2000000, u32);
set_default!(max_archive_recursion, 4, i32);
#[derive(StructOpt, Debug, Deserialize, Serialize)]
#[structopt(rename_all = "kebab-case")]
pub struct RgaOptions {
#[structopt(rename_all = "kebab-case", set_term_width = 80)]
pub struct RgaArgs {
#[serde(default, skip_serializing_if = "is_default")]
#[structopt(long, help = "Disable caching of results")]
pub no_cache: bool,
pub rga_no_cache: bool,
#[serde(default, skip_serializing_if = "is_default")]
#[structopt(
@ -25,7 +42,48 @@ pub struct RgaOptions {
require_delimiter = true,
help = "Change which adapters to use and in which priority order (descending)"
)]
pub adapters: Vec<String>,
pub rga_adapters: Vec<String>,
#[serde(
default = "def_cache_max_blob_len",
skip_serializing_if = "def_cache_max_blob_len_if"
)]
#[structopt(
long,
default_value = "2000000",
help = "Max compressed size to cache",
long_help = "Longest byte length (after compression) to store in cache. Longer adapter outputs will not be cached and recomputed every time."
)]
pub rga_cache_max_blob_len: u32,
#[serde(
default = "def_cache_compression_level",
skip_serializing_if = "def_cache_compression_level_if"
)]
#[structopt(
long,
default_value = "12",
require_equals = true,
help = "ZSTD compression level to apply to adapter outputs before storing in cache db"
)]
pub rga_cache_compression_level: u32,
#[serde(
default = "def_max_archive_recursion",
skip_serializing_if = "def_max_archive_recursion_if"
)]
#[structopt(
long,
default_value = "4",
require_equals = true,
help = "Maximum nestedness of archives to recurse into"
)]
pub rga_max_archive_recursion: i32,
// these arguments stop the process, so don't serialize them
#[serde(skip)]
#[structopt(long, help = "List all known adapters")]
pub rga_list_adapters: bool,
#[serde(skip)]
#[structopt(long, help = "Show help for ripgrep itself")]
@ -34,15 +92,11 @@ pub struct RgaOptions {
#[serde(skip)]
#[structopt(long, help = "Show version of ripgrep itself")]
pub rg_version: bool,
#[serde(skip)]
#[structopt(long, help = "List all known adapters")]
pub list_adapters: bool,
}
static RGA_CONFIG: &str = "RGA_CONFIG";
pub fn parse_args<I>(args: I) -> Fallible<RgaOptions>
pub fn parse_args<I>(args: I) -> Fallible<RgaArgs>
where
I: IntoIterator,
I::Item: Into<OsString> + Clone,
@ -53,7 +107,7 @@ where
Ok(serde_json::from_str(&val)?)
}
Err(_) => {
let matches = RgaOptions::from_iter(args);
let matches = RgaArgs::from_iter(args);
let serialized_config = serde_json::to_string(&matches)?;
std::env::set_var(RGA_CONFIG, &serialized_config);
debug!("{}={}", RGA_CONFIG, serialized_config);

View File

@ -1,5 +1,3 @@
use failure::{format_err, Fallible};
use rga::adapters::*;
@ -23,7 +21,7 @@ fn main() -> Fallible<()> {
let i = File::open(&path)?;
let mut o = std::io::stdout();
let cache = if args.no_cache {
let cache = if args.rga_no_cache {
None
} else {
Some(rga::preproc_cache::open()?)
@ -35,10 +33,7 @@ fn main() -> Fallible<()> {
oup: &mut o,
line_prefix: "",
archive_recursion_depth: 0,
config: PreprocConfig {
cache,
max_archive_recursion: 3,
},
config: PreprocConfig { cache, args: &args },
};
rga_preproc(ai)

View File

@ -1,4 +1,3 @@
use failure::Fallible;
use log::*;
use rga::adapters::spawning::map_exe_error;
@ -9,8 +8,8 @@ use std::ffi::OsString;
use std::process::Command;
use structopt::StructOpt;
fn split_args() -> Fallible<(RgaOptions, Vec<OsString>)> {
let mut app = RgaOptions::clap();
fn split_args() -> Fallible<(RgaArgs, Vec<OsString>)> {
let mut app = RgaArgs::clap();
app.p.create_help_and_version();
let mut firstarg = true;
@ -65,7 +64,7 @@ fn main() -> Fallible<()> {
let (args, passthrough_args) = split_args()?;
let adapters = get_adapters();
if args.list_adapters {
if args.rga_list_adapters {
println!("Adapters:");
for adapter in adapters {
let meta = adapter.metadata();

View File

@ -1,18 +1,17 @@
use crate::adapters::*;
use crate::args::RgaArgs;
use crate::CachingWriter;
use failure::Fallible;
use failure::{format_err, Error};
use path_clean::PathClean;
use std::convert::TryInto;
use std::io::BufWriter;
// longest compressed conversion output to save in cache
const MAX_DB_BLOB_LEN: usize = 2_000_000;
const ZSTD_LEVEL: i32 = 12;
use std::sync::{Arc, RwLock};
#[derive(Clone)]
pub struct PreprocConfig {
pub struct PreprocConfig<'a> {
pub cache: Option<Arc<RwLock<dyn crate::preproc_cache::PreprocCache>>>,
pub max_archive_recursion: i32,
pub args: &'a RgaArgs,
}
/**
* preprocess a file as defined in `ai`.
@ -32,15 +31,12 @@ pub fn rga_preproc(ai: AdaptInfo) -> Result<(), Error> {
archive_recursion_depth,
..
} = ai;
let PreprocConfig {
mut cache,
max_archive_recursion,
} = config;
let PreprocConfig { mut cache, args } = config;
let filename = filepath_hint
.file_name()
.ok_or_else(|| format_err!("Empty filename"))?;
eprintln!("depth: {}", archive_recursion_depth);
if archive_recursion_depth >= config.max_archive_recursion {
if archive_recursion_depth >= args.rga_max_archive_recursion {
writeln!(oup, "{}[rga: max archive recursion reached]", line_prefix)?;
return Ok(());
}
@ -79,8 +75,11 @@ pub fn rga_preproc(ai: AdaptInfo) -> Result<(), Error> {
&cache_key,
Box::new(|| -> Fallible<Option<Vec<u8>>> {
// wrapping BufWriter here gives ~10% perf boost
let mut compbuf =
BufWriter::new(CachingWriter::new(oup, MAX_DB_BLOB_LEN, ZSTD_LEVEL)?);
let mut compbuf = BufWriter::new(CachingWriter::new(
oup,
args.rga_cache_max_blob_len.try_into().unwrap(),
args.rga_cache_compression_level.try_into().unwrap(),
)?);
eprintln!("adapting...");
ad.adapt(AdaptInfo {
line_prefix,
@ -89,10 +88,7 @@ pub fn rga_preproc(ai: AdaptInfo) -> Result<(), Error> {
inp,
oup: &mut compbuf,
archive_recursion_depth,
config: PreprocConfig {
cache: None,
max_archive_recursion,
},
config: PreprocConfig { cache: None, args },
})?;
let compressed = compbuf
.into_inner()
@ -120,10 +116,7 @@ pub fn rga_preproc(ai: AdaptInfo) -> Result<(), Error> {
inp,
oup,
archive_recursion_depth,
config: PreprocConfig {
cache: None,
max_archive_recursion,
},
config: PreprocConfig { cache: None, args },
})?;
Ok(())
}