ripgrep-all/src/args.rs

227 lines
7.7 KiB
Rust
Raw Normal View History

use anyhow::*;
2019-06-07 19:46:03 +00:00
use log::*;
2019-06-07 19:46:03 +00:00
use serde::{Deserialize, Serialize};
2019-06-07 19:46:17 +00:00
2019-06-07 19:46:03 +00:00
use std::ffi::OsString;
use std::{iter::IntoIterator, str::FromStr};
2019-06-07 19:46:17 +00:00
2019-06-07 19:46:03 +00:00
use structopt::StructOpt;
/// Newtype wrapper around a signed 64-bit byte count.
///
/// NOTE(review): nothing in the visible part of this file constructs or reads
/// this type — confirm whether it is still needed.
#[derive(Debug, Deserialize, Serialize)]
struct ReadableBytesCount(i64);
/// Parses a human-readable byte count into a plain number of bytes.
///
/// The input is a decimal integer, optionally followed by exactly one decimal
/// (SI) suffix: `k` (x 1_000), `M` (x 1_000_000) or `G` (x 1_000_000_000).
/// Examples: `"2000000"`, `"500k"`, `"20M"`, `"1G"`.
///
/// # Errors
///
/// Returns an error if
/// * `s` is empty,
/// * the part in front of the (single) suffix is not a valid `i64` — this
///   includes inputs with a repeated suffix such as `"5kk"`,
/// * the scaled value does not fit into an `i64`.
fn parse_readable_bytes_str(s: &str) -> Result<i64, Error> {
    let suffix = match s.chars().last() {
        Some(c) => c,
        None => return Err(format_err!("empty byte input")),
    };

    // `suffix` is the last char of `s`, so this slice always ends on a char
    // boundary. Only ONE suffix character is removed; anything left over that
    // is not a number is rejected by the integer parser below.
    let without_suffix = &s[..s.len() - suffix.len_utf8()];
    let (digits, multiplier): (&str, i64) = match suffix {
        'k' => (without_suffix, 1_000),
        'M' => (without_suffix, 1_000_000),
        'G' => (without_suffix, 1_000_000_000),
        _ => (s, 1),
    };

    let value = i64::from_str(digits).with_context(|| "Could not parse int")?;

    // Report values that overflow instead of panicking (debug builds) or
    // silently wrapping around (release builds).
    value
        .checked_mul(multiplier)
        .ok_or_else(|| format_err!("byte count {} does not fit into a 64-bit integer", s))
}
2019-06-07 19:46:03 +00:00
/// Returns `true` when `t` is equal to the `Default` value of its type.
///
/// Referenced by name from the `skip_serializing_if = "is_default"` serde
/// attributes in this file.
fn is_default<T>(t: &T) -> bool
where
    T: Default + PartialEq,
{
    let default_value = T::default();
    *t == default_value
}
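// Ugly, but serde and structopt declare defaults through different mechanisms,
// so every default has to be declared twice: once through `set_default!` (for serde)
// and once as the `default_value` of the matching structopt attribute.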
2019-06-07 21:04:18 +00:00
// Generates the two free functions that the serde attributes refer to by name
// for a setting called `$setting`:
//   * `def_<setting>()`        returns the default value `$default`
//   * `def_<setting>_if(&v)`   tells whether `v` equals that default
macro_rules! set_default {
    ($setting:ident, $default:expr, $ty:ty) => {
        paste::item! {
            fn [<def_ $setting>]() -> $ty {
                $default
            }

            fn [<def_ $setting _if>](candidate: &$ty) -> bool {
                *candidate == [<def_ $setting>]()
            }
        }
    };
}
set_default!(cache_compression_level, 12, u32);
set_default!(cache_max_blob_len, 2000000, i64);
2019-06-07 21:04:18 +00:00
set_default!(max_archive_recursion, 4, i32);
2019-06-07 19:46:03 +00:00
#[derive(StructOpt, Debug, Deserialize, Serialize)]
2019-06-15 09:00:45 +00:00
#[structopt(
2020-05-19 09:10:11 +00:00
name = "ripgrep-all",
2019-06-15 09:00:45 +00:00
rename_all = "kebab-case",
2020-05-19 09:10:11 +00:00
about = env!("CARGO_PKG_DESCRIPTION"),
author = env!("CARGO_PKG_HOMEPAGE"),
2019-06-15 09:00:45 +00:00
// TODO: long_about does not seem to work to only show this on short help
2020-06-06 13:01:53 +00:00
after_help = "-h shows a concise overview, --help shows more detail and advanced options.\n\nAll other options not shown here are passed directly to rg, especially [PATTERN] and [PATH ...]",
usage = "rga [RGA OPTIONS] [RG OPTIONS] PATTERN [PATH ...]"
2019-06-15 09:00:45 +00:00
)]
2019-06-07 21:04:18 +00:00
pub struct RgaArgs {
2019-06-07 19:46:03 +00:00
#[serde(default, skip_serializing_if = "is_default")]
2019-06-12 20:55:18 +00:00
#[structopt(long = "--rga-no-cache")]
/// Disable caching of results
///
/// By default, rga caches the extracted text, if it is small enough,
2020-06-06 13:01:53 +00:00
/// to a database in ~/.cache/rga on Linux,
/// ~/Library/Caches/rga on macOS,
/// or C:\Users\username\AppData\Local\rga on Windows.
2019-06-12 20:55:18 +00:00
/// This way, repeated searches on the same set of files will be much faster.
/// If you pass this flag, all caching will be disabled.
2019-06-11 11:34:04 +00:00
pub no_cache: bool,
2019-06-07 19:46:03 +00:00
#[serde(default, skip_serializing_if = "is_default")]
2019-06-12 20:55:18 +00:00
#[structopt(long = "--rga-accurate")]
/// Use more accurate but slower matching by mime type
///
/// By default, rga will match files using file extensions.
/// Some programs, such as sqlite3, don't care about the file extension at all,
/// so users sometimes use any or no extension at all. With this flag, rga
/// will try to detect the mime type of input files using the magic bytes
/// (similar to the `file` utility), and use that to choose the adapter.
2019-06-13 13:18:14 +00:00
/// Detection is only done on the first 8KiB of the file, since we can't always seek on the input (in archives).
2019-06-11 11:34:04 +00:00
pub accurate: bool,
#[serde(default, skip_serializing_if = "is_default")]
#[structopt(
long = "--rga-adapters",
2019-06-07 19:46:03 +00:00
require_equals = true,
2019-06-12 20:55:18 +00:00
require_delimiter = true
2019-06-07 19:46:03 +00:00
)]
2019-06-12 20:55:18 +00:00
/// Change which adapters to use and in which priority order (descending)
///
/// "foo,bar" means use only adapters foo and bar.
/// "-bar,baz" means use all default adapters except for bar and baz.
/// "+bar,baz" means use all default adapters and also bar and baz.
2019-06-11 11:34:04 +00:00
pub adapters: Vec<String>,
2019-06-07 21:04:18 +00:00
#[serde(
default = "def_cache_max_blob_len",
skip_serializing_if = "def_cache_max_blob_len_if"
)]
2019-06-15 09:00:45 +00:00
#[structopt(
long = "--rga-cache-max-blob-len",
default_value = "2000000",
hidden_short_help = true,
require_equals = true,
parse(try_from_str = parse_readable_bytes_str)
2019-06-15 09:00:45 +00:00
)]
2019-06-12 20:55:18 +00:00
/// Max compressed size to cache
///
/// Longest byte length (after compression) to store in cache. Longer adapter outputs will not be cached and recomputed every time. Allowed suffixes: k M G
pub cache_max_blob_len: i64,
2019-06-07 21:04:18 +00:00
#[serde(
default = "def_cache_compression_level",
skip_serializing_if = "def_cache_compression_level_if"
)]
#[structopt(
2019-06-11 11:34:04 +00:00
long = "--rga-cache-compression-level",
2019-06-12 20:55:18 +00:00
hidden_short_help = true,
2019-06-07 21:04:18 +00:00
default_value = "12",
require_equals = true,
2019-06-12 20:55:18 +00:00
help = ""
2019-06-07 21:04:18 +00:00
)]
2019-06-12 20:55:18 +00:00
/// ZSTD compression level to apply to adapter outputs before storing in cache db
///
/// Ranges from 1 - 22
2019-06-11 11:34:04 +00:00
pub cache_compression_level: u32,
2019-06-07 21:04:18 +00:00
#[serde(
default = "def_max_archive_recursion",
skip_serializing_if = "def_max_archive_recursion_if"
)]
#[structopt(
2019-06-11 11:34:04 +00:00
long = "--rga-max-archive-recursion",
2019-06-07 21:04:18 +00:00
default_value = "4",
require_equals = true,
2019-06-15 09:00:45 +00:00
help = "Maximum nestedness of archives to recurse into",
hidden_short_help = true
2019-06-07 21:04:18 +00:00
)]
2019-06-11 11:34:04 +00:00
pub max_archive_recursion: i32,
2019-06-07 21:04:18 +00:00
// these arguments stop the process, so don't serialize them
#[serde(skip)]
2019-06-11 11:34:04 +00:00
#[structopt(long = "--rga-list-adapters", help = "List all known adapters")]
pub list_adapters: bool,
2019-06-07 19:46:03 +00:00
#[serde(skip)]
#[structopt(long, help = "Show help for ripgrep itself")]
pub rg_help: bool,
#[serde(skip)]
#[structopt(long, help = "Show version of ripgrep itself")]
pub rg_version: bool,
}
/// Name of the environment variable that `parse_args` reads a JSON-serialized
/// `RgaArgs` from, and that it writes one to when the variable cannot be read.
static RGA_CONFIG: &str = "RGA_CONFIG";
pub fn parse_args<I>(args: I) -> Result<RgaArgs>
2019-06-07 19:46:03 +00:00
where
I: IntoIterator,
I::Item: Into<OsString> + Clone,
{
match std::env::var(RGA_CONFIG) {
Ok(val) => {
2019-06-12 15:23:30 +00:00
debug!(
"Loading args from env {}={}, ignoring cmd args",
RGA_CONFIG, val
);
2019-06-07 19:46:03 +00:00
Ok(serde_json::from_str(&val)?)
}
Err(_) => {
2019-06-07 21:04:18 +00:00
let matches = RgaArgs::from_iter(args);
2019-06-07 19:46:03 +00:00
let serialized_config = serde_json::to_string(&matches)?;
std::env::set_var(RGA_CONFIG, &serialized_config);
debug!("{}={}", RGA_CONFIG, serialized_config);
Ok(matches)
}
}
}
2019-06-11 11:43:01 +00:00
/// Split arguments into the ones we care about and the ones rg cares about
pub fn split_args() -> Result<(RgaArgs, Vec<OsString>)> {
2019-06-11 11:43:01 +00:00
let mut app = RgaArgs::clap();
app.p.create_help_and_version();
let mut firstarg = true;
// debug!("{:#?}", app.p.flags);
let (our_args, mut passthrough_args): (Vec<OsString>, Vec<OsString>) = std::env::args_os()
.partition(|os_arg| {
if firstarg {
// hacky, but .enumerate() would be ugly because partition is too simplistic
firstarg = false;
return true;
}
if let Some(arg) = os_arg.to_str() {
arg.starts_with("--rga-")
|| arg.starts_with("--rg-")
|| arg == "--help"
|| arg == "-h"
|| arg == "--version"
|| arg == "-V"
} else {
// args that are not unicode can only be filenames, pass them to rg
false
}
});
debug!("our_args: {:?}", our_args);
let matches = parse_args(our_args)?;
if matches.rg_help {
passthrough_args.insert(0, "--help".into());
}
if matches.rg_version {
passthrough_args.insert(0, "--version".into());
}
debug!("passthrough_args: {:?}", passthrough_args);
Ok((matches, passthrough_args))
}