config file (WIP)

This commit is contained in:
phiresky 2020-06-08 23:11:43 +02:00
parent 0001feb24b
commit 963524bbf5
10 changed files with 321 additions and 104 deletions

128
Cargo.lock generated
View File

@ -36,6 +36,12 @@ version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4c527152e37cf757a3f78aae5a06fbeefdb07ccc535c980a3208ee3060dd544"
[[package]]
name = "arrayvec"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cff77d8686867eceff3105329d4698d96c2391c176d5d03adc90c7389162b5b8"
[[package]]
name = "atty"
version = "0.2.14"
@ -59,6 +65,12 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8aac770f1885fd7e387acedd76065302551364496e46b3dd00860b2f8359b9d"
[[package]]
name = "base64"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b41b7ea54a0c9d92199de89e20e58d49f02f8e699814ef3fdf266f6f748d15c7"
[[package]]
name = "bincode"
version = "1.2.1"
@ -75,6 +87,17 @@ version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693"
[[package]]
name = "blake2b_simd"
version = "0.5.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d8fb2d74254a3a0b5cac33ac9f8ed0e44aa50378d9dbb2e5d83bd21ed1dc2c8a"
dependencies = [
"arrayref",
"arrayvec",
"constant_time_eq",
]
[[package]]
name = "byteorder"
version = "1.3.4"
@ -102,12 +125,6 @@ dependencies = [
"pkg-config",
]
[[package]]
name = "cachedir"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c06509d1f4ffa658939bd23f076cd929ef218241363796551528e7eec69128c8"
[[package]]
name = "cc"
version = "1.0.54"
@ -159,6 +176,12 @@ dependencies = [
"bitflags",
]
[[package]]
name = "constant_time_eq"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc"
[[package]]
name = "crc32fast"
version = "1.2.0"
@ -239,6 +262,38 @@ dependencies = [
"lazy_static",
]
[[package]]
name = "derive_more"
version = "0.99.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2127768764f1556535c01b5326ef94bd60ff08dcfbdc544d53e69ed155610f5d"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "directories-next"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "21eaa04e945bec7e2dc7383817c566881d9a83d20a07cc949b54585873585a48"
dependencies = [
"cfg-if",
"dirs-sys-next",
]
[[package]]
name = "dirs-sys-next"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c60f7b8a8953926148223260454befb50c751d3c50e1c178c4fd1ace4083c9a"
dependencies = [
"libc",
"redox_users",
"winapi",
]
[[package]]
name = "either"
version = "1.5.3"
@ -957,6 +1012,17 @@ version = "0.1.56"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2439c63f3f6139d1b57529d16bc3b8bb855230c8efcc5d3a896c8bea7c3b1e84"
[[package]]
name = "redox_users"
version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09b23093265f8d200fa7b4c2c76297f47e681c655f6f1285a8780d6a022f7431"
dependencies = [
"getrandom",
"redox_syscall",
"rust-argon2",
]
[[package]]
name = "regex"
version = "1.3.9"
@ -991,10 +1057,11 @@ dependencies = [
"anyhow",
"bincode",
"bzip2",
"cachedir",
"chrono",
"clap",
"crossbeam",
"derive_more",
"directories-next",
"encoding_rs",
"encoding_rs_io",
"env_logger",
@ -1007,6 +1074,7 @@ dependencies = [
"regex",
"rkv",
"rusqlite",
"schemars",
"serde",
"serde_json",
"size_format",
@ -1056,6 +1124,18 @@ dependencies = [
"time",
]
[[package]]
name = "rust-argon2"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2bc8af4bda8e1ff4932523b94d3dd20ee30a87232323eda55903ffd71d2fb017"
dependencies = [
"base64",
"blake2b_simd",
"constant_time_eq",
"crossbeam-utils",
]
[[package]]
name = "rustc_version"
version = "0.2.3"
@ -1071,6 +1151,29 @@ version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e"
[[package]]
name = "schemars"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "be77ed66abed6954aabf6a3e31a84706bedbf93750d267e92ef4a6d90bbd6a61"
dependencies = [
"schemars_derive",
"serde",
"serde_json",
]
[[package]]
name = "schemars_derive"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "11af7a475c9ee266cfaa9e303a47c830ebe072bf3101ab907a7b7b9d816fa01d"
dependencies = [
"proc-macro2",
"quote",
"serde_derive_internals",
"syn",
]
[[package]]
name = "scopeguard"
version = "1.1.0"
@ -1112,6 +1215,17 @@ dependencies = [
"syn",
]
[[package]]
name = "serde_derive_internals"
version = "0.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1dbab34ca63057a1f15280bdf3c39f2b1eb1b54c17e98360e511637aef7418c6"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "serde_json"
version = "1.0.53"

View File

@ -19,7 +19,6 @@ exclude = [
tree_magic = { package = "tree_magic_fork", version = "0.2.2" }
regex = "1.3.9"
rkv = "0.10.4"
cachedir = "0.1.1"
path-clean = "0.1.0"
bincode = "1.2.1"
serde = { version = "1.0.111", features = ["derive"] }
@ -45,3 +44,6 @@ paste = "0.1.16"
tempfile = "3.1.0"
glob = "0.3.0"
anyhow = "1.0.31"
schemars = "0.7.6"
directories-next = "1.0.1"
derive_more = "0.99.7"

View File

@ -1 +1 @@
stable
nightly

View File

@ -1,21 +1,67 @@
use crate::project_dirs;
use anyhow::*;
use derive_more::FromStr;
use log::*;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use std::ffi::OsString;
use std::{iter::IntoIterator, str::FromStr};
use std::{fs::File, io::Write, iter::IntoIterator, str::FromStr};
use structopt::StructOpt;
#[derive(Debug, Deserialize, Serialize)]
struct ReadableBytesCount(i64);
fn parse_readable_bytes_str(s: &str) -> Result<i64, Error> {
/// Returns `true` when `t` compares equal to `T::default()`.
///
/// Referenced by name from the `skip_serializing_if = "is_default"` serde
/// attributes in this file.
fn is_default<T: Default + PartialEq>(t: &T) -> bool {
    let baseline = T::default();
    *t == baseline
}
// ZSTD compression level applied to adapter outputs before they are cached.
//
// A newtype (instead of a bare `i32`) so that one `Default` impl can feed both
// `#[serde(default)]` and structopt's value-less `default_value`.
// NOTE: deliberately a `//` comment — a `///` doc comment here would end up in
// the JSON schema that schemars derives for this type.
#[derive(JsonSchema, Debug, Serialize, Deserialize, Copy, Clone, PartialEq, FromStr)]
pub struct CacheCompressionLevel(pub i32);

// `Display` is implemented rather than `ToString` directly; `to_string()` stays
// available to callers through the standard blanket `impl<T: Display> ToString`.
impl std::fmt::Display for CacheCompressionLevel {
    /// Formats the wrapped level exactly like the underlying `i32`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        std::fmt::Display::fmt(&self.0, f)
    }
}

impl Default for CacheCompressionLevel {
    /// The default compression level is 12.
    fn default() -> Self {
        CacheCompressionLevel(12)
    }
}
// Maximum nestedness of archives to recurse into.
//
// A newtype (instead of a bare `i32`) so that one `Default` impl can feed both
// `#[serde(default)]` and structopt's value-less `default_value`.
// NOTE: deliberately a `//` comment — a `///` doc comment here would end up in
// the JSON schema that schemars derives for this type.
#[derive(JsonSchema, Debug, Serialize, Deserialize, Copy, Clone, PartialEq, FromStr)]
pub struct MaxArchiveRecursion(pub i32);

// `Display` is implemented rather than `ToString` directly; `to_string()` stays
// available to callers through the standard blanket `impl<T: Display> ToString`.
impl std::fmt::Display for MaxArchiveRecursion {
    /// Formats the wrapped depth exactly like the underlying `i32`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        std::fmt::Display::fmt(&self.0, f)
    }
}

impl Default for MaxArchiveRecursion {
    /// The default recursion limit is 4.
    fn default() -> Self {
        MaxArchiveRecursion(4)
    }
}
// Longest byte length (after compression) of an adapter output that is still
// stored in the cache.
//
// A newtype (instead of a bare `usize`) so that one `Default` impl can feed both
// `#[serde(default)]` and structopt's value-less `default_value`.
// NOTE: deliberately a `//` comment — a `///` doc comment here would end up in
// the JSON schema that schemars derives for this type.
#[derive(JsonSchema, Debug, Serialize, Deserialize, Copy, Clone, PartialEq)]
pub struct CacheMaxBlobLen(pub usize);

// `Display` is implemented rather than `ToString` directly; `to_string()` stays
// available to callers through the standard blanket `impl<T: Display> ToString`.
impl std::fmt::Display for CacheMaxBlobLen {
    /// Formats the wrapped length as a plain decimal byte count (no suffix).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        std::fmt::Display::fmt(&self.0, f)
    }
}

impl Default for CacheMaxBlobLen {
    /// The default limit is 2,000,000 bytes.
    fn default() -> Self {
        CacheMaxBlobLen(2_000_000)
    }
}
impl FromStr for CacheMaxBlobLen {
type Err = anyhow::Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
let suffix = s.chars().last();
if let Some(suffix) = suffix {
match suffix {
'k' | 'M' | 'G' => i64::from_str(s.trim_end_matches(suffix))
Ok(CacheMaxBlobLen(match suffix {
'k' | 'M' | 'G' => usize::from_str(s.trim_end_matches(suffix))
.with_context(|| format!("Could not parse int"))
.map(|e| {
e * match suffix {
@ -25,36 +71,15 @@ fn parse_readable_bytes_str(s: &str) -> Result<i64, Error> {
_ => panic!("impossible"),
}
}),
_ => i64::from_str(s).with_context(|| format!("Could not parse int")),
}
_ => usize::from_str(s).with_context(|| format!("Could not parse int")),
}?))
} else {
Err(format_err!("empty byte input"))
}
}
/// Returns `true` when `t` compares equal to `T::default()`.
///
/// Referenced by name from the `skip_serializing_if = "is_default"` serde
/// attributes in this file.
fn is_default<T: Default + PartialEq>(t: &T) -> bool {
    let baseline = T::default();
    *t == baseline
}
// ugly, but serde and structopt use different methods to define defaults, so need to declare defaults twice
// Generates two helper functions for one configuration option:
// - `def_<name>()` returns the default value `$val` as `$type`
// - `def_<name>_if(e)` reports whether `e` equals that default
// `paste::item!` is what joins `def_`, `$name` and `_if` into one identifier.
macro_rules! set_default {
($name:ident, $val:expr, $type:ty) => {
paste::item! {
// Default-value provider, referenced from serde's `default = "..."` attribute.
fn [<def_ $name>]() -> $type {
$val
}
// Predicate referenced from serde's `skip_serializing_if = "..."` attribute.
fn [<def_ $name _if>](e: &$type) -> bool {
e == &[<def_ $name>]()
}
}
};
}
// One invocation per option: option name, default value, value type.
set_default!(cache_compression_level, 12, u32);
set_default!(cache_max_blob_len, 2000000, i64);
set_default!(max_archive_recursion, 4, i32);
#[derive(StructOpt, Debug, Deserialize, Serialize)]
#[derive(StructOpt, Debug, Deserialize, Serialize, JsonSchema, Default)]
#[structopt(
name = "ripgrep-all",
rename_all = "kebab-case",
@ -64,7 +89,14 @@ set_default!(max_archive_recursion, 4, i32);
after_help = "-h shows a concise overview, --help shows more detail and advanced options.\n\nAll other options not shown here are passed directly to rg, especially [PATTERN] and [PATH ...]",
usage = "rga [RGA OPTIONS] [RG OPTIONS] PATTERN [PATH ...]"
)]
pub struct RgaArgs {
/// # rga configuration
///
/// this is kind of a "polyglot" struct, since it serves three functions
///
/// 1. describing the command line arguments using structopt+clap
/// 2. describing the config file format (output as JSON schema via schemars)
/// 3. carrying the parsed configuration, serialized as JSON, in the RGA_CONFIG environment variable
pub struct RgaConfig {
#[serde(default, skip_serializing_if = "is_default")]
#[structopt(long = "--rga-no-cache")]
/// Disable caching of results
@ -102,50 +134,41 @@ pub struct RgaArgs {
/// "+bar,baz" means use all default adapters and also bar and baz.
pub adapters: Vec<String>,
#[serde(
default = "def_cache_max_blob_len",
skip_serializing_if = "def_cache_max_blob_len_if"
)]
#[serde(default, skip_serializing_if = "is_default")]
#[structopt(
default_value,
long = "--rga-cache-max-blob-len",
default_value = "2000000",
hidden_short_help = true,
require_equals = true,
parse(try_from_str = parse_readable_bytes_str)
// parse(try_from_str = parse_readable_bytes_str)
)]
/// Max compressed size to cache
///
/// Longest byte length (after compression) to store in cache. Longer adapter outputs will not be cached and recomputed every time. Allowed suffixes: k M G
pub cache_max_blob_len: i64,
pub cache_max_blob_len: CacheMaxBlobLen,
#[serde(
default = "def_cache_compression_level",
skip_serializing_if = "def_cache_compression_level_if"
)]
#[serde(default, skip_serializing_if = "is_default")]
#[structopt(
default_value,
long = "--rga-cache-compression-level",
hidden_short_help = true,
default_value = "12",
require_equals = true,
help = ""
)]
/// ZSTD compression level to apply to adapter outputs before storing in cache db
///
/// Ranges from 1 - 22
pub cache_compression_level: u32,
pub cache_compression_level: CacheCompressionLevel,
#[serde(
default = "def_max_archive_recursion",
skip_serializing_if = "def_max_archive_recursion_if"
)]
#[serde(default, skip_serializing_if = "is_default")]
#[structopt(
default_value,
long = "--rga-max-archive-recursion",
default_value = "4",
require_equals = true,
help = "Maximum nestedness of archives to recurse into",
hidden_short_help = true
)]
pub max_archive_recursion: i32,
/// Maximum nestedness of archives to recurse into
pub max_archive_recursion: MaxArchiveRecursion,
#[serde(skip)]
#[structopt(long = "--rga-fzf-path", require_equals = true, hidden = true)]
@ -153,11 +176,18 @@ pub struct RgaArgs {
/// kinda hacky, but if no file is found, fzf calls rga with empty string as path, which causes No such file or directory from rg. So filter those cases and return specially
pub fzf_path: Option<String>,
// these arguments stop the process, so don't serialize them
// these arguments are basically "subcommands" that stop the process, so don't serialize them
#[serde(skip)]
#[structopt(long = "--rga-list-adapters", help = "List all known adapters")]
pub list_adapters: bool,
#[serde(skip)]
#[structopt(
long = "--rga-print-config-schema",
help = "Print the JSON Schema of the configuration file"
)]
pub print_config_schema: bool,
#[serde(skip)]
#[structopt(long, help = "Show help for ripgrep itself")]
pub rg_help: bool,
@ -165,15 +195,43 @@ pub struct RgaArgs {
#[serde(skip)]
#[structopt(long, help = "Show version of ripgrep itself")]
pub rg_version: bool,
#[serde(rename = "$schema", default = "default_schema_path")]
#[structopt(skip)]
pub _schema_key: String,
}
/// Default value for the `$schema` key of the config: a path to the schema
/// file relative to the config file itself.
fn default_schema_path() -> String {
    String::from("./config.schema.json")
}
static RGA_CONFIG: &str = "RGA_CONFIG";
pub fn parse_args<I>(args: I) -> Result<RgaArgs>
pub fn parse_args<I>(args: I) -> Result<RgaConfig>
where
I: IntoIterator,
I::Item: Into<OsString> + Clone,
{
let proj = project_dirs()?;
let config_dir = proj.config_dir();
if config_dir.join("config.json").exists() {
// todo: read config
} else {
std::fs::create_dir_all(config_dir)?;
let mut schemafile = File::create(config_dir.join("config.schema.json"))?;
schemafile
.write(serde_json::to_string_pretty(&schemars::schema_for!(RgaConfig))?.as_bytes())?;
let mut configfile = File::create(config_dir.join("config.json"))?;
let mut v = serde_json::to_value(&RgaConfig::default())?;
match &mut v {
serde_json::Value::Object(o) => {
o["$schema"] = serde_json::Value::String("./config.schema.json".to_string())
}
_ => panic!("impos"),
}
configfile.write(serde_json::to_string_pretty(&v)?.as_bytes())?;
}
match std::env::var(RGA_CONFIG) {
Ok(val) => {
debug!(
@ -183,7 +241,7 @@ where
Ok(serde_json::from_str(&val)?)
}
Err(_) => {
let matches = RgaArgs::from_iter(args);
let matches = RgaConfig::from_iter(args);
let serialized_config = serde_json::to_string(&matches)?;
std::env::set_var(RGA_CONFIG, &serialized_config);
debug!("{}={}", RGA_CONFIG, serialized_config);
@ -194,8 +252,8 @@ where
}
/// Split arguments into the ones we care about and the ones rg cares about
pub fn split_args() -> Result<(RgaArgs, Vec<OsString>)> {
let mut app = RgaArgs::clap();
pub fn split_args() -> Result<(RgaConfig, Vec<OsString>)> {
let mut app = RgaConfig::clap();
app.p.create_help_and_version();
let mut firstarg = true;

37
src/bin/rga-fzf-open.rs Normal file
View File

@ -0,0 +1,37 @@
use anyhow::Context;
use rga::adapters::spawning::map_exe_error;
use ripgrep_all as rga;
use std::process::{Command, Stdio};
// TODO: add --rg-params=..., --rg-preview-params=... and --fzf-params=... params
// TODO: remove passthrough_args
/// Opens a file for the given search query.
///
/// Expects two positional arguments: the query and the file name.
/// Files ending in `.pdf` are opened with `evince --find <query> <file>`; if the
/// `evince` executable is not found (or the file is not a pdf) the file is handed
/// to `xdg-open` instead. Spawned viewers are not waited on.
///
/// # Errors
/// Fails when an argument is missing, when spawning `evince` fails for a reason
/// other than `NotFound`, or when spawning `xdg-open` fails.
fn main() -> anyhow::Result<()> {
    env_logger::init();

    let mut cli = std::env::args().skip(1);
    let query = cli.next().context("no query")?;
    let fname = cli.next().context("no filename")?;
    // let instance_id = std::env::var("RGA_FZF_INSTANCE").unwrap_or("unk".to_string());

    if fname.ends_with(".pdf") {
        let launched = Command::new("evince")
            .arg("--find")
            .arg(&query)
            .arg(&fname)
            .spawn();
        match launched {
            // evince started: nothing left to do
            Ok(_) => return Ok(()),
            // evince is not installed: fall through to the generic opener
            Err(err) if err.kind() == std::io::ErrorKind::NotFound => {}
            Err(err) => return Err(err.into()),
        }
    }

    Command::new("xdg-open").arg(fname).spawn()?;
    Ok(())
}

View File

@ -4,6 +4,8 @@ use ripgrep_all as rga;
use std::process::{Command, Stdio};
// TODO: add --rg-params=..., --rg-preview-params=... and --fzf-params=... params
// TODO: remove passthrough_args
fn main() -> anyhow::Result<()> {
env_logger::init();
let mut passthrough_args: Vec<String> = std::env::args().skip(1).collect();
@ -19,6 +21,10 @@ fn main() -> anyhow::Result<()> {
let preproc_exe = preproc_exe
.to_str()
.context("rga executable is in non-unicode path")?;
let open_exe = exe.with_file_name("rga-fzf-open");
let open_exe = open_exe
.to_str()
.context("rga-fzf-open executable is in non-unicode path")?;
let rg_prefix = format!(
"{} --files-with-matches --rga-cache-max-blob-len=10M",
@ -30,15 +36,18 @@ fn main() -> anyhow::Result<()> {
"--preview={} --pretty --context 5 {{q}} --rga-fzf-path=_{{}}",
preproc_exe
))
.arg("--preview-window=70%:wrap")
.arg("--phony")
.arg("--query")
.arg(&initial_query)
.arg("--print-query")
.arg(format!("--bind=change:reload: {} {{q}}", rg_prefix))
.arg(format!("--bind=ctrl-m:execute:{} {{q}} {{}}", open_exe))
.env(
"FZF_DEFAULT_COMMAND",
format!("{} '{}'", rg_prefix, &initial_query),
)
.env("RGA_FZF_INSTANCE", format!("{}", std::process::id())) // may be useful to open stuff in the same tab
.stdout(Stdio::piped())
.spawn()
.map_err(|e| map_exe_error(e, "fzf", "Please make sure you have fzf installed."))?;
@ -51,25 +60,5 @@ fn main() -> anyhow::Result<()> {
.context("fzf ofilename not utf8")?;
println!("query='{}', file='{}'", final_query, selected_file);
if selected_file.ends_with(".pdf") {
use std::io::ErrorKind::*;
let worked = Command::new("evince")
.arg("--find")
.arg(final_query)
.arg(selected_file)
.spawn()
.map_or_else(
|err| match err.kind() {
NotFound => Ok(false),
_ => Err(err),
},
|_| Ok(true),
)?;
if worked {
return Ok(());
}
}
Command::new("xdg-open").arg(selected_file).spawn()?;
Ok(())
}

View File

@ -7,6 +7,7 @@ use rga::matching::*;
use ripgrep_all as rga;
use structopt::StructOpt;
use schemars::schema_for;
use std::process::Command;
fn main() -> anyhow::Result<()> {
@ -14,6 +15,11 @@ fn main() -> anyhow::Result<()> {
let (args, mut passthrough_args) = split_args()?;
if args.print_config_schema {
println!("{}", serde_json::to_string_pretty(&schema_for!(RgaConfig))?);
return Ok(());
}
if args.list_adapters {
let (enabled_adapters, disabled_adapters) = get_all_adapters();
@ -73,7 +79,7 @@ fn main() -> anyhow::Result<()> {
if passthrough_args.len() == 0 {
// rg would show help. Show own help instead.
RgaArgs::clap().print_help()?;
RgaConfig::clap().print_help()?;
println!("");
return Ok(());
}

View File

@ -6,4 +6,12 @@ mod caching_writer;
pub mod matching;
pub mod preproc;
pub mod preproc_cache;
use anyhow::Context;
use anyhow::Result;
pub use caching_writer::CachingWriter;
use directories_next::ProjectDirs;
pub fn project_dirs() -> Result<ProjectDirs> {
directories_next::ProjectDirs::from("", "", "ripgrep-all")
.context("no home directory found! :(")
}

View File

@ -1,5 +1,5 @@
use crate::adapters::*;
use crate::args::RgaArgs;
use crate::args::RgaConfig;
use crate::matching::*;
use crate::CachingWriter;
use anyhow::*;
@ -14,7 +14,7 @@ use std::sync::{Arc, RwLock};
#[derive(Clone)]
pub struct PreprocConfig<'a> {
pub cache: Option<Arc<RwLock<dyn crate::preproc_cache::PreprocCache>>>,
pub args: &'a RgaArgs,
pub args: &'a RgaConfig,
}
/**
* preprocess a file as defined in `ai`.
@ -39,7 +39,7 @@ pub fn rga_preproc(ai: AdaptInfo) -> Result<()> {
.file_name()
.ok_or_else(|| format_err!("Empty filename"))?;
debug!("depth: {}", archive_recursion_depth);
if archive_recursion_depth >= args.max_archive_recursion {
if archive_recursion_depth >= args.max_archive_recursion.0 {
writeln!(oup, "{}[rga: max archive recursion reached]", line_prefix)?;
return Ok(());
}
@ -102,8 +102,8 @@ pub fn rga_preproc(ai: AdaptInfo) -> Result<()> {
// wrapping BufWriter here gives ~10% perf boost
let mut compbuf = BufWriter::new(CachingWriter::new(
oup,
args.cache_max_blob_len.try_into().unwrap(),
args.cache_compression_level.try_into().unwrap(),
args.cache_max_blob_len.0.try_into().unwrap(),
args.cache_compression_level.0.try_into().unwrap(),
)?);
debug!("adapting...");
adapter

View File

@ -1,3 +1,4 @@
use crate::project_dirs;
use anyhow::{format_err, Context, Result};
use log::*;
use std::{
@ -21,12 +22,14 @@ pub trait PreprocCache {
/// opens a LMDB cache
fn open_cache_db() -> Result<std::sync::Arc<std::sync::RwLock<rkv::Rkv>>> {
let app_cache = cachedir::CacheDirConfig::new("rga").get_cache_dir()?;
let pd = project_dirs()?;
let app_cache = pd.cache_dir();
std::fs::create_dir_all(app_cache)?;
rkv::Manager::singleton()
.write()
.map_err(|_| format_err!("could not write cache db manager"))?
.get_or_create(app_cache.as_path(), |p| {
.get_or_create(app_cache, |p| {
let mut builder = rkv::Rkv::environment_builder();
builder
.set_flags(rkv::EnvironmentFlags::NO_SYNC | rkv::EnvironmentFlags::WRITE_MAP) // not durable cuz it's a cache