From 5410da1133be8fbae5c3182db4d35c635d3d1291 Mon Sep 17 00:00:00 2001
From: phiresky
Date: Tue, 9 Jun 2020 12:47:34 +0200
Subject: [PATCH] add lots of debug and timings

---
 Cargo.lock            | 20 +++++++++++++
 Cargo.toml            |  1 +
 src/adapters.rs       |  2 +-
 src/adapters/tar.rs   |  6 ++--
 src/args.rs           |  3 ++
 src/bin/rga.rs        | 14 ++++++---
 src/caching_writer.rs | 18 ++++++------
 src/lib.rs            | 48 ++++++++++++++++++++++++++++++
 src/matching.rs       |  2 +-
 src/preproc.rs        | 68 +++++++++++++++++++++++++++----------------
 src/preproc_cache.rs  | 20 ++++++++++---
 11 files changed, 155 insertions(+), 47 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 0fc6d67..166de5f 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -415,6 +415,15 @@ dependencies = [
  "typenum",
 ]
 
+[[package]]
+name = "getopts"
+version = "0.2.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "14dbbfd5c71d70241ecf9e6f13737f7b5ce823821063188d7e46c41d371eebd5"
+dependencies = [
+ "unicode-width",
+]
+
 [[package]]
 name = "getrandom"
 version = "0.1.14"
@@ -794,6 +803,16 @@ version = "0.2.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "237a5ed80e274dbc66f86bd59c1e25edc039660be53194b5fe0a482e0f2612ea"
 
+[[package]]
+name = "pretty-bytes"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "009d6edd2c1dbf2e1c0cd48a2f7766e03498d49ada7109a01c6911815c685316"
+dependencies = [
+ "atty",
+ "getopts",
+]
+
 [[package]]
 name = "proc-macro-error"
 version = "1.0.2"
@@ -1071,6 +1090,7 @@ dependencies = [
  "log",
  "paste",
  "path-clean",
+ "pretty-bytes",
  "regex",
  "rkv",
  "rusqlite",
diff --git a/Cargo.toml b/Cargo.toml
index c6cf4f1..4c95747 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -47,3 +47,4 @@ anyhow = "1.0.31"
 schemars = "0.7.6"
 directories-next = "1.0.1"
 derive_more = "0.99.7"
+pretty-bytes = "0.2.2"
diff --git a/src/adapters.rs b/src/adapters.rs
index 33b067b..c857119 100644
--- a/src/adapters.rs
+++ b/src/adapters.rs
@@ -169,7 +169,7 @@ pub fn get_adapters_filtered<T: AsRef<str>>(
         def_enabled_adapters
     };
     debug!(
-        "Chosen adapters: {}",
+        "Chosen available adapters: {}",
         adapters
             .iter()
             .map(|a| a.metadata().name.clone())
diff --git a/src/adapters/tar.rs b/src/adapters/tar.rs
index fd559b1..76724cf 100644
--- a/src/adapters/tar.rs
+++ b/src/adapters/tar.rs
@@ -1,5 +1,5 @@
 use super::*;
-use crate::preproc::rga_preproc;
+use crate::{preproc::rga_preproc, print_bytes};
 use ::tar::EntryType::Regular;
 use anyhow::*;
 use lazy_static::lazy_static;
@@ -52,10 +52,10 @@ impl FileAdapter for TarAdapter {
             if Regular == file.header().entry_type() {
                 let path = PathBuf::from(file.path()?.to_owned());
                 debug!(
-                    "{}|{}: {} bytes",
+                    "{}|{}: {}",
                     filepath_hint.display(),
                     path.display(),
-                    file.header().size()?,
+                    print_bytes(file.header().size()? as f64),
                 );
                 let line_prefix = &format!("{}{}: ", line_prefix, path.display());
                 let ai2: AdaptInfo = AdaptInfo {
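
Aside — a minimal, illustrative sketch (not part of the diff): the pretty-bytes dependency added above backs the human-readable sizes now logged by the tar adapter, through the print_bytes helper defined in src/lib.rs further down. The function name here and the exact output strings are illustrative only.

fn human_bytes(bytes: u64) -> String {
    // pretty_bytes::converter::convert takes an f64 and returns an SI-style string.
    pretty_bytes::converter::convert(bytes as f64)
}

fn main() {
    for n in [532u64, 1_500, 20_971_520].iter() {
        // prints roughly: 532 B, 1.5 kB, 20.97 MB
        println!("{} -> {}", n, human_bytes(*n));
    }
}
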
diff --git a/src/args.rs b/src/args.rs
index d22cc9f..406b26c 100644
--- a/src/args.rs
+++ b/src/args.rs
@@ -234,6 +234,7 @@ where
     let proj = project_dirs()?;
     let config_dir = proj.config_dir();
     let config_filename = config_dir.join("config.json");
+    // TODO: don't read config file in rga-preproc for performance (called for every file)
     let config_file_config = {
         if config_filename.exists() {
             let config_file_contents =
@@ -304,6 +305,8 @@ where
     json_merge(&mut merged_config, &env_var_config);
     json_merge(&mut merged_config, &args_config);
 
+    // pass to child processes
+    std::env::set_var(RGA_CONFIG, &merged_config.to_string());
     log::debug!(
         "Merged config: {}",
         serde_json::to_string_pretty(&merged_config)?
diff --git a/src/bin/rga.rs b/src/bin/rga.rs
index 7dea264..b062b0d 100644
--- a/src/bin/rga.rs
+++ b/src/bin/rga.rs
@@ -1,14 +1,15 @@
 use anyhow::Result;
-use log::*;
 use rga::adapters::spawning::map_exe_error;
 use rga::adapters::*;
 use rga::args::*;
 use rga::matching::*;
+use rga::print_dur;
 use ripgrep_all as rga;
 use structopt::StructOpt;
 
 use schemars::schema_for;
 use std::process::Command;
+use std::time::{Duration, Instant};
 
 fn list_adapters(args: RgaConfig) -> Result<()> {
     let (enabled_adapters, disabled_adapters) = get_all_adapters(args.custom_adapters.clone());
@@ -114,17 +115,22 @@ fn main() -> anyhow::Result<()> {
     let exe = std::env::current_exe().expect("Could not get executable location");
     let preproc_exe = exe.with_file_name("rga-preproc");
 
-    let mut child = Command::new("rg")
-        .args(rg_args)
+    let before = Instant::now();
+    let mut cmd = Command::new("rg");
+    cmd.args(rg_args)
         .arg("--pre")
         .arg(preproc_exe)
         .arg("--pre-glob")
         .arg(pre_glob)
-        .args(passthrough_args)
+        .args(passthrough_args);
+    log::debug!("rg command to run: {:?}", cmd);
+    let mut child = cmd
         .spawn()
         .map_err(|e| map_exe_error(e, "rg", "Please make sure you have ripgrep installed."))?;
 
     child.wait()?;
+
+    log::debug!("running rg took {}", print_dur(before));
     Ok(())
 }
 
diff --git a/src/caching_writer.rs b/src/caching_writer.rs
index b6e22c7..6d8f042 100644
--- a/src/caching_writer.rs
+++ b/src/caching_writer.rs
@@ -11,6 +11,7 @@ pub struct CachingWriter<W: Write> {
     max_cache_size: usize,
     zstd_writer: Option<zstd::stream::write::Encoder<Vec<u8>>>,
     out: W,
+    bytes_written: u64,
 }
 impl<W: Write> CachingWriter<W> {
     pub fn new(out: W, max_cache_size: usize, compression_level: i32) -> Result<CachingWriter<W>> {
@@ -21,25 +22,22 @@ impl<W: Write> CachingWriter<W> {
             Vec::new(),
             compression_level,
             )?),
+            bytes_written: 0,
         })
     }
-    pub fn finish(self) -> std::io::Result<Option<Vec<u8>>> {
+    pub fn finish(self) -> std::io::Result<(u64, Option<Vec<u8>>)> {
         if let Some(writer) = self.zstd_writer {
             let res = writer.finish()?;
             if res.len() <= self.max_cache_size {
-                Ok(Some(res))
-            } else {
-                // drop cache
-                Ok(None)
+                return Ok((self.bytes_written, Some(res)));
             }
-        } else {
-            Ok(None)
         }
+        Ok((self.bytes_written, None))
     }
 }
 impl<W: Write> Write for CachingWriter<W> {
     fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
-        match self.zstd_writer.as_mut() {
+        let written_bytes = match self.zstd_writer.as_mut() {
             Some(writer) => {
                 let wrote = writer.write(buf)?;
                 let compressed_len = writer.get_ref().len();
@@ -53,7 +51,9 @@ impl<W: Write> Write for CachingWriter<W> {
                 Ok(wrote)
             }
             None => self.out.write(buf),
-        }
+        }?;
+        self.bytes_written += written_bytes as u64;
+        Ok(written_bytes)
     }
     fn flush(&mut self) -> std::io::Result<()> {
         debug!("flushing");
diff --git a/src/lib.rs b/src/lib.rs
index 69a46af..9aca539 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -10,8 +10,56 @@ use anyhow::Context;
 use anyhow::Result;
 pub use caching_writer::CachingWriter;
 use directories_next::ProjectDirs;
+use std::time::Instant;
 
 pub fn project_dirs() -> Result<ProjectDirs> {
     directories_next::ProjectDirs::from("", "", "ripgrep-all")
         .context("no home directory found! :(")
 }
+
+// no "significant digits" format specifier in rust??
+// https://stackoverflow.com/questions/60497397/how-do-you-format-a-float-to-the-first-significant-decimal-and-with-specified-pr
+fn meh(float: f32, precision: usize) -> usize {
+    // compute absolute value
+    let a = float.abs();
+
+    // if abs value is greater than 1, then precision becomes less than "standard"
+    let precision = if a >= 1. {
+        // reduce by number of digits, minimum 0
+        let n = (1. + a.log10().floor()) as usize;
+        if n <= precision {
+            precision - n
+        } else {
+            0
+        }
+    // if precision is less than 1 (but non-zero), then precision becomes greater than "standard"
+    } else if a > 0. {
+        // increase number of digits
+        let n = -(1. + a.log10().floor()) as usize;
+        precision + n
+    // special case for 0
+    } else {
+        0
+    };
+    precision
+}
+
+pub fn print_dur(start: Instant) -> String {
+    let mut dur = Instant::now().duration_since(start).as_secs_f32();
+    let mut suffix = "";
+    if dur < 0.1 {
+        suffix = "m";
+        dur *= 1000.0;
+    }
+    let precision = meh(dur, 3);
+    format!(
+        "{dur:.prec$}{suffix}s",
+        dur = dur,
+        prec = precision,
+        suffix = suffix
+    )
+}
+
+pub fn print_bytes(bytes: impl Into<f64>) -> String {
+    return pretty_bytes::converter::convert(bytes.into());
+}
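
Aside — a usage sketch (not part of the diff), assuming the crate is consumed as ripgrep_all with the two public helpers added above; the printed values are examples only:

use ripgrep_all::{print_bytes, print_dur};
use std::time::Instant;

fn main() {
    let start = Instant::now();
    let data = std::fs::read("Cargo.toml").expect("could not read file");

    // print_dur() formats to roughly 3 significant digits and switches to
    // milliseconds below 0.1s, e.g. "1.52s", "0.500s" or "2.31ms".
    eprintln!("reading took {}", print_dur(start));
    // print_bytes() delegates to pretty-bytes, e.g. "1.48 kB".
    eprintln!("read {}", print_bytes(data.len() as f64));
}
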
diff --git a/src/matching.rs b/src/matching.rs
index 297faf2..cbe1816 100644
--- a/src/matching.rs
+++ b/src/matching.rs
@@ -47,7 +47,7 @@ pub fn extension_to_regex(extension: &str) -> Regex {
 }
 
 pub fn adapter_matcher(
-    adapters: Vec<Rc<dyn FileAdapter>>,
+    adapters: &Vec<Rc<dyn FileAdapter>>,
     slow: bool,
 ) -> Result<impl Fn(FileMeta) -> Option<(Rc<dyn FileAdapter>, SlowMatcher)>> {
     // need order later
diff --git a/src/preproc.rs b/src/preproc.rs
index 5cebe0a..943569b 100644
--- a/src/preproc.rs
+++ b/src/preproc.rs
@@ -1,7 +1,7 @@
 use crate::adapters::*;
 use crate::args::RgaConfig;
 use crate::matching::*;
-use crate::CachingWriter;
+use crate::{print_bytes, print_dur, CachingWriter};
 use anyhow::*;
 use log::*;
 use path_clean::PathClean;
@@ -9,7 +9,10 @@ use std::convert::TryInto;
 use std::io::BufRead;
 use std::io::BufReader;
 use std::io::BufWriter;
-use std::sync::{Arc, RwLock};
+use std::{
+    sync::{Arc, RwLock},
+    time::Instant,
+};
 
 #[derive(Clone)]
 pub struct PreprocConfig<'a> {
@@ -33,22 +36,19 @@ pub fn rga_preproc(ai: AdaptInfo) -> Result<()> {
         archive_recursion_depth,
         ..
     } = ai;
+    debug!("path (hint) to preprocess: {:?}", filepath_hint);
     let PreprocConfig { mut cache, args } = config;
-    let adapters = adapter_matcher(
-        get_adapters_filtered(args.custom_adapters.clone(), &args.adapters)?,
-        args.accurate,
-    )?;
+    let filtered_adapters = get_adapters_filtered(args.custom_adapters.clone(), &args.adapters)?;
+    let adapters = adapter_matcher(&filtered_adapters, args.accurate)?;
     let filename = filepath_hint
         .file_name()
         .ok_or_else(|| format_err!("Empty filename"))?;
 
-    debug!("depth: {}", archive_recursion_depth);
+    debug!("Archive recursion depth: {}", archive_recursion_depth);
     if archive_recursion_depth >= args.max_archive_recursion.0 {
         writeln!(oup, "{}[rga: max archive recursion reached]", line_prefix)?;
         return Ok(());
     }
-    debug!("path_hint: {:?}", filepath_hint);
-
     // todo: figure out when using a bufreader is a good idea and when it is not
     // seems to be good for File::open() reads, but not sure about within archives (tar, zip)
     let inp = &mut BufReader::with_capacity(1 << 13, inp);
@@ -69,38 +69,47 @@ pub fn rga_preproc(ai: AdaptInfo) -> Result<()> {
         Some((adapter, detection_reason)) => {
             let meta = adapter.metadata();
             debug!(
-                "chose adapter '{}' because of matcher {:?}",
+                "Chose adapter '{}' because of matcher {:?}",
                 &meta.name, &detection_reason
             );
-            eprintln!("adapter: {}", &meta.name);
+            eprintln!(
+                "{} adapter: {}",
+                filepath_hint.to_string_lossy(),
+                &meta.name
+            );
             let db_name = format!("{}.v{}", meta.name, meta.version);
 
             if let Some(cache) = cache.as_mut() {
                 let cache_key: Vec<u8> = {
                     let clean_path = filepath_hint.to_owned().clean();
                     let meta = std::fs::metadata(&filepath_hint)?;
+                    let modified = meta.modified().expect("weird OS that can't into mtime");
                     if adapter.metadata().recurses {
                         let key = (
+                            filtered_adapters
+                                .iter()
+                                .map(|a| (a.metadata().name.clone(), a.metadata().version))
+                                .collect::<Vec<_>>(),
                             clean_path,
-                            meta.modified().expect("weird OS that can't into mtime"),
-                            &args.adapters[..],
+                            modified,
                         );
-                        debug!("cache key: {:?}", key);
+                        debug!("Cache key (with recursion): {:?}", key);
                         bincode::serialize(&key).expect("could not serialize path")
-                        // key in the cache database
                     } else {
                         let key = (
+                            adapter.metadata().name.clone(),
+                            adapter.metadata().version,
                             clean_path,
-                            meta.modified().expect("weird OS that can't into mtime"),
+                            modified,
                         );
-                        debug!("cache key: {:?}", key);
+                        debug!("Cache key (no recursion): {:?}", key);
                         bincode::serialize(&key).expect("could not serialize path")
-                        // key in the cache database
                     }
                 };
                 cache.write().unwrap().get_or_run(
                     &db_name,
                     &cache_key,
+                    &adapter.metadata().name,
                     Box::new(|| -> Result<Option<Vec<u8>>> {
                         // wrapping BufWriter here gives ~10% perf boost
                         let mut compbuf = BufWriter::new(CachingWriter::new(
@@ -108,7 +117,7 @@ pub fn rga_preproc(ai: AdaptInfo) -> Result<()> {
                             args.cache_max_blob_len.0.try_into().unwrap(),
                             args.cache_compression_level.0.try_into().unwrap(),
                         )?);
-                        debug!("adapting...");
+                        debug!("adapting with caching...");
                         adapter
                             .adapt(
                                 AdaptInfo {
@@ -129,13 +138,16 @@ pub fn rga_preproc(ai: AdaptInfo) -> Result<()> {
                                     meta.name
                                 )
                             })?;
-                        let compressed = compbuf
+                        let (uncompressed_size, compressed) = compbuf
                             .into_inner()
-                            .map_err(|_| "could not finish zstd") // can't use with_context here
-                            .unwrap()
+                            .map_err(|_| anyhow!("could not finish zstd"))? // can't use with_context here
                             .finish()?;
+                        debug!(
+                            "uncompressed output: {}",
+                            print_bytes(uncompressed_size as f64)
+                        );
                         if let Some(cached) = compressed {
-                            debug!("compressed len: {}", cached.len());
+                            debug!("compressed output: {}", print_bytes(cached.len() as f64));
                             Ok(Some(cached))
                         } else {
                             Ok(None)
@@ -149,8 +161,9 @@ pub fn rga_preproc(ai: AdaptInfo) -> Result<()> {
                 )?;
                 Ok(())
             } else {
-                // couldn't open cache
-                debug!("adapting...");
+                // no cache arc - probably within archive
+                debug!("adapting without caching...");
+                let start = Instant::now();
                 adapter
                     .adapt(
                         AdaptInfo {
@@ -171,6 +184,11 @@ pub fn rga_preproc(ai: AdaptInfo) -> Result<()> {
                             meta.name
                         )
                     })?;
+                debug!(
+                    "running adapter {} took {}",
+                    adapter.metadata().name,
+                    print_dur(start)
+                );
                 Ok(())
             }
         }
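
Aside — a reduced sketch of the cache-key scheme introduced above (not part of the diff): everything that should invalidate a cache entry — adapter name and version, the cleaned path, and the file mtime — goes into one serializable tuple that bincode turns into the LMDB key. The function name and the i32 version type are assumptions for illustration only.

use std::path::Path;
use std::time::SystemTime;

fn single_adapter_cache_key(
    adapter_name: &str,
    adapter_version: i32,
    clean_path: &Path,
    modified: SystemTime,
) -> Vec<u8> {
    // Bump or rename the adapter and the serialized key changes,
    // so stale cache entries are simply never looked up again.
    let key = (adapter_name, adapter_version, clean_path, modified);
    bincode::serialize(&key).expect("could not serialize path")
}
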
diff --git a/src/preproc_cache.rs b/src/preproc_cache.rs
index 219081c..f02cfa3 100644
--- a/src/preproc_cache.rs
+++ b/src/preproc_cache.rs
@@ -1,9 +1,10 @@
-use crate::project_dirs;
+use crate::{print_bytes, print_dur, project_dirs};
 use anyhow::{format_err, Context, Result};
 use log::*;
 use std::{
     fmt::Display,
     sync::{Arc, RwLock},
+    time::Instant,
 };
 
 pub fn open() -> Result<Arc<RwLock<dyn PreprocCache>>> {
@@ -15,6 +16,7 @@ pub trait PreprocCache {
         &mut self,
         db_name: &str,
         key: &[u8],
+        adapter_name: &str,
         runner: Box<dyn FnOnce() -> Result<Option<Vec<u8>>> + 'a>,
         callback: Box<dyn FnOnce(&[u8]) -> Result<()> + 'a>,
     ) -> Result<()>;
@@ -73,9 +75,11 @@ impl PreprocCache for LmdbCache {
         &mut self,
         db_name: &str,
         key: &[u8],
+        adapter_name: &str,
         runner: Box<dyn FnOnce() -> Result<Option<Vec<u8>>> + 'a>,
         callback: Box<dyn FnOnce(&[u8]) -> Result<()> + 'a>,
     ) -> Result<()> {
+        let start = Instant::now();
        let db_env = self.db_arc.read().unwrap();
         let db = db_env
             .open_single(db_name, rkv::store::Options::create())
@@ -90,14 +94,19 @@ impl PreprocCache for LmdbCache {
 
         match cached {
             Some(rkv::Value::Blob(cached)) => {
-                debug!("got cached");
+                debug!("cache HIT, reading from cache");
+                debug!("reading from cache took {}", print_dur(start));
                 callback(cached)?;
             }
             Some(_) => Err(format_err!("Integrity: value not blob"))?,
             None => {
-                debug!("did not get cached");
+                debug!("cache MISS, running adapter");
                 drop(reader);
-                if let Some(got) = runner()? {
+                let runner_res = runner()?;
+                debug!("running adapter {} took {}", adapter_name, print_dur(start));
+                let start = Instant::now();
+                if let Some(got) = runner_res {
+                    debug!("writing {} to cache", print_bytes(got.len() as f64));
                     let mut writer = db_env
                         .write()
                         .map_err(RkvErrWrap)
@@ -109,6 +118,9 @@ impl PreprocCache for LmdbCache {
                         .commit()
                         .map_err(RkvErrWrap)
                         .with_context(|| format!("could not write cache"))?;
+                    debug!("writing to cache took {}", print_dur(start));
+                } else {
+                    debug!("not caching output");
                 }
             }
         };
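
Aside — the timing changes in this patch all follow the same pattern: take an Instant before a step, then log the elapsed time with print_dur. A hypothetical helper (not something this patch adds) that captures that pattern, assuming the log crate and the ripgrep_all helpers are available:

use ripgrep_all::print_dur;
use std::time::Instant;

fn timed<T>(what: &str, f: impl FnOnce() -> T) -> T {
    let start = Instant::now();
    let result = f();
    // same output format as the debug! calls above, e.g. "running adapter took 12.3ms"
    log::debug!("{} took {}", what, print_dur(start));
    result
}

// Hypothetical call site:
// let res = timed("running adapter", || runner());
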