add lots of debug and timings

phiresky 2020-06-09 12:47:34 +02:00
parent 94099baeb3
commit 5410da1133
11 changed files with 155 additions and 47 deletions

Cargo.lock (generated, 20 lines changed) View File

@@ -415,6 +415,15 @@ dependencies = [
  "typenum",
 ]
 
+[[package]]
+name = "getopts"
+version = "0.2.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "14dbbfd5c71d70241ecf9e6f13737f7b5ce823821063188d7e46c41d371eebd5"
+dependencies = [
+ "unicode-width",
+]
+
 [[package]]
 name = "getrandom"
 version = "0.1.14"
@@ -794,6 +803,16 @@ version = "0.2.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "237a5ed80e274dbc66f86bd59c1e25edc039660be53194b5fe0a482e0f2612ea"
 
+[[package]]
+name = "pretty-bytes"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "009d6edd2c1dbf2e1c0cd48a2f7766e03498d49ada7109a01c6911815c685316"
+dependencies = [
+ "atty",
+ "getopts",
+]
+
 [[package]]
 name = "proc-macro-error"
 version = "1.0.2"
@@ -1071,6 +1090,7 @@ dependencies = [
  "log",
  "paste",
  "path-clean",
+ "pretty-bytes",
  "regex",
  "rkv",
  "rusqlite",

Cargo.toml View File

@@ -47,3 +47,4 @@ anyhow = "1.0.31"
 schemars = "0.7.6"
 directories-next = "1.0.1"
 derive_more = "0.99.7"
+pretty-bytes = "0.2.2"

src/adapters.rs View File

@@ -169,7 +169,7 @@ pub fn get_adapters_filtered<T: AsRef<str>>(
         def_enabled_adapters
     };
     debug!(
-        "Chosen adapters: {}",
+        "Chosen available adapters: {}",
         adapters
             .iter()
             .map(|a| a.metadata().name.clone())

src/adapters/tar.rs View File

@@ -1,5 +1,5 @@
 use super::*;
-use crate::preproc::rga_preproc;
+use crate::{preproc::rga_preproc, print_bytes};
 use ::tar::EntryType::Regular;
 use anyhow::*;
 use lazy_static::lazy_static;
@@ -52,10 +52,10 @@ impl FileAdapter for TarAdapter {
             if Regular == file.header().entry_type() {
                 let path = PathBuf::from(file.path()?.to_owned());
                 debug!(
-                    "{}|{}: {} bytes",
+                    "{}|{}: {}",
                     filepath_hint.display(),
                     path.display(),
-                    file.header().size()?,
+                    print_bytes(file.header().size()? as f64),
                 );
                 let line_prefix = &format!("{}{}: ", line_prefix, path.display());
                 let ai2: AdaptInfo = AdaptInfo {

src/args.rs View File

@@ -234,6 +234,7 @@ where
     let proj = project_dirs()?;
     let config_dir = proj.config_dir();
     let config_filename = config_dir.join("config.json");
+    // TODO: don't read config file in rga-preproc for performance (called for every file)
     let config_file_config = {
         if config_filename.exists() {
             let config_file_contents =
@@ -304,6 +305,8 @@ where
     json_merge(&mut merged_config, &env_var_config);
     json_merge(&mut merged_config, &args_config);
+    // pass to child processes
+    std::env::set_var(RGA_CONFIG, &merged_config.to_string());
     log::debug!(
         "Merged config: {}",
         serde_json::to_string_pretty(&merged_config)?
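
Since `rg --pre` spawns `rga-preproc` once per searched file, the `set_var` line above hands the already-merged config to every child through the environment instead of having each child re-read and re-merge the config file. A minimal sketch of that handoff (the `RGA_CONFIG` name is from this diff; the JSON value and the `printenv` child are placeholders, unix-only):

```rust
use std::process::Command;

fn main() {
    // parent: merge config once, export it for all children
    let merged_config = r#"{"accurate":false}"#; // placeholder JSON
    std::env::set_var("RGA_CONFIG", merged_config);

    // any child spawned afterwards inherits the variable
    let out = Command::new("printenv")
        .arg("RGA_CONFIG")
        .output()
        .expect("failed to run printenv");
    assert_eq!(String::from_utf8_lossy(&out.stdout).trim(), merged_config);
}
```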

src/bin/rga.rs View File

@@ -1,14 +1,15 @@
 use anyhow::Result;
+use log::*;
 use rga::adapters::spawning::map_exe_error;
 use rga::adapters::*;
 use rga::args::*;
 use rga::matching::*;
+use rga::print_dur;
 use ripgrep_all as rga;
 use structopt::StructOpt;
 
 use schemars::schema_for;
 use std::process::Command;
+use std::time::{Duration, Instant};
 
 fn list_adapters(args: RgaConfig) -> Result<()> {
     let (enabled_adapters, disabled_adapters) = get_all_adapters(args.custom_adapters.clone());
@@ -114,17 +115,22 @@ fn main() -> anyhow::Result<()> {
     let exe = std::env::current_exe().expect("Could not get executable location");
     let preproc_exe = exe.with_file_name("rga-preproc");
-    let mut child = Command::new("rg")
-        .args(rg_args)
+    let before = Instant::now();
+    let mut cmd = Command::new("rg");
+    cmd.args(rg_args)
         .arg("--pre")
        .arg(preproc_exe)
         .arg("--pre-glob")
         .arg(pre_glob)
-        .args(passthrough_args)
+        .args(passthrough_args);
+    log::debug!("rg command to run: {:?}", cmd);
+    let mut child = cmd
         .spawn()
         .map_err(|e| map_exe_error(e, "rg", "Please make sure you have ripgrep installed."))?;
 
     child.wait()?;
+    log::debug!("running rg took {}", print_dur(before));
     Ok(())
 }
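
The builder chain is split into a named `cmd` because `std::process::Command` implements `Debug`, so the full command line can be logged before `spawn()` runs. The same pattern, reduced to a standalone sketch with `echo` standing in for `rg`:

```rust
use std::process::Command;
use std::time::Instant;

fn main() -> std::io::Result<()> {
    let before = Instant::now();
    let mut cmd = Command::new("echo"); // stand-in for "rg"
    cmd.arg("hello").arg("world");
    // printable because Command implements Debug, e.g. `"echo" "hello" "world"`
    eprintln!("command to run: {:?}", cmd);
    let mut child = cmd.spawn()?;
    child.wait()?;
    eprintln!("child ran for {:?}", before.elapsed());
    Ok(())
}
```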

src/caching_writer.rs View File

@@ -11,6 +11,7 @@ pub struct CachingWriter<W: Write> {
     max_cache_size: usize,
     zstd_writer: Option<zstd::stream::write::Encoder<Vec<u8>>>,
     out: W,
+    bytes_written: u64,
 }
 
 impl<W: Write> CachingWriter<W> {
     pub fn new(out: W, max_cache_size: usize, compression_level: i32) -> Result<CachingWriter<W>> {
@@ -21,25 +22,22 @@ impl<W: Write> CachingWriter<W> {
                 Vec::new(),
                 compression_level,
             )?),
+            bytes_written: 0,
         })
     }
 
-    pub fn finish(self) -> std::io::Result<Option<Vec<u8>>> {
+    pub fn finish(self) -> std::io::Result<(u64, Option<Vec<u8>>)> {
         if let Some(writer) = self.zstd_writer {
             let res = writer.finish()?;
             if res.len() <= self.max_cache_size {
-                Ok(Some(res))
-            } else {
-                // drop cache
-                Ok(None)
+                return Ok((self.bytes_written, Some(res)));
             }
-        } else {
-            Ok(None)
         }
+        Ok((self.bytes_written, None))
     }
 }
 
 impl<W: Write> Write for CachingWriter<W> {
     fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
-        match self.zstd_writer.as_mut() {
+        let written_bytes = match self.zstd_writer.as_mut() {
             Some(writer) => {
                 let wrote = writer.write(buf)?;
                 let compressed_len = writer.get_ref().len();
@@ -53,7 +51,9 @@ impl<W: Write> Write for CachingWriter<W> {
                 Ok(wrote)
             }
             None => self.out.write(buf),
-        }
+        }?;
+        self.bytes_written += written_bytes as u64;
+        Ok(written_bytes)
     }
 
     fn flush(&mut self) -> std::io::Result<()> {
         debug!("flushing");

src/lib.rs View File

@@ -10,8 +10,56 @@ use anyhow::Context;
 use anyhow::Result;
 pub use caching_writer::CachingWriter;
 use directories_next::ProjectDirs;
+use std::time::Instant;
 
 pub fn project_dirs() -> Result<ProjectDirs> {
     directories_next::ProjectDirs::from("", "", "ripgrep-all")
         .context("no home directory found! :(")
 }
+
+// no "significant digits" format specifier in rust??
+// https://stackoverflow.com/questions/60497397/how-do-you-format-a-float-to-the-first-significant-decimal-and-with-specified-pr
+fn meh(float: f32, precision: usize) -> usize {
+    // compute absolute value
+    let a = float.abs();
+    // if abs value is greater than 1, then precision becomes less than "standard"
+    let precision = if a >= 1. {
+        // reduce by number of digits, minimum 0
+        let n = (1. + a.log10().floor()) as usize;
+        if n <= precision {
+            precision - n
+        } else {
+            0
+        }
+    // if precision is less than 1 (but non-zero), then precision becomes greater than "standard"
+    } else if a > 0. {
+        // increase number of digits
+        let n = -(1. + a.log10().floor()) as usize;
+        precision + n
+    // special case for 0
+    } else {
+        0
+    };
+    precision
+}
+
+pub fn print_dur(start: Instant) -> String {
+    let mut dur = Instant::now().duration_since(start).as_secs_f32();
+    let mut suffix = "";
+    if dur < 0.1 {
+        suffix = "m";
+        dur *= 1000.0;
+    }
+    let precision = meh(dur, 3);
+    format!(
+        "{dur:.prec$}{suffix}s",
+        dur = dur,
+        prec = precision,
+        suffix = suffix
+    )
+}
+
+pub fn print_bytes(bytes: impl Into<f64>) -> String {
+    return pretty_bytes::converter::convert(bytes.into());
+}
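
The two helpers behave roughly as follows (worked examples, not output from the commit): `meh` computes how many decimals keep about three significant digits, and `print_dur` switches to a milliseconds suffix below 0.1 s.

```rust
use std::time::{Duration, Instant};

// assuming the helpers above are in scope; from outside the crate it
// would be `use ripgrep_all::{print_bytes, print_dur};`
fn main() {
    let start = Instant::now() - Duration::from_millis(42);
    // 0.042 s < 0.1 s, so the value is scaled to 42.0 and suffixed "m" + "s";
    // meh(42.0, 3) == 1 decimal, giving roughly "42.0ms"
    println!("{}", print_dur(start));
    // pretty-bytes turns raw counts into human units, e.g. "1.5 MB"
    println!("{}", print_bytes(1_500_000u32));
}
```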

src/matching.rs View File

@@ -47,7 +47,7 @@ pub fn extension_to_regex(extension: &str) -> Regex {
 }
 
 pub fn adapter_matcher(
-    adapters: Vec<Rc<dyn FileAdapter>>,
+    adapters: &Vec<Rc<dyn FileAdapter>>,
     slow: bool,
 ) -> Result<impl Fn(FileMeta) -> Option<(Rc<dyn FileAdapter>, SlowMatcher)>> {
     // need order later
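
Borrowing the adapter list (`&Vec<...>` instead of `Vec<...>`) matters because preproc.rs below now uses `filtered_adapters` twice: once to build the matcher and once in the cache key. A minimal illustration of the move-vs-borrow issue, with toy types instead of the real `FileAdapter` trait:

```rust
use std::rc::Rc;

fn build_matcher(adapters: &[Rc<String>]) -> impl Fn(&str) -> bool + '_ {
    move |name| adapters.iter().any(|a| a.as_str() == name)
}

fn main() {
    let adapters = vec![Rc::new("tar".to_string()), Rc::new("zip".to_string())];
    let matcher = build_matcher(&adapters);
    assert!(matcher("tar"));
    // because the matcher only borrowed the list, it is still
    // available here, e.g. for building a cache key:
    println!("{} adapters", adapters.len());
}
```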

src/preproc.rs View File

@@ -1,7 +1,7 @@
 use crate::adapters::*;
 use crate::args::RgaConfig;
 use crate::matching::*;
-use crate::CachingWriter;
+use crate::{print_bytes, print_dur, CachingWriter};
 use anyhow::*;
 use log::*;
 use path_clean::PathClean;
@@ -9,7 +9,10 @@ use std::convert::TryInto;
 use std::io::BufRead;
 use std::io::BufReader;
 use std::io::BufWriter;
-use std::sync::{Arc, RwLock};
+use std::{
+    sync::{Arc, RwLock},
+    time::Instant,
+};
 
 #[derive(Clone)]
 pub struct PreprocConfig<'a> {
@@ -33,22 +36,19 @@ pub fn rga_preproc(ai: AdaptInfo) -> Result<()> {
         archive_recursion_depth,
         ..
     } = ai;
+    debug!("path (hint) to preprocess: {:?}", filepath_hint);
     let PreprocConfig { mut cache, args } = config;
-    let adapters = adapter_matcher(
-        get_adapters_filtered(args.custom_adapters.clone(), &args.adapters)?,
-        args.accurate,
-    )?;
+    let filtered_adapters = get_adapters_filtered(args.custom_adapters.clone(), &args.adapters)?;
+    let adapters = adapter_matcher(&filtered_adapters, args.accurate)?;
     let filename = filepath_hint
         .file_name()
         .ok_or_else(|| format_err!("Empty filename"))?;
-    debug!("depth: {}", archive_recursion_depth);
+    debug!("Archive recursion depth: {}", archive_recursion_depth);
     if archive_recursion_depth >= args.max_archive_recursion.0 {
         writeln!(oup, "{}[rga: max archive recursion reached]", line_prefix)?;
         return Ok(());
     }
-    debug!("path_hint: {:?}", filepath_hint);
 
     // todo: figure out when using a bufreader is a good idea and when it is not
     // seems to be good for File::open() reads, but not sure about within archives (tar, zip)
     let inp = &mut BufReader::with_capacity(1 << 13, inp);
@@ -69,38 +69,47 @@ pub fn rga_preproc(ai: AdaptInfo) -> Result<()> {
         Some((adapter, detection_reason)) => {
            let meta = adapter.metadata();
            debug!(
-                "chose adapter '{}' because of matcher {:?}",
+                "Chose adapter '{}' because of matcher {:?}",
                 &meta.name, &detection_reason
             );
-            eprintln!("adapter: {}", &meta.name);
+            eprintln!(
+                "{} adapter: {}",
+                filepath_hint.to_string_lossy(),
+                &meta.name
+            );
             let db_name = format!("{}.v{}", meta.name, meta.version);
             if let Some(cache) = cache.as_mut() {
                 let cache_key: Vec<u8> = {
                     let clean_path = filepath_hint.to_owned().clean();
                     let meta = std::fs::metadata(&filepath_hint)?;
+                    let modified = meta.modified().expect("weird OS that can't into mtime");
                     if adapter.metadata().recurses {
                         let key = (
+                            filtered_adapters
+                                .iter()
+                                .map(|a| (a.metadata().name.clone(), a.metadata().version))
+                                .collect::<Vec<_>>(),
                             clean_path,
-                            meta.modified().expect("weird OS that can't into mtime"),
-                            &args.adapters[..],
+                            modified,
                         );
-                        debug!("cache key: {:?}", key);
+                        debug!("Cache key (with recursion): {:?}", key);
                         bincode::serialize(&key).expect("could not serialize path")
+                        // key in the cache database
                     } else {
                         let key = (
+                            adapter.metadata().name.clone(),
+                            adapter.metadata().version,
                             clean_path,
-                            meta.modified().expect("weird OS that can't into mtime"),
+                            modified,
                         );
-                        debug!("cache key: {:?}", key);
+                        debug!("Cache key (no recursion): {:?}", key);
                         bincode::serialize(&key).expect("could not serialize path")
+                        // key in the cache database
                     }
                 };
                 cache.write().unwrap().get_or_run(
                     &db_name,
                     &cache_key,
+                    &adapter.metadata().name,
                     Box::new(|| -> Result<Option<Vec<u8>>> {
                         // wrapping BufWriter here gives ~10% perf boost
                         let mut compbuf = BufWriter::new(CachingWriter::new(
@@ -108,7 +117,7 @@ pub fn rga_preproc(ai: AdaptInfo) -> Result<()> {
                             args.cache_max_blob_len.0.try_into().unwrap(),
                             args.cache_compression_level.0.try_into().unwrap(),
                         )?);
-                        debug!("adapting...");
+                        debug!("adapting with caching...");
                        adapter
                             .adapt(
                                 AdaptInfo {
@@ -129,13 +138,16 @@ pub fn rga_preproc(ai: AdaptInfo) -> Result<()> {
                                     meta.name
                                 )
                             })?;
-                        let compressed = compbuf
+                        let (uncompressed_size, compressed) = compbuf
                             .into_inner()
-                            .map_err(|_| "could not finish zstd") // can't use with_context here
-                            .unwrap()
+                            .map_err(|_| anyhow!("could not finish zstd"))? // can't use with_context here
                             .finish()?;
+                        debug!(
+                            "uncompressed output: {}",
+                            print_bytes(uncompressed_size as f64)
+                        );
                         if let Some(cached) = compressed {
-                            debug!("compressed len: {}", cached.len());
+                            debug!("compressed output: {}", print_bytes(cached.len() as f64));
                             Ok(Some(cached))
                         } else {
                             Ok(None)
@@ -149,8 +161,9 @@ pub fn rga_preproc(ai: AdaptInfo) -> Result<()> {
             )?;
             Ok(())
         } else {
-            // couldn't open cache
-            debug!("adapting...");
+            // no cache arc - probably within archive
+            debug!("adapting without caching...");
+            let start = Instant::now();
             adapter
                 .adapt(
                     AdaptInfo {
@@ -171,6 +184,11 @@ pub fn rga_preproc(ai: AdaptInfo) -> Result<()> {
                             meta.name
                         )
                     })?;
+            debug!(
+                "running adapter {} took {}",
+                adapter.metadata().name,
+                print_dur(start)
+            );
             Ok(())
         }
     }
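
The reshaped cache key is worth spelling out: a recursing adapter (tar, zip) may feed the files it extracts to any other adapter, so its cached output must be invalidated when any adapter's version changes; hence the whole `(name, version)` list goes into the key, while a non-recursing adapter keys only on itself. A simplified sketch of the idea, with toy types, assuming serde and bincode 1.x as in the lockfile:

```rust
use std::time::SystemTime;

// toy stand-in for the (name, version) pairs of all filtered adapters
fn recursive_cache_key(adapters: &[(String, i32)], clean_path: &str) -> Vec<u8> {
    let modified = SystemTime::now(); // in rga: the file's mtime
    // serializing the whole tuple means any change to adapter versions,
    // path, or mtime yields a different LMDB key
    let key = (adapters, clean_path, modified);
    bincode::serialize(&key).expect("could not serialize key")
}
```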

src/preproc_cache.rs View File

@@ -1,9 +1,10 @@
-use crate::project_dirs;
+use crate::{print_bytes, print_dur, project_dirs};
 use anyhow::{format_err, Context, Result};
 use log::*;
 use std::{
     fmt::Display,
     sync::{Arc, RwLock},
+    time::Instant,
 };
 
 pub fn open() -> Result<Arc<RwLock<dyn PreprocCache>>> {
@@ -15,6 +16,7 @@ pub trait PreprocCache {
         &mut self,
         db_name: &str,
         key: &[u8],
+        adapter_name: &str,
         runner: Box<dyn FnOnce() -> Result<Option<Vec<u8>>> + 'a>,
         callback: Box<dyn FnOnce(&[u8]) -> Result<()> + 'a>,
     ) -> Result<()>;
@@ -73,9 +75,11 @@ impl PreprocCache for LmdbCache {
         &mut self,
         db_name: &str,
         key: &[u8],
+        adapter_name: &str,
         runner: Box<dyn FnOnce() -> Result<Option<Vec<u8>>> + 'a>,
         callback: Box<dyn FnOnce(&[u8]) -> Result<()> + 'a>,
     ) -> Result<()> {
+        let start = Instant::now();
         let db_env = self.db_arc.read().unwrap();
         let db = db_env
             .open_single(db_name, rkv::store::Options::create())
@@ -90,14 +94,19 @@ impl PreprocCache for LmdbCache {
         match cached {
             Some(rkv::Value::Blob(cached)) => {
-                debug!("got cached");
+                debug!("cache HIT, reading from cache");
+                debug!("reading from cache took {}", print_dur(start));
                 callback(cached)?;
             }
             Some(_) => Err(format_err!("Integrity: value not blob"))?,
             None => {
-                debug!("did not get cached");
+                debug!("cache MISS, running adapter");
                 drop(reader);
-                if let Some(got) = runner()? {
+                let runner_res = runner()?;
+                debug!("running adapter {} took {}", adapter_name, print_dur(start));
+                let start = Instant::now();
+                if let Some(got) = runner_res {
+                    debug!("writing {} to cache", print_bytes(got.len() as f64));
                     let mut writer = db_env
                         .write()
                         .map_err(RkvErrWrap)
@@ -109,6 +118,9 @@ impl PreprocCache for LmdbCache {
                         .commit()
                         .map_err(RkvErrWrap)
                         .with_context(|| format!("could not write cache"))?;
+                    debug!("writing to cache took {}", print_dur(start));
+                } else {
+                    debug!("not caching output");
                 }
             }
         };
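
All of the new timings follow one pattern: take an `Instant` at the start of a phase, log the elapsed time when it ends, and shadow the variable to time the next phase, as `get_or_run` does above between running the adapter and writing the cache. The bare pattern:

```rust
use std::time::Instant;

fn run_phases() {
    let start = Instant::now();
    phase_one();
    eprintln!("phase one took {:?}", start.elapsed());

    let start = Instant::now(); // shadowed: restart the clock
    phase_two();
    eprintln!("phase two took {:?}", start.elapsed());
}

fn phase_one() {}
fn phase_two() {}
```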