Check for excessively deep archive recursion

This commit is contained in:
phiresky 2019-06-07 20:12:24 +02:00
parent 9c5efa1970
commit 5d9dee15e8
9 changed files with 40 additions and 22 deletions

View File

@ -1,7 +1,7 @@
[package] [package]
name = "rga" name = "rga"
description = "ripgrep but for pdf, ebooks, Office documents, archives, etc" description = "ripgrep but also search in PDFs, E-Books, Office documents, etc, and also in archives"
license = "AGPL-3.0-or-later" license = "AGPL-3.0-or-later"
version = "0.4.0" version = "0.4.0"
repository = "https://github.com/phiresky/rga" repository = "https://github.com/phiresky/rga"

BIN
exampledir/droste.zip Normal file

Binary file not shown.

View File

@ -57,7 +57,7 @@ pub struct AdaptInfo<'a> {
/// prefix every output line with this string to better indicate the file's location if it is in some archive /// prefix every output line with this string to better indicate the file's location if it is in some archive
pub line_prefix: &'a str, pub line_prefix: &'a str,
// pub adapt_subobject: &'a dyn Fn(AdaptInfo) -> Fallible<()>, // pub adapt_subobject: &'a dyn Fn(AdaptInfo) -> Fallible<()>,
pub config: &'a mut PreprocConfig, pub config: PreprocConfig,
} }
pub fn extension_to_regex(extension: &str) -> Regex { pub fn extension_to_regex(extension: &str) -> Regex {

View File

@ -1,13 +1,9 @@
use super::spawning::map_exe_error;
use super::*; use super::*;
use failure::*; use failure::*;
use lazy_static::lazy_static; use lazy_static::lazy_static;
use rusqlite::types::{ToSql, ValueRef}; use rusqlite::types::ValueRef;
use rusqlite::*; use rusqlite::*;
use serde::{Deserialize, Serialize};
use std::convert::TryInto; use std::convert::TryInto;
use std::io::BufReader;
use std::process::*;
static EXTENSIONS: &[&str] = &["db", "db3", "sqlite", "sqlite3"]; static EXTENSIONS: &[&str] = &["db", "db3", "sqlite", "sqlite3"];

View File

@ -82,7 +82,7 @@ impl FileAdapter for TarAdapter {
inp: &mut file, inp: &mut file,
oup, oup,
line_prefix, line_prefix,
config, config: config.clone(),
}; };
rga_preproc(ai2)?; rga_preproc(ai2)?;
} }

View File

@ -61,7 +61,8 @@ impl FileAdapter for ZipAdapter {
continue; continue;
} }
eprintln!( eprintln!(
"{}|{}: {} bytes ({} bytes packed)", "{}{}|{}: {} bytes ({} bytes packed)",
line_prefix,
filepath_hint.to_string_lossy(), filepath_hint.to_string_lossy(),
file.name(), file.name(),
file.size(), file.size(),
@ -75,7 +76,7 @@ impl FileAdapter for ZipAdapter {
oup, oup,
line_prefix, line_prefix,
archive_recursion_depth: archive_recursion_depth + 1, archive_recursion_depth: archive_recursion_depth + 1,
config, config: config.clone(),
})?; })?;
} }
Err(e) => return Err(e.into()), Err(e) => return Err(e.into()),

View File

@ -4,7 +4,6 @@ use rga::preproc::*;
use std::env; use std::env;
use std::fs::File; use std::fs::File;
use std::io::BufReader; use std::io::BufReader;
fn main() -> Result<(), Error> { fn main() -> Result<(), Error> {
let path = { let path = {
let filepath = std::env::args_os() let filepath = std::env::args_os()
@ -28,7 +27,10 @@ fn main() -> Result<(), Error> {
oup: &mut o, oup: &mut o,
line_prefix: "", line_prefix: "",
archive_recursion_depth: 0, archive_recursion_depth: 0,
config: &mut PreprocConfig { cache }, config: PreprocConfig {
cache,
max_archive_recursion: 3,
},
}; };
rga_preproc(ai) rga_preproc(ai)

View File

@ -3,14 +3,16 @@ use crate::CachingWriter;
use failure::Fallible; use failure::Fallible;
use failure::{format_err, Error}; use failure::{format_err, Error};
use path_clean::PathClean; use path_clean::PathClean;
use std::convert::AsRef;
use std::io::BufWriter; use std::io::BufWriter;
// longest compressed conversion output to save in cache // longest compressed conversion output to save in cache
const MAX_DB_BLOB_LEN: usize = 2_000_000; const MAX_DB_BLOB_LEN: usize = 2_000_000;
const ZSTD_LEVEL: i32 = 12; const ZSTD_LEVEL: i32 = 12;
use std::sync::{Arc, RwLock};
#[derive(Clone)]
pub struct PreprocConfig { pub struct PreprocConfig {
pub cache: Option<Box<dyn crate::preproc_cache::PreprocCache>>, pub cache: Option<Arc<RwLock<dyn crate::preproc_cache::PreprocCache>>>,
pub max_archive_recursion: i32,
} }
/** /**
* preprocess a file as defined in `ai`. * preprocess a file as defined in `ai`.
@ -27,11 +29,21 @@ pub fn rga_preproc(ai: AdaptInfo) -> Result<(), Error> {
oup, oup,
line_prefix, line_prefix,
config, config,
archive_recursion_depth,
.. ..
} = ai; } = ai;
let PreprocConfig {
mut cache,
max_archive_recursion,
} = config;
let filename = filepath_hint let filename = filepath_hint
.file_name() .file_name()
.ok_or_else(|| format_err!("Empty filename"))?; .ok_or_else(|| format_err!("Empty filename"))?;
eprintln!("depth: {}", archive_recursion_depth);
if archive_recursion_depth >= config.max_archive_recursion {
writeln!(oup, "{}[rga: max archive recursion reached]", line_prefix)?;
return Ok(());
}
eprintln!("path_hint: {:?}", filepath_hint); eprintln!("path_hint: {:?}", filepath_hint);
@ -49,7 +61,7 @@ pub fn rga_preproc(ai: AdaptInfo) -> Result<(), Error> {
let meta = ad.metadata(); let meta = ad.metadata();
eprintln!("adapter: {}", &meta.name); eprintln!("adapter: {}", &meta.name);
let db_name = format!("{}.v{}", meta.name, meta.version); let db_name = format!("{}.v{}", meta.name, meta.version);
if let Some(cache) = config.cache.as_mut() { if let Some(cache) = cache.as_mut() {
let cache_key: Vec<u8> = { let cache_key: Vec<u8> = {
let clean_path = filepath_hint.to_owned().clean(); let clean_path = filepath_hint.to_owned().clean();
let meta = std::fs::metadata(&filepath_hint)?; let meta = std::fs::metadata(&filepath_hint)?;
@ -62,7 +74,7 @@ pub fn rga_preproc(ai: AdaptInfo) -> Result<(), Error> {
bincode::serialize(&key).expect("could not serialize path") // key in the cache database bincode::serialize(&key).expect("could not serialize path") // key in the cache database
}; };
cache.get_or_run( cache.write().unwrap().get_or_run(
&db_name, &db_name,
&cache_key, &cache_key,
Box::new(|| -> Fallible<Option<Vec<u8>>> { Box::new(|| -> Fallible<Option<Vec<u8>>> {
@ -76,8 +88,11 @@ pub fn rga_preproc(ai: AdaptInfo) -> Result<(), Error> {
is_real_file, is_real_file,
inp, inp,
oup: &mut compbuf, oup: &mut compbuf,
archive_recursion_depth: 0, archive_recursion_depth,
config: &mut PreprocConfig { cache: None }, config: PreprocConfig {
cache: None,
max_archive_recursion,
},
})?; })?;
let compressed = compbuf let compressed = compbuf
.into_inner() .into_inner()
@ -104,8 +119,11 @@ pub fn rga_preproc(ai: AdaptInfo) -> Result<(), Error> {
is_real_file, is_real_file,
inp, inp,
oup, oup,
archive_recursion_depth: 0, archive_recursion_depth,
config: &mut PreprocConfig { cache: None }, config: PreprocConfig {
cache: None,
max_archive_recursion,
},
})?; })?;
Ok(()) Ok(())
} }

View File

@ -1,7 +1,8 @@
use failure::{format_err, Fallible}; use failure::{format_err, Fallible};
use std::sync::{Arc, RwLock};
pub fn open() -> Fallible<Box<dyn PreprocCache>> { pub fn open() -> Fallible<Arc<RwLock<dyn PreprocCache>>> {
Ok(Box::new(LmdbCache::open()?)) Ok(Arc::new(RwLock::new(LmdbCache::open()?)))
} }
pub trait PreprocCache { pub trait PreprocCache {
// possible without second lambda? // possible without second lambda?