From d8b57f2f8a74b5e8f0e1b3089da69e69579c49cd Mon Sep 17 00:00:00 2001 From: phiresky Date: Sun, 16 Jun 2019 11:07:29 +0200 Subject: [PATCH] split decompress adapter --- CHANGELOG.md | 5 ++ exampledir/compress/test.log | 2 + exampledir/compress/test.log.bz2 | Bin 0 -> 62 bytes exampledir/compress/test.log.gz | Bin 0 -> 54 bytes exampledir/compress/test.log.xz | Bin 0 -> 84 bytes exampledir/compress/test.log.zst | Bin 0 -> 40 bytes src/adapters.rs | 4 +- src/adapters/decompress.rs | 125 +++++++++++++++++++++++++++++++ src/adapters/tar.rs | 24 +----- src/preproc.rs | 9 ++- 10 files changed, 143 insertions(+), 26 deletions(-) create mode 100644 exampledir/compress/test.log create mode 100644 exampledir/compress/test.log.bz2 create mode 100644 exampledir/compress/test.log.gz create mode 100644 exampledir/compress/test.log.xz create mode 100644 exampledir/compress/test.log.zst create mode 100644 src/adapters/decompress.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index fc2e95e..a64e41a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +# 0.9.0 (2019-06-16) + +- Split decompress and tar adapter so we can also read pure .bz2 files etc +- Add mime type detection to decompress so we can read e.g. /boot/initramfs.img which is a bz2 file without ending + # 0.8.9 (2019-06-15) - Finally fix linux binary package diff --git a/exampledir/compress/test.log b/exampledir/compress/test.log new file mode 100644 index 0000000..560c37f --- /dev/null +++ b/exampledir/compress/test.log @@ -0,0 +1,2 @@ +hello world +this is a test diff --git a/exampledir/compress/test.log.bz2 b/exampledir/compress/test.log.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..1c3a8728bd2c9b0d0ff9cfe8f68bacca89d5927d GIT binary patch literal 62 zcmV-E0Kxx4T4*^jL0KkKS#Gbq4FCWIQGfstKmaCWoPZzzF#ym36Gy41spK%@s+as^ UajJ_q0hKr|qC|y~)Z!8@ M0NvO@vKs&Z04R$T`v3p{ literal 0 HcmV?d00001 diff --git a/exampledir/compress/test.log.xz b/exampledir/compress/test.log.xz new file mode 100644 index 0000000000000000000000000000000000000000..b2f182d9f60c57d85d1a58236814c989c48573c5 GIT binary patch literal 84 zcmexsUKJ6=z`*kC+7>q^21Q0O1_p)_{ill=8Kg2&b8_+(%JYkIQn*SoGK&>}C{dv# mwYY?ff#J!~N5|^^nCxR (Vec>, Vec> Rc::new(pandoc::PandocAdapter::new()), Rc::new(poppler::PopplerAdapter::new()), Rc::new(zip::ZipAdapter::new()), + Rc::new(decompress::DecompressAdapter::new()), Rc::new(tar::TarAdapter::new()), Rc::new(sqlite::SqliteAdapter::new()), ]; diff --git a/src/adapters/decompress.rs b/src/adapters/decompress.rs new file mode 100644 index 0000000..0902d66 --- /dev/null +++ b/src/adapters/decompress.rs @@ -0,0 +1,125 @@ +use super::*; +use crate::preproc::rga_preproc; +use failure::*; +use lazy_static::lazy_static; + +use std::path::PathBuf; + +static EXTENSIONS: &[&str] = &["tgz", "tbz", "tbz2", "gz", "bz2", "xz", "zst"]; +static MIME_TYPES: &[&str] = &[ + "application/gzip", + "application/x-bzip", + "application/x-xz", + "application/zstd", +]; +lazy_static! { + static ref METADATA: AdapterMeta = AdapterMeta { + name: "decompress".to_owned(), + version: 1, + description: + "Reads compressed file as a stream and runs a different extractor on the contents." + .to_owned(), + fast_matchers: EXTENSIONS + .iter() + .map(|s| FastMatcher::FileExtension(s.to_string())) + .collect(), + slow_matchers: Some( + MIME_TYPES + .iter() + .map(|s| SlowMatcher::MimeType(s.to_string())) + .collect() + ), + }; +} +#[derive(Default)] +pub struct DecompressAdapter; + +impl DecompressAdapter { + pub fn new() -> DecompressAdapter { + DecompressAdapter + } +} +impl GetMetadata for DecompressAdapter { + fn metadata(&self) -> &AdapterMeta { + &METADATA + } +} + +fn decompress_any<'a, R>(filename: &Path, inp: &'a mut R) -> Fallible> +where + R: Read, +{ + let extension = filename.extension().map(|e| e.to_string_lossy().to_owned()); + + match extension { + Some(e) => Ok(match e.to_owned().as_ref() { + "tgz" | "gz" => Box::new(flate2::read::MultiGzDecoder::new(inp)), + "tbz" | "tbz2" | "bz2" => Box::new(bzip2::read::BzDecoder::new(inp)), + "xz" => Box::new(xz2::read::XzDecoder::new_multi_decoder(inp)), + "zst" => Box::new(zstd::stream::read::Decoder::new(inp)?), + ext => Err(format_err!("don't know how to decompress {}", ext))?, + }), + None => Err(format_err!("no extension")), + } +} +fn get_inner_filename(filename: &Path) -> PathBuf { + let extension = filename + .extension() + .map(|e| e.to_string_lossy().to_owned()) + .unwrap_or(Cow::Borrowed("")); + let stem = filename + .file_stem() + .expect("no filename given?") + .to_string_lossy(); + let new_extension = match extension.to_owned().as_ref() { + "tgz" | "tbz" | "tbz2" => ".tar", + _other => "", + }; + filename.with_file_name(format!("{}{}", stem, new_extension)) +} + +impl FileAdapter for DecompressAdapter { + fn adapt(&self, ai: AdaptInfo) -> Fallible<()> { + let AdaptInfo { + filepath_hint, + mut inp, + oup, + line_prefix, + archive_recursion_depth, + config, + .. + } = ai; + + let mut decompress = decompress_any(filepath_hint, &mut inp)?; + let ai2: AdaptInfo = AdaptInfo { + filepath_hint: &get_inner_filename(filepath_hint), + is_real_file: false, + archive_recursion_depth: archive_recursion_depth + 1, + inp: &mut decompress, + oup, + line_prefix, + config: config.clone(), + }; + rga_preproc(ai2)?; + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + #[test] + fn test_inner_filename() { + for (a, b) in &[ + ("hi/test.tgz", "hi/test.tar"), + ("hi/hello.gz", "hi/hello"), + ("a/b/initramfs", "a/b/initramfs"), + ("hi/test.tbz2", "hi/test.tar"), + ("hi/test.tbz", "hi/test.tar"), + ("hi/test.hi.bz2", "hi/test.hi"), + ("hello.tar.gz", "hello.tar"), + ] { + assert_eq!(get_inner_filename(&PathBuf::from(a)).to_string_lossy(), *b); + } + } +} diff --git a/src/adapters/tar.rs b/src/adapters/tar.rs index 367f7fc..acd03d5 100644 --- a/src/adapters/tar.rs +++ b/src/adapters/tar.rs @@ -6,7 +6,7 @@ use lazy_static::lazy_static; use std::path::PathBuf; -static EXTENSIONS: &[&str] = &["tar", "tar.gz", "tar.bz2", "tar.xz", "tar.zst"]; +static EXTENSIONS: &[&str] = &["tar"]; lazy_static! { static ref METADATA: AdapterMeta = AdapterMeta { @@ -34,24 +34,6 @@ impl GetMetadata for TarAdapter { } } -fn decompress_any<'a, R>(filename: &Path, inp: &'a mut R) -> Fallible> -where - R: Read, -{ - let extension = filename.extension().map(|e| e.to_string_lossy().to_owned()); - match extension { - Some(e) => Ok(match e.to_owned().as_ref() { - "tgz" | "gz" => Box::new(flate2::read::MultiGzDecoder::new(inp)), - "tbz" | "tbz2" | "bz2" => Box::new(bzip2::read::BzDecoder::new(inp)), - "xz" => Box::new(xz2::read::XzDecoder::new_multi_decoder(inp)), - "zst" => Box::new(zstd::stream::read::Decoder::new(inp)?), - "tar" => Box::new(inp), - ext => Err(format_err!("don't know how to decompress {}", ext))?, - }), - None => Err(format_err!("no extension")), - } -} - impl FileAdapter for TarAdapter { fn adapt(&self, ai: AdaptInfo) -> Fallible<()> { let AdaptInfo { @@ -63,9 +45,7 @@ impl FileAdapter for TarAdapter { config, .. } = ai; - - let decompress = decompress_any(filepath_hint, &mut inp)?; - let mut archive = ::tar::Archive::new(decompress); + let mut archive = ::tar::Archive::new(&mut inp); for entry in archive.entries()? { let mut file = entry.unwrap(); if Regular == file.header().entry_type() { diff --git a/src/preproc.rs b/src/preproc.rs index 0d2e5fe..9cb7b2e 100644 --- a/src/preproc.rs +++ b/src/preproc.rs @@ -134,14 +134,17 @@ pub fn rga_preproc(ai: AdaptInfo) -> Result<(), Error> { } } None => { - // allow passthrough if the file is in an archive, + // allow passthrough if the file is in an archive or accurate matching is enabled // otherwise it should have been filtered out by rg pre-glob since rg can handle those better than us - let allow_cat = !is_real_file; + let allow_cat = !is_real_file || args.accurate; if allow_cat { spawning::postproc_line_prefix(line_prefix, inp, oup)?; Ok(()) } else { - Err(format_err!("No adapter found for file {:?}", filename)) + Err(format_err!( + "No adapter found for file {:?}, passthrough disabled.", + filename + )) } } }