From 83114f66bfc9b519b38ad4cba33f6cdf2a76fc58 Mon Sep 17 00:00:00 2001 From: phiresky Date: Thu, 6 Jun 2019 17:59:15 +0200 Subject: [PATCH] tar adapter (broken compression) --- .gitignore | 1 + Cargo.lock | 115 +++++++++++++++++++++++++++++++++++++++ Cargo.toml | 4 ++ README.md | 9 ++- src/adapters.rs | 6 ++ src/adapters/spawning.rs | 4 +- src/adapters/tar.rs | 91 +++++++++++++++++++++++++++++++ 7 files changed, 227 insertions(+), 3 deletions(-) create mode 100644 src/adapters/tar.rs diff --git a/.gitignore b/.gitignore index 53eaa21..97bec05 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ /target +/.idea **/*.rs.bk diff --git a/Cargo.lock b/Cargo.lock index 2978208..b135fbe 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -85,6 +85,11 @@ name = "bitflags" version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "build_const" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "byteorder" version = "1.3.1" @@ -148,6 +153,14 @@ dependencies = [ "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "crc" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "build_const 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "crc32fast" version = "1.2.0" @@ -285,11 +298,33 @@ dependencies = [ "synstructure 0.10.2 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "filetime" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cfg-if 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.57 (registry+https://github.com/rust-lang/crates.io-index)", + "redox_syscall 0.1.54 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "fixedbitset" version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "flate2" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "crc32fast 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.57 (registry+https://github.com/rust-lang/crates.io-index)", + "miniz-sys 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)", + "miniz_oxide_c_api 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "fnv" version = "1.0.6" @@ -385,6 +420,16 @@ dependencies = [ "cfg-if 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "lzma-sys" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cc 1.0.37 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.57 (registry+https://github.com/rust-lang/crates.io-index)", + "pkg-config 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "matches" version = "0.1.8" @@ -400,6 +445,34 @@ name = "memoffset" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "miniz-sys" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cc 1.0.37 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.57 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "miniz_oxide" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "adler32 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "miniz_oxide_c_api" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cc 1.0.37 (registry+https://github.com/rust-lang/crates.io-index)", + "crc 1.8.1 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.57 (registry+https://github.com/rust-lang/crates.io-index)", + "miniz_oxide 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "nodrop" version = "0.1.13" @@ -682,11 +755,13 @@ name = "rga" version = "0.1.0" dependencies = [ "bincode 1.1.4 (registry+https://github.com/rust-lang/crates.io-index)", + "bzip2 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", "cachedir 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", "clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)", "crossbeam 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)", "env_logger 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)", "failure 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", + "flate2 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)", "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", "path-clean 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -694,7 +769,9 @@ dependencies = [ "rkv 0.9.5 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.92 (registry+https://github.com/rust-lang/crates.io-index)", "serde_json 1.0.39 (registry+https://github.com/rust-lang/crates.io-index)", + "tar 0.4.26 (registry+https://github.com/rust-lang/crates.io-index)", "tree_magic_fork 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", + "xz2 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", "zip 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)", "zstd 0.4.24+zstd.1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -816,6 +893,17 @@ dependencies = [ "unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "tar" +version = "0.4.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "filetime 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.57 (registry+https://github.com/rust-lang/crates.io-index)", + "redox_syscall 0.1.54 (registry+https://github.com/rust-lang/crates.io-index)", + "xattr 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "termcolor" version = "1.0.5" @@ -965,6 +1053,22 @@ dependencies = [ "winapi-util 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "xattr" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.57 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "xz2" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "lzma-sys 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "zip" version = "0.5.2" @@ -1016,6 +1120,7 @@ dependencies = [ "checksum backtrace-sys 0.1.28 (registry+https://github.com/rust-lang/crates.io-index)" = "797c830ac25ccc92a7f8a7b9862bde440715531514594a6154e3d4a54dd769b6" "checksum bincode 1.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "9f04a5e50dc80b3d5d35320889053637d15011aed5e66b66b37ae798c65da6f7" "checksum bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "228047a76f468627ca71776ecdebd732a3423081fcf5125585bcd7c49886ce12" +"checksum build_const 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "39092a32794787acd8525ee150305ff051b0aa6cc2abaf193924f5ab05425f39" "checksum byteorder 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a019b10a2a7cdeb292db131fc8113e57ea2a908f6e7894b0c3c671893b65dbeb" "checksum bzip2 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "42b7c3cbf0fa9c1b82308d57191728ca0256cb821220f4e2fd410a72ade26e3b" "checksum bzip2-sys 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "6584aa36f5ad4c9247f5323b0a42f37802b37a836f0ad87084d7a33961abe25f" @@ -1024,6 +1129,7 @@ dependencies = [ "checksum cfg-if 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "b486ce3ccf7ffd79fdeb678eac06a9e6c09fc88d33836340becb8fffe87c5e33" "checksum clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5067f5bb2d80ef5d68b4c87db81601f0b75bca627bc2ef76b141d7b846a3c6d9" "checksum cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f" +"checksum crc 1.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "d663548de7f5cca343f1e0a48d14dcfb0e9eb4e079ec58883b7251539fa10aeb" "checksum crc32fast 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ba125de2af0df55319f41944744ad91c71113bf74a4646efff39afe1f6842db1" "checksum crossbeam 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b14492071ca110999a20bf90e3833406d5d66bfd93b4e52ec9539025ff43fe0d" "checksum crossbeam-channel 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)" = "0f0ed1a4de2235cabda8558ff5840bffb97fcb64c97827f354a451307df5f72b" @@ -1038,7 +1144,9 @@ dependencies = [ "checksum env_logger 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b61fa891024a945da30a9581546e8cfaf5602c7b3f4c137a2805cf388f92075a" "checksum failure 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "795bd83d3abeb9220f257e597aa0080a508b27533824adf336529648f6abf7e2" "checksum failure_derive 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "ea1063915fd7ef4309e222a5a07cf9c319fb9c7836b1f89b85458672dbb127e1" +"checksum filetime 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "450537dc346f0c4d738dda31e790da1da5d4bd12145aad4da0d03d713cb3794f" "checksum fixedbitset 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "86d4de0081402f5e88cdac65c8dcdcc73118c1a7a465e2a05f0da05843a8ea33" +"checksum flate2 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)" = "f87e68aa82b2de08a6e037f1385455759df6e445a8df5e005b4297191dbf18aa" "checksum fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "2fad85553e09a6f881f739c29f0b00b0f01357c743266d478b68951ce23285f3" "checksum fuchsia-cprng 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba" "checksum glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "8be18de09a56b60ed0edf84bc9df007e30040691af7acd1c41874faac5895bfb" @@ -1052,9 +1160,13 @@ dependencies = [ "checksum lmdb-rkv-sys 0.8.3 (registry+https://github.com/rust-lang/crates.io-index)" = "1470e0168f1832e35afd6d0931ae60db625685332837b97aa156773ec9c5e393" "checksum lock_api 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ed946d4529956a20f2d63ebe1b69996d5a2137c91913fe3ebbeff957f5bca7ff" "checksum log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c84ec4b527950aa83a329754b01dbe3f58361d1c5efacd1f6d68c494d08a17c6" +"checksum lzma-sys 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)" = "16b5c59c57cc4d39e7999f50431aa312ea78af7c93b23fbb0c3567bd672e7f35" "checksum matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "7ffc5c5338469d4d3ea17d269fa8ea3512ad247247c30bd2df69e68309ed0a08" "checksum memchr 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2efc7bc57c883d4a4d6e3246905283d8dae951bb3bd32f49d6ef297f546e1c39" "checksum memoffset 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0f9dc261e2b62d7a622bf416ea3c5245cdd5d9a7fcc428c0d06804dfce1775b3" +"checksum miniz-sys 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)" = "1e9e3ae51cea1576ceba0dde3d484d30e6e5b86dee0b2d412fe3a16a15c98202" +"checksum miniz_oxide 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c468f2369f07d651a5d0bb2c9079f8488a66d5466efe42d0c5c6466edcb7f71e" +"checksum miniz_oxide_c_api 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b7fe927a42e3807ef71defb191dc87d4e24479b221e67015fe38ae2b7b447bab" "checksum nodrop 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)" = "2f9667ddcc6cc8a43afc9b7917599d7216aa09c463919ea32c59ed6cac8bc945" "checksum nom 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cf51a729ecf40266a2368ad335a5fdde43471f545a967109cd62146ecf8b66ff" "checksum num-traits 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "6ba9a427cfca2be13aa6f6403b0b7e7368fe982bfa16fccc450ce74c46cd9b32" @@ -1104,6 +1216,7 @@ dependencies = [ "checksum strsim 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" "checksum syn 0.15.34 (registry+https://github.com/rust-lang/crates.io-index)" = "a1393e4a97a19c01e900df2aec855a29f71cf02c402e2f443b8d2747c25c5dbe" "checksum synstructure 0.10.2 (registry+https://github.com/rust-lang/crates.io-index)" = "02353edf96d6e4dc81aea2d8490a7e9db177bf8acb0e951c24940bf866cb313f" +"checksum tar 0.4.26 (registry+https://github.com/rust-lang/crates.io-index)" = "b3196bfbffbba3e57481b6ea32249fbaf590396a52505a2615adbb79d9d826d3" "checksum termcolor 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "96d6098003bde162e4277c70665bd87c326f5a0c3f3fbfb285787fa482d54e6e" "checksum termion 1.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "dde0593aeb8d47accea5392b39350015b5eccb12c0d98044d856983d89548dea" "checksum textwrap 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" @@ -1124,6 +1237,8 @@ dependencies = [ "checksum winapi-util 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7168bab6e1daee33b4557efd0e95d5ca70a03706d39fa5f3fe7a236f584b03c9" "checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" "checksum wincolor 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "561ed901ae465d6185fa7864d63fbd5720d0ef718366c9a4dc83cf6170d7e9ba" +"checksum xattr 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "244c3741f4240ef46274860397c7c74e50eb23624996930e484c16679633a54c" +"checksum xz2 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c179869f34fc7c01830d3ce7ea2086bc3a07e0d35289b667d0a8bf910258926c" "checksum zip 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "c18fc320faf909036e46ac785ea827f72e485304877faf1a3a39538d3714dbc3" "checksum zstd 0.4.24+zstd.1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2c5a6414958b49ee80f2dd0042023ac8f37cfe1d31fbeec0b9749cf6f2c03683" "checksum zstd-safe 1.4.9+zstd.1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1d98332212af687878b146a6549c188e9b72971972d23089c831472f938e6272" diff --git a/Cargo.toml b/Cargo.toml index 731d1ce..832df00 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,3 +33,7 @@ crossbeam = "0.7.1" clap = "2.33.0" log = "0.4.6" env_logger = "0.6.1" +xz2 = "0.1.6" +flate2 = "1.0.7" +bzip2 = "0.3.3" +tar = "0.4.26" diff --git a/README.md b/README.md index 2eb3fa1..9ce4aed 100644 --- a/README.md +++ b/README.md @@ -1 +1,8 @@ -similar: https://gist.github.com/ColonolBuendia/314826e37ec35c616d70506c38dc65aa +similar: + +- pdfgrep +- https://gist.github.com/ColonolBuendia/314826e37ec35c616d70506c38dc65aa + +# considerations + +- matching on mime (magic bytes) instead of filename diff --git a/src/adapters.rs b/src/adapters.rs index 74c7527..61e9964 100644 --- a/src/adapters.rs +++ b/src/adapters.rs @@ -2,6 +2,7 @@ pub mod ffmpeg; pub mod pandoc; pub mod poppler; pub mod spawning; +pub mod tar; pub mod zip; use failure::*; use regex::{Regex, RegexSet}; @@ -13,6 +14,10 @@ use std::rc::Rc; pub enum Matcher { // MimeType(Regex), + /** + * without the dot. e.g. "jpg" or "tar.gz" matched as /.*\.ext$/ + * + */ FileExtension(String), } @@ -53,6 +58,7 @@ pub fn get_adapters() -> Vec> { Rc::new(pandoc::PandocAdapter::new()), Rc::new(poppler::PopplerAdapter::new()), Rc::new(zip::ZipAdapter::new()), + Rc::new(tar::TarAdapter::new()), ]; adapters } diff --git a/src/adapters/spawning.rs b/src/adapters/spawning.rs index 60c72e1..77e88dd 100644 --- a/src/adapters/spawning.rs +++ b/src/adapters/spawning.rs @@ -103,7 +103,7 @@ where fn adapt(&self, ai: AdaptInfo) -> Fallible<()> { let AdaptInfo { filepath_hint, - inp, + mut inp, oup, line_prefix, .. @@ -112,7 +112,7 @@ where pipe_output( line_prefix, self.command(filepath_hint, cmd), - inp, + &mut inp, oup, self.get_exe(), "", diff --git a/src/adapters/tar.rs b/src/adapters/tar.rs new file mode 100644 index 0000000..2ae4ce6 --- /dev/null +++ b/src/adapters/tar.rs @@ -0,0 +1,91 @@ +use super::*; +use crate::preproc::rga_preproc; +use ::tar::EntryType::Regular; +use failure::*; +use lazy_static::lazy_static; +use std::fs::File; +use std::path::PathBuf; + +static EXTENSIONS: &[&str] = &["tar", "tar.gz", "tar.bz2", "tar.xz", "tar.zst"]; + +lazy_static! { + static ref METADATA: AdapterMeta = AdapterMeta { + name: "tar".to_owned(), + version: 1, + matchers: EXTENSIONS + .iter() + .map(|s| Matcher::FileExtension(s.to_string())) + .collect(), + }; +} + +pub struct TarAdapter; + +impl TarAdapter { + pub fn new() -> TarAdapter { + TarAdapter + } +} +impl GetMetadata for TarAdapter { + fn metadata<'a>(&'a self) -> &'a AdapterMeta { + &METADATA + } +} +/*struct WrapRead<'a> { + inner: &mut 'a Read; +} +impl Read for WrapRead { + r +}*/ + +/*fn decompress_any(filename: &Path, inp: &mut Read) -> Fallible> { + let extension = filename.extension().map(|e| e.to_string_lossy().to_owned()); + match extension { + Some(e) => Ok(match e.to_owned().as_ref() { + "gz" => Box::new(flate2::read::MultiGzDecoder::new(inp)), + "bz2" => Box::new(bzip2::read::BzDecoder::new(inp)), + "xz" => Box::new(xz2::read::XzDecoder::new_multi_decoder(inp)), + "zst" => Box::new(zstd::stream::read::Decoder::new(inp)?), + e => Err(format_err!("don't know how to decompress {}", e))?, + }), + None => Err(format_err!("no extension")), + } +}*/ + +impl FileAdapter for TarAdapter { + fn adapt<'a>(&self, ai: AdaptInfo) -> Fallible<()> { + use std::io::prelude::*; + let AdaptInfo { + filepath_hint, + mut inp, + oup, + line_prefix, + .. + } = ai; + let decompress = inp; //decompress_any(filepath_hint, &inp)?; + let mut archive = ::tar::Archive::new(decompress); + for entry in archive.entries()? { + let mut file = entry.unwrap(); + let path = PathBuf::from(file.path()?.to_owned()); + eprintln!( + "{}|{}: {} bytes", + filepath_hint.display(), + path.display(), + file.header().size()?, + ); + if Regular == file.header().entry_type() { + let line_prefix = &format!("{}{}: ", line_prefix, path.display()); + rga_preproc( + AdaptInfo { + filepath_hint: &path, + inp: &mut file, + oup: oup, + line_prefix, + }, + None, + )?; + } + } + Ok(()) + } +}