mirror of
https://github.com/FliegendeWurst/ripgrep-all.git
synced 2024-11-24 12:24:56 +00:00
split decompress adapter
This commit is contained in:
parent
c9ff85ad42
commit
d8b57f2f8a
@ -1,3 +1,8 @@
|
|||||||
|
# 0.9.0 (2019-06-16)
|
||||||
|
|
||||||
|
- Split the decompress and tar adapters so that plain compressed files (.bz2 etc.) can also be read
|
||||||
|
- Add MIME type detection to the decompress adapter so we can read files such as /boot/initramfs.img, which is a bz2 file without a file extension
|
||||||
|
|
||||||
# 0.8.9 (2019-06-15)
|
# 0.8.9 (2019-06-15)
|
||||||
|
|
||||||
- Finally fix linux binary package
|
- Finally fix linux binary package
|
||||||
|
2
exampledir/compress/test.log
Normal file
2
exampledir/compress/test.log
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
hello world
|
||||||
|
this is a test
|
BIN
exampledir/compress/test.log.bz2
Normal file
BIN
exampledir/compress/test.log.bz2
Normal file
Binary file not shown.
BIN
exampledir/compress/test.log.gz
Normal file
BIN
exampledir/compress/test.log.gz
Normal file
Binary file not shown.
BIN
exampledir/compress/test.log.xz
Normal file
BIN
exampledir/compress/test.log.xz
Normal file
Binary file not shown.
BIN
exampledir/compress/test.log.zst
Normal file
BIN
exampledir/compress/test.log.zst
Normal file
Binary file not shown.
@ -1,3 +1,4 @@
|
|||||||
|
pub mod decompress;
|
||||||
pub mod ffmpeg;
|
pub mod ffmpeg;
|
||||||
pub mod pandoc;
|
pub mod pandoc;
|
||||||
pub mod pdfpages;
|
pub mod pdfpages;
|
||||||
@ -11,7 +12,7 @@ use crate::matching::*;
|
|||||||
use crate::preproc::PreprocConfig;
|
use crate::preproc::PreprocConfig;
|
||||||
use failure::*;
|
use failure::*;
|
||||||
use log::*;
|
use log::*;
|
||||||
use regex::{Regex};
|
use regex::Regex;
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::io::prelude::*;
|
use std::io::prelude::*;
|
||||||
@ -78,6 +79,7 @@ pub fn get_all_adapters() -> (Vec<Rc<dyn FileAdapter>>, Vec<Rc<dyn FileAdapter>>
|
|||||||
Rc::new(pandoc::PandocAdapter::new()),
|
Rc::new(pandoc::PandocAdapter::new()),
|
||||||
Rc::new(poppler::PopplerAdapter::new()),
|
Rc::new(poppler::PopplerAdapter::new()),
|
||||||
Rc::new(zip::ZipAdapter::new()),
|
Rc::new(zip::ZipAdapter::new()),
|
||||||
|
Rc::new(decompress::DecompressAdapter::new()),
|
||||||
Rc::new(tar::TarAdapter::new()),
|
Rc::new(tar::TarAdapter::new()),
|
||||||
Rc::new(sqlite::SqliteAdapter::new()),
|
Rc::new(sqlite::SqliteAdapter::new()),
|
||||||
];
|
];
|
||||||
|
125
src/adapters/decompress.rs
Normal file
125
src/adapters/decompress.rs
Normal file
@ -0,0 +1,125 @@
|
|||||||
|
use super::*;
|
||||||
|
use crate::preproc::rga_preproc;
|
||||||
|
use failure::*;
|
||||||
|
use lazy_static::lazy_static;
|
||||||
|
|
||||||
|
use std::path::PathBuf;
|
||||||
|
|
||||||
|
/// File extensions this adapter registers for; each entry becomes a
/// `FastMatcher::FileExtension` in `METADATA`.
static EXTENSIONS: &[&str] = &["tgz", "tbz", "tbz2", "gz", "bz2", "xz", "zst"];
|
||||||
|
/// MIME types this adapter registers for; each entry becomes a
/// `SlowMatcher::MimeType` in `METADATA`.
static MIME_TYPES: &[&str] = &[
    "application/gzip",
    "application/x-bzip",
    "application/x-xz",
    "application/zstd",
];
|
||||||
|
lazy_static! {
    /// Lazily built description of the decompress adapter: its name, version,
    /// human-readable description and the matchers derived from `EXTENSIONS`
    /// and `MIME_TYPES`.
    static ref METADATA: AdapterMeta = {
        // One extension matcher per entry of EXTENSIONS.
        let by_extension = EXTENSIONS
            .iter()
            .map(|ext| FastMatcher::FileExtension((*ext).to_owned()))
            .collect();
        // One MIME matcher per entry of MIME_TYPES.
        let by_mime_type = MIME_TYPES
            .iter()
            .map(|mime| SlowMatcher::MimeType((*mime).to_owned()))
            .collect();

        AdapterMeta {
            name: String::from("decompress"),
            version: 1,
            description: String::from(
                "Reads compressed file as a stream and runs a different extractor on the contents.",
            ),
            fast_matchers: by_extension,
            slow_matchers: Some(by_mime_type),
        }
    };
}
|
||||||
|
/// Adapter that decompresses a single compressed stream and hands the result
/// to another extractor. It is a unit struct and carries no state.
#[derive(Default)]
pub struct DecompressAdapter;

impl DecompressAdapter {
    /// Creates the adapter; equivalent to `DecompressAdapter::default()`.
    pub fn new() -> Self {
        Self
    }
}
|
||||||
|
impl GetMetadata for DecompressAdapter {
|
||||||
|
fn metadata(&self) -> &AdapterMeta {
|
||||||
|
&METADATA
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn decompress_any<'a, R>(filename: &Path, inp: &'a mut R) -> Fallible<Box<dyn Read + 'a>>
|
||||||
|
where
|
||||||
|
R: Read,
|
||||||
|
{
|
||||||
|
let extension = filename.extension().map(|e| e.to_string_lossy().to_owned());
|
||||||
|
|
||||||
|
match extension {
|
||||||
|
Some(e) => Ok(match e.to_owned().as_ref() {
|
||||||
|
"tgz" | "gz" => Box::new(flate2::read::MultiGzDecoder::new(inp)),
|
||||||
|
"tbz" | "tbz2" | "bz2" => Box::new(bzip2::read::BzDecoder::new(inp)),
|
||||||
|
"xz" => Box::new(xz2::read::XzDecoder::new_multi_decoder(inp)),
|
||||||
|
"zst" => Box::new(zstd::stream::read::Decoder::new(inp)?),
|
||||||
|
ext => Err(format_err!("don't know how to decompress {}", ext))?,
|
||||||
|
}),
|
||||||
|
None => Err(format_err!("no extension")),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/// Derives the name of the file contained in a compressed file.
///
/// The last extension is dropped (`hello.gz` -> `hello`); the tar shorthands
/// `tgz`, `tbz` and `tbz2` are replaced by `.tar` (`test.tgz` -> `test.tar`).
/// A name without an extension is returned unchanged.
///
/// A path without a final file name component (for example `""`, `/` or
/// `..`) is returned unchanged instead of panicking, and the stem is kept as
/// an `OsString` so non-UTF-8 names are not altered.
fn get_inner_filename(filename: &Path) -> PathBuf {
    let stem = match filename.file_stem() {
        Some(stem) => stem,
        // Nothing to strip: there is no file name component at all.
        None => return filename.to_path_buf(),
    };

    let mut inner_name = stem.to_os_string();
    // The known shorthands are ASCII, so a non-UTF-8 extension never matches.
    match filename.extension().and_then(|ext| ext.to_str()) {
        Some("tgz") | Some("tbz") | Some("tbz2") => inner_name.push(".tar"),
        _ => {}
    }
    filename.with_file_name(inner_name)
}
|
||||||
|
|
||||||
|
impl FileAdapter for DecompressAdapter {
|
||||||
|
fn adapt(&self, ai: AdaptInfo) -> Fallible<()> {
|
||||||
|
let AdaptInfo {
|
||||||
|
filepath_hint,
|
||||||
|
mut inp,
|
||||||
|
oup,
|
||||||
|
line_prefix,
|
||||||
|
archive_recursion_depth,
|
||||||
|
config,
|
||||||
|
..
|
||||||
|
} = ai;
|
||||||
|
|
||||||
|
let mut decompress = decompress_any(filepath_hint, &mut inp)?;
|
||||||
|
let ai2: AdaptInfo = AdaptInfo {
|
||||||
|
filepath_hint: &get_inner_filename(filepath_hint),
|
||||||
|
is_real_file: false,
|
||||||
|
archive_recursion_depth: archive_recursion_depth + 1,
|
||||||
|
inp: &mut decompress,
|
||||||
|
oup,
|
||||||
|
line_prefix,
|
||||||
|
config: config.clone(),
|
||||||
|
};
|
||||||
|
rga_preproc(ai2)?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// `get_inner_filename` drops the compression extension and maps the tar
    /// shorthands (`tgz`, `tbz`, `tbz2`) to `.tar`.
    #[test]
    fn test_inner_filename() {
        // (compressed file name, expected inner file name)
        let cases = [
            ("hi/test.tgz", "hi/test.tar"),
            ("hi/hello.gz", "hi/hello"),
            ("a/b/initramfs", "a/b/initramfs"),
            ("hi/test.tbz2", "hi/test.tar"),
            ("hi/test.tbz", "hi/test.tar"),
            ("hi/test.hi.bz2", "hi/test.hi"),
            ("hello.tar.gz", "hello.tar"),
        ];
        for (outer, expected) in cases.iter() {
            let inner = get_inner_filename(&PathBuf::from(outer));
            assert_eq!(inner.to_string_lossy(), *expected);
        }
    }
}
|
@ -6,7 +6,7 @@ use lazy_static::lazy_static;
|
|||||||
|
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
|
|
||||||
static EXTENSIONS: &[&str] = &["tar", "tar.gz", "tar.bz2", "tar.xz", "tar.zst"];
|
static EXTENSIONS: &[&str] = &["tar"];
|
||||||
|
|
||||||
lazy_static! {
|
lazy_static! {
|
||||||
static ref METADATA: AdapterMeta = AdapterMeta {
|
static ref METADATA: AdapterMeta = AdapterMeta {
|
||||||
@ -34,24 +34,6 @@ impl GetMetadata for TarAdapter {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn decompress_any<'a, R>(filename: &Path, inp: &'a mut R) -> Fallible<Box<dyn Read + 'a>>
|
|
||||||
where
|
|
||||||
R: Read,
|
|
||||||
{
|
|
||||||
let extension = filename.extension().map(|e| e.to_string_lossy().to_owned());
|
|
||||||
match extension {
|
|
||||||
Some(e) => Ok(match e.to_owned().as_ref() {
|
|
||||||
"tgz" | "gz" => Box::new(flate2::read::MultiGzDecoder::new(inp)),
|
|
||||||
"tbz" | "tbz2" | "bz2" => Box::new(bzip2::read::BzDecoder::new(inp)),
|
|
||||||
"xz" => Box::new(xz2::read::XzDecoder::new_multi_decoder(inp)),
|
|
||||||
"zst" => Box::new(zstd::stream::read::Decoder::new(inp)?),
|
|
||||||
"tar" => Box::new(inp),
|
|
||||||
ext => Err(format_err!("don't know how to decompress {}", ext))?,
|
|
||||||
}),
|
|
||||||
None => Err(format_err!("no extension")),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl FileAdapter for TarAdapter {
|
impl FileAdapter for TarAdapter {
|
||||||
fn adapt(&self, ai: AdaptInfo) -> Fallible<()> {
|
fn adapt(&self, ai: AdaptInfo) -> Fallible<()> {
|
||||||
let AdaptInfo {
|
let AdaptInfo {
|
||||||
@ -63,9 +45,7 @@ impl FileAdapter for TarAdapter {
|
|||||||
config,
|
config,
|
||||||
..
|
..
|
||||||
} = ai;
|
} = ai;
|
||||||
|
let mut archive = ::tar::Archive::new(&mut inp);
|
||||||
let decompress = decompress_any(filepath_hint, &mut inp)?;
|
|
||||||
let mut archive = ::tar::Archive::new(decompress);
|
|
||||||
for entry in archive.entries()? {
|
for entry in archive.entries()? {
|
||||||
let mut file = entry.unwrap();
|
let mut file = entry.unwrap();
|
||||||
if Regular == file.header().entry_type() {
|
if Regular == file.header().entry_type() {
|
||||||
|
@ -134,14 +134,17 @@ pub fn rga_preproc(ai: AdaptInfo) -> Result<(), Error> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
None => {
|
None => {
|
||||||
// allow passthrough if the file is in an archive,
|
// allow passthrough if the file is in an archive or accurate matching is enabled
|
||||||
// otherwise it should have been filtered out by rg pre-glob since rg can handle those better than us
|
// otherwise it should have been filtered out by rg pre-glob since rg can handle those better than us
|
||||||
let allow_cat = !is_real_file;
|
let allow_cat = !is_real_file || args.accurate;
|
||||||
if allow_cat {
|
if allow_cat {
|
||||||
spawning::postproc_line_prefix(line_prefix, inp, oup)?;
|
spawning::postproc_line_prefix(line_prefix, inp, oup)?;
|
||||||
Ok(())
|
Ok(())
|
||||||
} else {
|
} else {
|
||||||
Err(format_err!("No adapter found for file {:?}", filename))
|
Err(format_err!(
|
||||||
|
"No adapter found for file {:?}, passthrough disabled.",
|
||||||
|
filename
|
||||||
|
))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user