ripgrep-all/src/adapters/tar.rs

122 lines
3.7 KiB
Rust
Raw Normal View History

2019-06-06 15:59:15 +00:00
use super::*;
use crate::preproc::rga_preproc;
use ::tar::EntryType::Regular;
use failure::*;
use lazy_static::lazy_static;
2019-06-06 21:43:30 +00:00
2019-06-06 21:19:59 +00:00
use std::io::BufReader;
2019-06-06 15:59:15 +00:00
use std::path::PathBuf;
/// File-extension strings this adapter registers a matcher for.
// NOTE(review): whether the multi-part entries ("tar.gz", ...) can ever match
// depends on how `Matcher::FileExtension` is compared elsewhere - confirm.
static EXTENSIONS: &[&str] = &["tar", "tar.gz", "tar.bz2", "tar.xz", "tar.zst"];

lazy_static! {
    /// Adapter metadata: name, version, and one `FileExtension` matcher per
    /// entry of `EXTENSIONS`.
    static ref METADATA: AdapterMeta = {
        let matchers = EXTENSIONS
            .iter()
            .map(|&ext| Matcher::FileExtension(ext.to_owned()))
            .collect();
        AdapterMeta {
            name: String::from("tar"),
            version: 1,
            matchers,
        }
    };
}
2019-06-06 21:43:30 +00:00
/// Adapter for tar archives (optionally compressed).
///
/// The type carries no state; it only exists so the adapter traits can be
/// implemented on it.
#[derive(Default)]
pub struct TarAdapter;

impl TarAdapter {
    /// Creates a new `TarAdapter`.
    pub fn new() -> TarAdapter {
        TarAdapter
    }
}
impl GetMetadata for TarAdapter {
2019-06-06 21:43:30 +00:00
fn metadata(&self) -> &AdapterMeta {
2019-06-06 15:59:15 +00:00
&METADATA
}
}
2019-06-06 21:19:59 +00:00
/// Turns a borrowed `&mut dyn Read` into an owned value that implements
/// `Read`, because the streaming decompressors want to take ownership of
/// their base reader.
struct WrapRead<'a> {
    /// The borrowed reader every `read` call is forwarded to.
    inner: &'a mut dyn Read,
}

impl<'a> Read for WrapRead<'a> {
    /// Forwards the read to the wrapped reader unchanged.
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        self.inner.read(buf)
    }
}
2019-06-06 21:19:59 +00:00
/// One concrete reader type covering every supported compression wrapper, so
/// callers can hold "some decompressor over `R`" without boxing.
enum SpecRead<R>
where
    R: Read,
{
    /// gzip stream decoded by `flate2`.
    Gz(flate2::read::MultiGzDecoder<R>),
    /// bzip2 stream decoded by `bzip2`.
    Bz2(bzip2::read::BzDecoder<R>),
    /// xz stream decoded by `xz2`.
    Xz(xz2::read::XzDecoder<R>),
    /// zstd stream decoded by `zstd`; the decoder sits on a `BufReader<R>`.
    Zst(zstd::stream::read::Decoder<BufReader<R>>),
    /// No decompression: bytes are read straight from `R`.
    Passthrough(R),
}

impl<R> Read for SpecRead<R>
where
    R: Read,
{
    /// Delegates to whichever reader the active variant holds.
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        match self {
            SpecRead::Gz(decoder) => decoder.read(buf),
            SpecRead::Bz2(decoder) => decoder.read(buf),
            SpecRead::Xz(decoder) => decoder.read(buf),
            SpecRead::Zst(decoder) => decoder.read(buf),
            SpecRead::Passthrough(raw) => raw.read(buf),
        }
    }
}
// why do I need to wrap the output here in a specific type? is it possible with just a Box<Read> for every type?
fn decompress_any<'a, R>(filename: &Path, inp: &'a mut R) -> Fallible<SpecRead<WrapRead<'a>>>
where
R: Read,
{
let inp = WrapRead { inner: inp };
2019-06-06 15:59:15 +00:00
let extension = filename.extension().map(|e| e.to_string_lossy().to_owned());
match extension {
Some(e) => Ok(match e.to_owned().as_ref() {
2019-06-06 21:19:59 +00:00
"gz" => SpecRead::Gz(flate2::read::MultiGzDecoder::new(inp)),
"bz2" => SpecRead::Bz2(bzip2::read::BzDecoder::new(inp)),
"xz" => SpecRead::Xz(xz2::read::XzDecoder::new_multi_decoder(inp)),
"zst" => SpecRead::Zst(zstd::stream::read::Decoder::new(inp)?),
"tar" => SpecRead::Passthrough(inp),
ext => Err(format_err!("don't know how to decompress {}", ext))?,
2019-06-06 15:59:15 +00:00
}),
None => Err(format_err!("no extension")),
}
2019-06-06 21:19:59 +00:00
}
2019-06-06 15:59:15 +00:00
impl FileAdapter for TarAdapter {
2019-06-06 21:19:59 +00:00
fn adapt(&self, ai: AdaptInfo) -> Fallible<()> {
2019-06-06 15:59:15 +00:00
let AdaptInfo {
filepath_hint,
mut inp,
oup,
line_prefix,
..
} = ai;
2019-06-06 21:19:59 +00:00
let decompress = decompress_any(filepath_hint, &mut inp)?;
2019-06-06 15:59:15 +00:00
let mut archive = ::tar::Archive::new(decompress);
for entry in archive.entries()? {
let mut file = entry.unwrap();
let path = PathBuf::from(file.path()?.to_owned());
eprintln!(
"{}|{}: {} bytes",
filepath_hint.display(),
path.display(),
file.header().size()?,
);
if Regular == file.header().entry_type() {
let line_prefix = &format!("{}{}: ", line_prefix, path.display());
2019-06-06 21:19:59 +00:00
let ai2: AdaptInfo = AdaptInfo {
filepath_hint: &path,
2019-06-06 21:43:30 +00:00
is_real_file: false,
2019-06-06 21:19:59 +00:00
inp: &mut file,
2019-06-06 21:43:30 +00:00
oup,
2019-06-06 21:19:59 +00:00
line_prefix,
};
rga_preproc(ai2, None)?;
2019-06-06 15:59:15 +00:00
}
}
Ok(())
}
}