This commit is contained in:
phiresky 2019-06-06 23:43:30 +02:00
parent 83b804bef2
commit e98acedc81
12 changed files with 78 additions and 59 deletions

View File

@ -35,13 +35,16 @@ pub struct FileMeta {
}
pub trait GetMetadata {
fn metadata<'a>(&'a self) -> &'a AdapterMeta;
fn metadata(&self) -> &AdapterMeta;
}
/// An adapter that can process a single input described by an [`AdaptInfo`].
///
/// Every implementor must also implement [`GetMetadata`] (supertrait bound).
pub trait FileAdapter: GetMetadata {
/// Runs this adapter on the input described by `a`.
///
/// `a` bundles the input reader (`inp`), the output writer (`oup`), a path hint
/// and a line prefix; presumably the adapter reads from `inp` and writes its
/// result to `oup` — confirm against the individual implementations.
///
/// Returns `Ok(())` on success, or an error if the adapter fails.
fn adapt(&self, a: AdaptInfo) -> Fallible<()>;
}
pub struct AdaptInfo<'a> {
/// file path. May not be an actual file on the file system (e.g. in an archive). Used for matching file extensions.
pub filepath_hint: &'a Path,
/// true if filepath_hint is an actual file on the file system
pub is_real_file: bool,
pub inp: &'a mut dyn Read,
pub oup: &'a mut (dyn Write + Send),
pub line_prefix: &'a str,
@ -54,11 +57,11 @@ pub fn extension_to_regex(extension: &str) -> Regex {
pub fn get_adapters() -> Vec<Rc<dyn FileAdapter>> {
let adapters: Vec<Rc<dyn FileAdapter>> = vec![
Rc::new(ffmpeg::FFmpegAdapter::new()),
Rc::new(pandoc::PandocAdapter::new()),
Rc::new(poppler::PopplerAdapter::new()),
Rc::new(zip::ZipAdapter::new()),
Rc::new(tar::TarAdapter::new()),
Rc::new(ffmpeg::FFmpegAdapter),
Rc::new(pandoc::PandocAdapter),
Rc::new(poppler::PopplerAdapter),
Rc::new(zip::ZipAdapter),
Rc::new(tar::TarAdapter),
];
adapters
}
@ -80,14 +83,15 @@ pub fn adapter_matcher() -> Result<impl Fn(FileMeta) -> Option<Rc<dyn FileAdapte
}
let fname_regex_set = RegexSet::new(fname_regexes.iter().map(|p| p.0.as_str()))?;
//let mime_regex_set = RegexSet::new(mime_regexes.iter().map(|p| p.0.as_str()))?;
return Ok(move |meta: FileMeta| {
Ok(move |meta: FileMeta| {
// todo: handle multiple conflicting matches
for m in fname_regex_set.matches(&meta.lossy_filename) {
return Some(fname_regexes[m].1.clone());
let matches = fname_regex_set.matches(&meta.lossy_filename);
match matches.iter().next() {
Some(m) => Some(fname_regexes[m].1.clone()),
None => None,
}
/*for m in mime_regex_set.matches(&meta.mimetype) {
return Some(mime_regexes[m].1.clone());
}*/
return None;
});
})
}

View File

@ -21,6 +21,7 @@ lazy_static! {
};
}
#[derive(Default)]
pub struct FFmpegAdapter;
impl FFmpegAdapter {
@ -29,7 +30,7 @@ impl FFmpegAdapter {
}
}
impl GetMetadata for FFmpegAdapter {
fn metadata<'a>(&'a self) -> &'a AdapterMeta {
fn metadata(&self) -> &AdapterMeta {
&METADATA
}
}
@ -45,12 +46,17 @@ struct FFprobeStream {
impl FileAdapter for FFmpegAdapter {
fn adapt(&self, ai: AdaptInfo) -> Fallible<()> {
let AdaptInfo {
is_real_file,
filepath_hint,
inp,
oup,
..
} = ai;
/*let spawn_fail = |e| map_exe_error(e, "ffprobe", "Make sure you have ffmpeg installed.");
if !is_real_file {
eprintln!("Skipping video in archive");
return Ok(());
}
let inp_fname = filepath_hint;
let spawn_fail = |e| map_exe_error(e, "ffprobe", "Make sure you have ffmpeg installed.");
let has_subtitles = {
let probe = Command::new("ffprobe")
.args(vec![
@ -65,7 +71,8 @@ impl FileAdapter for FFmpegAdapter {
])
.arg("-i")
.arg(inp_fname)
.output().map_err(spawn_fail)?;
.output()
.map_err(spawn_fail)?;
if !probe.status.success() {
return Err(format_err!("ffprobe failed: {:?}", probe.status));
}
@ -120,15 +127,13 @@ impl FileAdapter for FFmpegAdapter {
// 09:55.195 --> 09:56.730
if time_re.is_match(&line) {
time = line.to_owned();
} else if line.is_empty() {
oup.write_all(b"\n")?;
} else {
if line.len() == 0 {
oup.write(b"\n")?;
} else {
writeln!(oup, "{}: {}", time, line)?;
}
writeln!(oup, "{}: {}", time, line)?;
}
}
}*/
}
Ok(())
}
}

View File

@ -51,6 +51,7 @@ lazy_static! {
.collect(),
};
}
#[derive(Default)]
pub struct PandocAdapter;
impl PandocAdapter {
@ -59,7 +60,7 @@ impl PandocAdapter {
}
}
impl GetMetadata for PandocAdapter {
fn metadata<'a>(&'a self) -> &'a AdapterMeta {
fn metadata(&self) -> &AdapterMeta {
&METADATA
}
}

View File

@ -16,6 +16,7 @@ lazy_static! {
.collect(),
};
}
#[derive(Default)]
pub struct PopplerAdapter;
impl PopplerAdapter {
@ -25,19 +26,19 @@ impl PopplerAdapter {
}
impl GetMetadata for PopplerAdapter {
fn metadata<'a>(&'a self) -> &'a AdapterMeta {
fn metadata(&self) -> &AdapterMeta {
&METADATA
}
}
impl SpawningFileAdapter for PopplerAdapter {
fn postproc(line_prefix: &str, inp: &mut Read, oup: &mut Write) -> Fallible<()> {
fn postproc(line_prefix: &str, inp: &mut dyn Read, oup: &mut dyn Write) -> Fallible<()> {
// prepend Page X to each line
let mut page = 1;
for line in BufReader::new(inp).lines() {
let mut line = line?;
if line.contains("\x0c") {
if line.contains('\x0c') {
// page break
line = line.replace("\x0c", "");
line = line.replace('\x0c', "");
page += 1;
}
oup.write_all(format!("{}Page {}: {}\n", line_prefix, page, line).as_bytes())?;
@ -47,7 +48,7 @@ impl SpawningFileAdapter for PopplerAdapter {
fn get_exe(&self) -> &str {
"pdftotext"
}
fn command(&self, filepath_hint: &Path, mut cmd: Command) -> Command {
fn command(&self, _filepath_hint: &Path, mut cmd: Command) -> Command {
cmd.arg("-layout").arg("-").arg("-");
cmd
}

View File

@ -4,19 +4,26 @@ use std::io::prelude::*;
use std::io::BufReader;
use std::process::Command;
use std::process::Stdio;
use std::thread;
/// Copies `inp` to `oup` line by line, writing `line_prefix` in front of every line.
///
/// A trailing `\n` or `\r\n` on each input line is replaced by a single `\n`, and a
/// final line without a terminator still gets one. Empty input produces no output.
///
/// Lines are handled as raw bytes, so input that is not valid UTF-8 is passed
/// through unchanged instead of aborting with an error.
///
/// # Errors
/// Returns the first I/O error raised while reading `inp` or writing to `oup`.
pub fn postproc_line_prefix(
    line_prefix: &str,
    inp: &mut dyn Read,
    oup: &mut dyn Write,
) -> Fallible<()> {
    let prefix = line_prefix.as_bytes();
    let mut reader = BufReader::new(inp);
    // One buffer reused for the whole stream: prefix followed by the current line.
    let mut buf: Vec<u8> = Vec::with_capacity(prefix.len() + 128);
    loop {
        buf.clear();
        buf.extend_from_slice(prefix);
        if reader.read_until(b'\n', &mut buf)? == 0 {
            // end of input
            break;
        }
        // Normalize the terminator the same way `BufRead::lines` does: drop `\n`,
        // then a directly preceding `\r` (but never a byte belonging to the prefix).
        if buf.last() == Some(&b'\n') {
            buf.pop();
            if buf.len() > prefix.len() && buf.last() == Some(&b'\r') {
                buf.pop();
            }
        }
        buf.push(b'\n');
        // Emit prefix + line + newline in a single write, as before.
        oup.write_all(&buf)?;
    }
    Ok(())
}
pub trait SpawningFileAdapter: GetMetadata {
fn get_exe(&self) -> &str;
fn command(&self, filepath_hint: &Path, command: Command) -> Command;
fn postproc(line_prefix: &str, inp: &mut Read, oup: &mut Write) -> Fallible<()> {
//std::io::copy(inp, oup)?;
for line in BufReader::new(inp).lines() {
oup.write_all(format!("{}{}\n", line_prefix, line?).as_bytes())?;
}
Ok(())
fn postproc(line_prefix: &str, inp: &mut dyn Read, oup: &mut dyn Write) -> Fallible<()> {
postproc_line_prefix(line_prefix, inp, oup)
}
}

View File

@ -3,7 +3,7 @@ use crate::preproc::rga_preproc;
use ::tar::EntryType::Regular;
use failure::*;
use lazy_static::lazy_static;
use std::fs::File;
use std::io::BufReader;
use std::path::PathBuf;
@ -19,7 +19,7 @@ lazy_static! {
.collect(),
};
}
#[derive(Default)]
pub struct TarAdapter;
impl TarAdapter {
@ -28,7 +28,7 @@ impl TarAdapter {
}
}
impl GetMetadata for TarAdapter {
fn metadata<'a>(&'a self) -> &'a AdapterMeta {
fn metadata(&self) -> &AdapterMeta {
&METADATA
}
}
@ -108,8 +108,9 @@ impl FileAdapter for TarAdapter {
let line_prefix = &format!("{}{}: ", line_prefix, path.display());
let ai2: AdaptInfo = AdaptInfo {
filepath_hint: &path,
is_real_file: false,
inp: &mut file,
oup: oup,
oup,
line_prefix,
};
rga_preproc(ai2, None)?;

View File

@ -3,7 +3,7 @@ use crate::preproc::rga_preproc;
use ::zip::read::ZipFile;
use failure::*;
use lazy_static::lazy_static;
use std::fs::File;
// todo:
// maybe todo: read list of extensions from
//ffmpeg -demuxers | tail -n+5 | awk '{print $2}' | while read demuxer; do echo MUX=$demuxer; ffmpeg -h demuxer=$demuxer | grep 'Common extensions'; done 2>/dev/null
@ -19,7 +19,7 @@ lazy_static! {
.collect(),
};
}
#[derive(Default)]
pub struct ZipAdapter;
impl ZipAdapter {
@ -28,7 +28,7 @@ impl ZipAdapter {
}
}
impl GetMetadata for ZipAdapter {
fn metadata<'a>(&'a self) -> &'a AdapterMeta {
fn metadata(&self) -> &AdapterMeta {
&METADATA
}
}
@ -44,7 +44,6 @@ fn is_dir(f: &ZipFile) -> bool {
impl FileAdapter for ZipAdapter {
fn adapt(&self, ai: AdaptInfo) -> Fallible<()> {
use std::io::prelude::*;
let AdaptInfo {
filepath_hint,
mut inp,
@ -66,12 +65,13 @@ impl FileAdapter for ZipAdapter {
file.size(),
file.compressed_size()
);
let line_prefix = &format!("{}{}: ", line_prefix, file.name().clone());
let line_prefix = &format!("{}{}: ", line_prefix, file.name());
rga_preproc(
AdaptInfo {
filepath_hint: &file.sanitized_name(),
is_real_file: false,
inp: &mut file,
oup: oup,
oup,
line_prefix,
},
None,

View File

@ -19,6 +19,7 @@ fn main() -> Result<(), Error> {
let ai = AdaptInfo {
inp: &mut File::open(&path)?,
filepath_hint: &path,
is_real_file: true,
oup: &mut std::io::stdout(),
line_prefix: "",
};

View File

@ -1,4 +1,4 @@
use clap::{crate_version, App, Arg, SubCommand};
use clap::{crate_version, App, Arg};
use log::*;
use rga::adapters::*;
use std::ffi::OsString;

View File

@ -1,5 +1,5 @@
use std::io::Write;
use failure::Fallible;
use std::io::Write;
/**
* wrap a writer so that it is passthrough,
@ -52,7 +52,7 @@ impl<W: Write> Write for CachingWriter<W> {
self.zstd_writer.take().unwrap().finish()?;
}
self.out.write_all(&buf[0..wrote])?;
return Ok(wrote);
Ok(wrote)
}
None => self.out.write(buf),
}

View File

@ -1,3 +1,5 @@
#![warn(clippy::all)]
pub mod adapters;
mod caching_writer;
pub mod errors;

View File

@ -2,15 +2,9 @@ use crate::adapters::*;
use crate::CachingWriter;
use failure::{format_err, Error};
use path_clean::PathClean;
use std::fs::File;
use std::io::Read;
use std::io::Write;
use std::path::Path;
use std::path::PathBuf;
use std::rc::Rc;
// longest compressed conversion output to save in cache
const MAX_DB_BLOB_LEN: usize = 2000000;
const MAX_DB_BLOB_LEN: usize = 2_000_000;
const ZSTD_LEVEL: i32 = 12;
pub fn open_cache_db() -> Result<std::sync::Arc<std::sync::RwLock<rkv::Rkv>>, Error> {
@ -43,6 +37,7 @@ pub fn rga_preproc<'a>(
let adapters = adapter_matcher()?;
let AdaptInfo {
filepath_hint,
is_real_file,
inp,
oup,
line_prefix,
@ -50,7 +45,7 @@ pub fn rga_preproc<'a>(
} = ai;
let filename = filepath_hint
.file_name()
.ok_or(format_err!("Empty filename"))?;
.ok_or_else(|| format_err!("Empty filename"))?;
eprintln!("abs path: {:?}", filepath_hint);
@ -106,6 +101,7 @@ pub fn rga_preproc<'a>(
ad.adapt(AdaptInfo {
line_prefix,
filepath_hint,
is_real_file,
inp,
oup: &mut compbuf,
})?;
@ -135,6 +131,7 @@ pub fn rga_preproc<'a>(
ad.adapt(AdaptInfo {
line_prefix,
filepath_hint,
is_real_file,
inp,
oup,
})?;
@ -143,14 +140,14 @@ pub fn rga_preproc<'a>(
}
}
None => {
let allow_cat = false;
// allow passthrough if the file is in an archive, otherwise it should have been filtered out by rg
let allow_cat = !is_real_file;
if allow_cat {
eprintln!("no adapter for that file, running cat!");
let stdini = std::io::stdin();
let mut stdin = stdini.lock();
let stdouti = std::io::stdout();
let mut stdout = stdouti.lock();
std::io::copy(&mut stdin, &mut stdout)?;
spawning::postproc_line_prefix(line_prefix, &mut stdin, &mut stdout)?;
Ok(())
} else {
Err(format_err!("No adapter found for file {:?}", filename))