This commit is contained in:
phiresky 2019-06-06 23:43:30 +02:00
parent 83b804bef2
commit e98acedc81
12 changed files with 78 additions and 59 deletions

View File

@ -35,13 +35,16 @@ pub struct FileMeta {
} }
pub trait GetMetadata { pub trait GetMetadata {
fn metadata<'a>(&'a self) -> &'a AdapterMeta; fn metadata(&self) -> &AdapterMeta;
} }
pub trait FileAdapter: GetMetadata { pub trait FileAdapter: GetMetadata {
fn adapt(&self, a: AdaptInfo) -> Fallible<()>; fn adapt(&self, a: AdaptInfo) -> Fallible<()>;
} }
pub struct AdaptInfo<'a> { pub struct AdaptInfo<'a> {
/// file path. May not be an actual file on the file system (e.g. in an archive). Used for matching file extensions.
pub filepath_hint: &'a Path, pub filepath_hint: &'a Path,
/// true if filepath_hint is an actual file on the file system
pub is_real_file: bool,
pub inp: &'a mut dyn Read, pub inp: &'a mut dyn Read,
pub oup: &'a mut (dyn Write + Send), pub oup: &'a mut (dyn Write + Send),
pub line_prefix: &'a str, pub line_prefix: &'a str,
@ -54,11 +57,11 @@ pub fn extension_to_regex(extension: &str) -> Regex {
pub fn get_adapters() -> Vec<Rc<dyn FileAdapter>> { pub fn get_adapters() -> Vec<Rc<dyn FileAdapter>> {
let adapters: Vec<Rc<dyn FileAdapter>> = vec![ let adapters: Vec<Rc<dyn FileAdapter>> = vec![
Rc::new(ffmpeg::FFmpegAdapter::new()), Rc::new(ffmpeg::FFmpegAdapter),
Rc::new(pandoc::PandocAdapter::new()), Rc::new(pandoc::PandocAdapter),
Rc::new(poppler::PopplerAdapter::new()), Rc::new(poppler::PopplerAdapter),
Rc::new(zip::ZipAdapter::new()), Rc::new(zip::ZipAdapter),
Rc::new(tar::TarAdapter::new()), Rc::new(tar::TarAdapter),
]; ];
adapters adapters
} }
@ -80,14 +83,15 @@ pub fn adapter_matcher() -> Result<impl Fn(FileMeta) -> Option<Rc<dyn FileAdapte
} }
let fname_regex_set = RegexSet::new(fname_regexes.iter().map(|p| p.0.as_str()))?; let fname_regex_set = RegexSet::new(fname_regexes.iter().map(|p| p.0.as_str()))?;
//let mime_regex_set = RegexSet::new(mime_regexes.iter().map(|p| p.0.as_str()))?; //let mime_regex_set = RegexSet::new(mime_regexes.iter().map(|p| p.0.as_str()))?;
return Ok(move |meta: FileMeta| { Ok(move |meta: FileMeta| {
// todo: handle multiple conflicting matches // todo: handle multiple conflicting matches
for m in fname_regex_set.matches(&meta.lossy_filename) { let matches = fname_regex_set.matches(&meta.lossy_filename);
return Some(fname_regexes[m].1.clone()); match matches.iter().next() {
Some(m) => Some(fname_regexes[m].1.clone()),
None => None,
} }
/*for m in mime_regex_set.matches(&meta.mimetype) { /*for m in mime_regex_set.matches(&meta.mimetype) {
return Some(mime_regexes[m].1.clone()); return Some(mime_regexes[m].1.clone());
}*/ }*/
return None; })
});
} }

View File

@ -21,6 +21,7 @@ lazy_static! {
}; };
} }
#[derive(Default)]
pub struct FFmpegAdapter; pub struct FFmpegAdapter;
impl FFmpegAdapter { impl FFmpegAdapter {
@ -29,7 +30,7 @@ impl FFmpegAdapter {
} }
} }
impl GetMetadata for FFmpegAdapter { impl GetMetadata for FFmpegAdapter {
fn metadata<'a>(&'a self) -> &'a AdapterMeta { fn metadata(&self) -> &AdapterMeta {
&METADATA &METADATA
} }
} }
@ -45,12 +46,17 @@ struct FFprobeStream {
impl FileAdapter for FFmpegAdapter { impl FileAdapter for FFmpegAdapter {
fn adapt(&self, ai: AdaptInfo) -> Fallible<()> { fn adapt(&self, ai: AdaptInfo) -> Fallible<()> {
let AdaptInfo { let AdaptInfo {
is_real_file,
filepath_hint, filepath_hint,
inp,
oup, oup,
.. ..
} = ai; } = ai;
/*let spawn_fail = |e| map_exe_error(e, "ffprobe", "Make sure you have ffmpeg installed."); if !is_real_file {
eprintln!("Skipping video in archive");
return Ok(());
}
let inp_fname = filepath_hint;
let spawn_fail = |e| map_exe_error(e, "ffprobe", "Make sure you have ffmpeg installed.");
let has_subtitles = { let has_subtitles = {
let probe = Command::new("ffprobe") let probe = Command::new("ffprobe")
.args(vec![ .args(vec![
@ -65,7 +71,8 @@ impl FileAdapter for FFmpegAdapter {
]) ])
.arg("-i") .arg("-i")
.arg(inp_fname) .arg(inp_fname)
.output().map_err(spawn_fail)?; .output()
.map_err(spawn_fail)?;
if !probe.status.success() { if !probe.status.success() {
return Err(format_err!("ffprobe failed: {:?}", probe.status)); return Err(format_err!("ffprobe failed: {:?}", probe.status));
} }
@ -120,15 +127,13 @@ impl FileAdapter for FFmpegAdapter {
// 09:55.195 --> 09:56.730 // 09:55.195 --> 09:56.730
if time_re.is_match(&line) { if time_re.is_match(&line) {
time = line.to_owned(); time = line.to_owned();
} else { } else if line.is_empty() {
if line.len() == 0 { oup.write_all(b"\n")?;
oup.write(b"\n")?;
} else { } else {
writeln!(oup, "{}: {}", time, line)?; writeln!(oup, "{}: {}", time, line)?;
} }
} }
} }
}*/
Ok(()) Ok(())
} }
} }

View File

@ -51,6 +51,7 @@ lazy_static! {
.collect(), .collect(),
}; };
} }
#[derive(Default)]
pub struct PandocAdapter; pub struct PandocAdapter;
impl PandocAdapter { impl PandocAdapter {
@ -59,7 +60,7 @@ impl PandocAdapter {
} }
} }
impl GetMetadata for PandocAdapter { impl GetMetadata for PandocAdapter {
fn metadata<'a>(&'a self) -> &'a AdapterMeta { fn metadata(&self) -> &AdapterMeta {
&METADATA &METADATA
} }
} }

View File

@ -16,6 +16,7 @@ lazy_static! {
.collect(), .collect(),
}; };
} }
#[derive(Default)]
pub struct PopplerAdapter; pub struct PopplerAdapter;
impl PopplerAdapter { impl PopplerAdapter {
@ -25,19 +26,19 @@ impl PopplerAdapter {
} }
impl GetMetadata for PopplerAdapter { impl GetMetadata for PopplerAdapter {
fn metadata<'a>(&'a self) -> &'a AdapterMeta { fn metadata(&self) -> &AdapterMeta {
&METADATA &METADATA
} }
} }
impl SpawningFileAdapter for PopplerAdapter { impl SpawningFileAdapter for PopplerAdapter {
fn postproc(line_prefix: &str, inp: &mut Read, oup: &mut Write) -> Fallible<()> { fn postproc(line_prefix: &str, inp: &mut dyn Read, oup: &mut dyn Write) -> Fallible<()> {
// prepend Page X to each line // prepend Page X to each line
let mut page = 1; let mut page = 1;
for line in BufReader::new(inp).lines() { for line in BufReader::new(inp).lines() {
let mut line = line?; let mut line = line?;
if line.contains("\x0c") { if line.contains('\x0c') {
// page break // page break
line = line.replace("\x0c", ""); line = line.replace('\x0c', "");
page += 1; page += 1;
} }
oup.write_all(format!("{}Page {}: {}\n", line_prefix, page, line).as_bytes())?; oup.write_all(format!("{}Page {}: {}\n", line_prefix, page, line).as_bytes())?;
@ -47,7 +48,7 @@ impl SpawningFileAdapter for PopplerAdapter {
fn get_exe(&self) -> &str { fn get_exe(&self) -> &str {
"pdftotext" "pdftotext"
} }
fn command(&self, filepath_hint: &Path, mut cmd: Command) -> Command { fn command(&self, _filepath_hint: &Path, mut cmd: Command) -> Command {
cmd.arg("-layout").arg("-").arg("-"); cmd.arg("-layout").arg("-").arg("-");
cmd cmd
} }

View File

@ -4,13 +4,13 @@ use std::io::prelude::*;
use std::io::BufReader; use std::io::BufReader;
use std::process::Command; use std::process::Command;
use std::process::Stdio; use std::process::Stdio;
use std::thread;
pub trait SpawningFileAdapter: GetMetadata {
fn get_exe(&self) -> &str;
fn command(&self, filepath_hint: &Path, command: Command) -> Command;
fn postproc(line_prefix: &str, inp: &mut Read, oup: &mut Write) -> Fallible<()> { pub fn postproc_line_prefix(
line_prefix: &str,
inp: &mut dyn Read,
oup: &mut dyn Write,
) -> Fallible<()> {
//std::io::copy(inp, oup)?; //std::io::copy(inp, oup)?;
for line in BufReader::new(inp).lines() { for line in BufReader::new(inp).lines() {
@ -18,6 +18,13 @@ pub trait SpawningFileAdapter: GetMetadata {
} }
Ok(()) Ok(())
} }
pub trait SpawningFileAdapter: GetMetadata {
fn get_exe(&self) -> &str;
fn command(&self, filepath_hint: &Path, command: Command) -> Command;
fn postproc(line_prefix: &str, inp: &mut dyn Read, oup: &mut dyn Write) -> Fallible<()> {
postproc_line_prefix(line_prefix, inp, oup)
}
} }
pub fn map_exe_error(err: std::io::Error, exe_name: &str, help: &str) -> Error { pub fn map_exe_error(err: std::io::Error, exe_name: &str, help: &str) -> Error {

View File

@ -3,7 +3,7 @@ use crate::preproc::rga_preproc;
use ::tar::EntryType::Regular; use ::tar::EntryType::Regular;
use failure::*; use failure::*;
use lazy_static::lazy_static; use lazy_static::lazy_static;
use std::fs::File;
use std::io::BufReader; use std::io::BufReader;
use std::path::PathBuf; use std::path::PathBuf;
@ -19,7 +19,7 @@ lazy_static! {
.collect(), .collect(),
}; };
} }
#[derive(Default)]
pub struct TarAdapter; pub struct TarAdapter;
impl TarAdapter { impl TarAdapter {
@ -28,7 +28,7 @@ impl TarAdapter {
} }
} }
impl GetMetadata for TarAdapter { impl GetMetadata for TarAdapter {
fn metadata<'a>(&'a self) -> &'a AdapterMeta { fn metadata(&self) -> &AdapterMeta {
&METADATA &METADATA
} }
} }
@ -108,8 +108,9 @@ impl FileAdapter for TarAdapter {
let line_prefix = &format!("{}{}: ", line_prefix, path.display()); let line_prefix = &format!("{}{}: ", line_prefix, path.display());
let ai2: AdaptInfo = AdaptInfo { let ai2: AdaptInfo = AdaptInfo {
filepath_hint: &path, filepath_hint: &path,
is_real_file: false,
inp: &mut file, inp: &mut file,
oup: oup, oup,
line_prefix, line_prefix,
}; };
rga_preproc(ai2, None)?; rga_preproc(ai2, None)?;

View File

@ -3,7 +3,7 @@ use crate::preproc::rga_preproc;
use ::zip::read::ZipFile; use ::zip::read::ZipFile;
use failure::*; use failure::*;
use lazy_static::lazy_static; use lazy_static::lazy_static;
use std::fs::File;
// todo: // todo:
// maybe todo: read list of extensions from // maybe todo: read list of extensions from
//ffmpeg -demuxers | tail -n+5 | awk '{print $2}' | while read demuxer; do echo MUX=$demuxer; ffmpeg -h demuxer=$demuxer | grep 'Common extensions'; done 2>/dev/null //ffmpeg -demuxers | tail -n+5 | awk '{print $2}' | while read demuxer; do echo MUX=$demuxer; ffmpeg -h demuxer=$demuxer | grep 'Common extensions'; done 2>/dev/null
@ -19,7 +19,7 @@ lazy_static! {
.collect(), .collect(),
}; };
} }
#[derive(Default)]
pub struct ZipAdapter; pub struct ZipAdapter;
impl ZipAdapter { impl ZipAdapter {
@ -28,7 +28,7 @@ impl ZipAdapter {
} }
} }
impl GetMetadata for ZipAdapter { impl GetMetadata for ZipAdapter {
fn metadata<'a>(&'a self) -> &'a AdapterMeta { fn metadata(&self) -> &AdapterMeta {
&METADATA &METADATA
} }
} }
@ -44,7 +44,6 @@ fn is_dir(f: &ZipFile) -> bool {
impl FileAdapter for ZipAdapter { impl FileAdapter for ZipAdapter {
fn adapt(&self, ai: AdaptInfo) -> Fallible<()> { fn adapt(&self, ai: AdaptInfo) -> Fallible<()> {
use std::io::prelude::*;
let AdaptInfo { let AdaptInfo {
filepath_hint, filepath_hint,
mut inp, mut inp,
@ -66,12 +65,13 @@ impl FileAdapter for ZipAdapter {
file.size(), file.size(),
file.compressed_size() file.compressed_size()
); );
let line_prefix = &format!("{}{}: ", line_prefix, file.name().clone()); let line_prefix = &format!("{}{}: ", line_prefix, file.name());
rga_preproc( rga_preproc(
AdaptInfo { AdaptInfo {
filepath_hint: &file.sanitized_name(), filepath_hint: &file.sanitized_name(),
is_real_file: false,
inp: &mut file, inp: &mut file,
oup: oup, oup,
line_prefix, line_prefix,
}, },
None, None,

View File

@ -19,6 +19,7 @@ fn main() -> Result<(), Error> {
let ai = AdaptInfo { let ai = AdaptInfo {
inp: &mut File::open(&path)?, inp: &mut File::open(&path)?,
filepath_hint: &path, filepath_hint: &path,
is_real_file: true,
oup: &mut std::io::stdout(), oup: &mut std::io::stdout(),
line_prefix: "", line_prefix: "",
}; };

View File

@ -1,4 +1,4 @@
use clap::{crate_version, App, Arg, SubCommand}; use clap::{crate_version, App, Arg};
use log::*; use log::*;
use rga::adapters::*; use rga::adapters::*;
use std::ffi::OsString; use std::ffi::OsString;

View File

@ -1,5 +1,5 @@
use std::io::Write;
use failure::Fallible; use failure::Fallible;
use std::io::Write;
/** /**
* wrap a writer so that it is passthrough, * wrap a writer so that it is passthrough,
@ -52,7 +52,7 @@ impl<W: Write> Write for CachingWriter<W> {
self.zstd_writer.take().unwrap().finish()?; self.zstd_writer.take().unwrap().finish()?;
} }
self.out.write_all(&buf[0..wrote])?; self.out.write_all(&buf[0..wrote])?;
return Ok(wrote); Ok(wrote)
} }
None => self.out.write(buf), None => self.out.write(buf),
} }

View File

@ -1,3 +1,5 @@
#![warn(clippy::all)]
pub mod adapters; pub mod adapters;
mod caching_writer; mod caching_writer;
pub mod errors; pub mod errors;

View File

@ -2,15 +2,9 @@ use crate::adapters::*;
use crate::CachingWriter; use crate::CachingWriter;
use failure::{format_err, Error}; use failure::{format_err, Error};
use path_clean::PathClean; use path_clean::PathClean;
use std::fs::File;
use std::io::Read;
use std::io::Write;
use std::path::Path;
use std::path::PathBuf;
use std::rc::Rc;
// longest compressed conversion output to save in cache // longest compressed conversion output to save in cache
const MAX_DB_BLOB_LEN: usize = 2000000; const MAX_DB_BLOB_LEN: usize = 2_000_000;
const ZSTD_LEVEL: i32 = 12; const ZSTD_LEVEL: i32 = 12;
pub fn open_cache_db() -> Result<std::sync::Arc<std::sync::RwLock<rkv::Rkv>>, Error> { pub fn open_cache_db() -> Result<std::sync::Arc<std::sync::RwLock<rkv::Rkv>>, Error> {
@ -43,6 +37,7 @@ pub fn rga_preproc<'a>(
let adapters = adapter_matcher()?; let adapters = adapter_matcher()?;
let AdaptInfo { let AdaptInfo {
filepath_hint, filepath_hint,
is_real_file,
inp, inp,
oup, oup,
line_prefix, line_prefix,
@ -50,7 +45,7 @@ pub fn rga_preproc<'a>(
} = ai; } = ai;
let filename = filepath_hint let filename = filepath_hint
.file_name() .file_name()
.ok_or(format_err!("Empty filename"))?; .ok_or_else(|| format_err!("Empty filename"))?;
eprintln!("abs path: {:?}", filepath_hint); eprintln!("abs path: {:?}", filepath_hint);
@ -106,6 +101,7 @@ pub fn rga_preproc<'a>(
ad.adapt(AdaptInfo { ad.adapt(AdaptInfo {
line_prefix, line_prefix,
filepath_hint, filepath_hint,
is_real_file,
inp, inp,
oup: &mut compbuf, oup: &mut compbuf,
})?; })?;
@ -135,6 +131,7 @@ pub fn rga_preproc<'a>(
ad.adapt(AdaptInfo { ad.adapt(AdaptInfo {
line_prefix, line_prefix,
filepath_hint, filepath_hint,
is_real_file,
inp, inp,
oup, oup,
})?; })?;
@ -143,14 +140,14 @@ pub fn rga_preproc<'a>(
} }
} }
None => { None => {
let allow_cat = false; // allow passthrough if the file is in an archive, otherwise it should have been filtered out by rg
let allow_cat = !is_real_file;
if allow_cat { if allow_cat {
eprintln!("no adapter for that file, running cat!");
let stdini = std::io::stdin(); let stdini = std::io::stdin();
let mut stdin = stdini.lock(); let mut stdin = stdini.lock();
let stdouti = std::io::stdout(); let stdouti = std::io::stdout();
let mut stdout = stdouti.lock(); let mut stdout = stdouti.lock();
std::io::copy(&mut stdin, &mut stdout)?; spawning::postproc_line_prefix(line_prefix, &mut stdin, &mut stdout)?;
Ok(()) Ok(())
} else { } else {
Err(format_err!("No adapter found for file {:?}", filename)) Err(format_err!("No adapter found for file {:?}", filename))