diff --git a/Cargo.lock b/Cargo.lock index 189a1b8..84a35f2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -26,6 +26,27 @@ name = "autocfg" version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "backtrace" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "autocfg 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", + "backtrace-sys 0.1.28 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.57 (registry+https://github.com/rust-lang/crates.io-index)", + "rustc-demangle 0.1.15 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "backtrace-sys" +version = "0.1.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cc 1.0.37 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.57 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "bincode" version = "1.1.4" @@ -113,6 +134,7 @@ name = "failure" version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ + "backtrace 0.3.30 (registry+https://github.com/rust-lang/crates.io-index)", "failure_derive 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -157,6 +179,11 @@ dependencies = [ "unicode-normalization 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "itoa" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "lazy_static" version = "1.3.0" @@ -471,10 +498,13 @@ version = "0.1.0" dependencies = [ "bincode 1.1.4 (registry+https://github.com/rust-lang/crates.io-index)", "cachedir 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + "failure 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.3.0 
(registry+https://github.com/rust-lang/crates.io-index)", "path-clean 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "regex 1.1.6 (registry+https://github.com/rust-lang/crates.io-index)", "rkv 0.9.5 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.92 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_json 1.0.39 (registry+https://github.com/rust-lang/crates.io-index)", "tree_magic_fork 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", "zstd 0.4.24+zstd.1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -496,6 +526,11 @@ dependencies = [ "uuid 0.7.4 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "rustc-demangle" +version = "0.1.15" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "rustc_version" version = "0.2.3" @@ -504,6 +539,11 @@ dependencies = [ "semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "ryu" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "scopeguard" version = "0.3.3" @@ -531,6 +571,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" name = "serde" version = "1.0.92" source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "serde_derive 1.0.92 (registry+https://github.com/rust-lang/crates.io-index)", +] [[package]] name = "serde_derive" @@ -542,6 +585,16 @@ dependencies = [ "syn 0.15.34 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "serde_json" +version = "1.0.39" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "itoa 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)", + "ryu 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.92 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "smallvec" version = "0.6.9" @@ 
-685,6 +738,8 @@ dependencies = [ "checksum arrayref 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "0d382e583f07208808f6b1249e60848879ba3543f57c32277bf52d69c2f0f0ee" "checksum arrayvec 0.4.10 (registry+https://github.com/rust-lang/crates.io-index)" = "92c7fb76bc8826a8b33b4ee5bb07a247a81e76764ab4d55e8f73e3a4d8808c71" "checksum autocfg 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "0e49efa51329a5fd37e7c79db4621af617cd4e3e5bc224939808d076077077bf" +"checksum backtrace 0.3.30 (registry+https://github.com/rust-lang/crates.io-index)" = "ada4c783bb7e7443c14e0480f429ae2cc99da95065aeab7ee1b81ada0419404f" +"checksum backtrace-sys 0.1.28 (registry+https://github.com/rust-lang/crates.io-index)" = "797c830ac25ccc92a7f8a7b9862bde440715531514594a6154e3d4a54dd769b6" "checksum bincode 1.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "9f04a5e50dc80b3d5d35320889053637d15011aed5e66b66b37ae798c65da6f7" "checksum bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "228047a76f468627ca71776ecdebd732a3423081fcf5125585bcd7c49886ce12" "checksum byteorder 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a019b10a2a7cdeb292db131fc8113e57ea2a908f6e7894b0c3c671893b65dbeb" @@ -703,6 +758,7 @@ dependencies = [ "checksum fuchsia-cprng 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba" "checksum glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "8be18de09a56b60ed0edf84bc9df007e30040691af7acd1c41874faac5895bfb" "checksum idna 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "38f09e0f0b1fb55fdee1f17470ad800da77af5186a1a76c026b679358b7e844e" +"checksum itoa 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "501266b7edd0174f8530248f87f99c88fbe60ca4ef3dd486835b8d8d53136f7f" "checksum lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = 
"bc5729f27f159ddd61f4df6228e827e86643d4d3e7c32183cb30a1c08f604a14" "checksum libc 0.2.57 (registry+https://github.com/rust-lang/crates.io-index)" = "a844cabbd5a77e60403a58af576f0a1baa83c3dd2670be63e615bd24fc58b82d" "checksum lmdb-rkv 0.11.4 (registry+https://github.com/rust-lang/crates.io-index)" = "e25b4069789bf7ac069d6fd58229f18aec20c6f7cc9173cb731d11c10dbb6b6e" @@ -742,13 +798,16 @@ dependencies = [ "checksum regex 1.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "8f0a0bcab2fd7d1d7c54fa9eae6f43eddeb9ce2e7352f8518a814a4f65d60c58" "checksum regex-syntax 0.6.6 (registry+https://github.com/rust-lang/crates.io-index)" = "dcfd8681eebe297b81d98498869d4aae052137651ad7b96822f09ceb690d0a96" "checksum rkv 0.9.5 (registry+https://github.com/rust-lang/crates.io-index)" = "2c1b8d667bf149bfac7c47bb728dfb7246f35fdf61c2f16f9f588194f087d23c" +"checksum rustc-demangle 0.1.15 (registry+https://github.com/rust-lang/crates.io-index)" = "a7f4dccf6f4891ebcc0c39f9b6eb1a83b9bf5d747cb439ec6fba4f3b977038af" "checksum rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" +"checksum ryu 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "b96a9549dc8d48f2c283938303c4b5a77aa29bfbc5b54b084fb1630408899a8f" "checksum scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "94258f53601af11e6a49f722422f6e3425c52b06245a5cf9bc09908b174f5e27" "checksum scopeguard 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b42e15e59b18a828bbf5c58ea01debb36b9b096346de35d941dcb89009f24a0d" "checksum semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" "checksum semver-parser 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" "checksum serde 1.0.92 
(registry+https://github.com/rust-lang/crates.io-index)" = "32746bf0f26eab52f06af0d0aa1984f641341d06d8d673c693871da2d188c9be" "checksum serde_derive 1.0.92 (registry+https://github.com/rust-lang/crates.io-index)" = "46a3223d0c9ba936b61c0d2e3e559e3217dbfb8d65d06d26e8b3c25de38bae3e" +"checksum serde_json 1.0.39 (registry+https://github.com/rust-lang/crates.io-index)" = "5a23aa71d4a4d43fdbfaac00eff68ba8a06a51759a89ac3304323e800c4dd40d" "checksum smallvec 0.6.9 (registry+https://github.com/rust-lang/crates.io-index)" = "c4488ae950c49d403731982257768f48fada354a5203fe81f9bb6f43ca9002be" "checksum syn 0.15.34 (registry+https://github.com/rust-lang/crates.io-index)" = "a1393e4a97a19c01e900df2aec855a29f71cf02c402e2f443b8d2747c25c5dbe" "checksum synstructure 0.10.2 (registry+https://github.com/rust-lang/crates.io-index)" = "02353edf96d6e4dc81aea2d8490a7e9db177bf8acb0e951c24940bf866cb313f" diff --git a/Cargo.toml b/Cargo.toml index 75ecbb4..be9a417 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,5 +22,8 @@ rkv = "0.9.5" cachedir = "0.1.1" path-clean = "0.1.0" bincode = "1.1.4" -serde = "1.0.92" +serde = { version="1.0.92", features = ["derive"] } zstd = "0.4.24" +lazy_static = "1.3.0" +serde_json = "1.0.39" +failure = "0.1.5" diff --git a/src/adapters.rs b/src/adapters.rs index f4ef9bd..229d6ba 100644 --- a/src/adapters.rs +++ b/src/adapters.rs @@ -2,19 +2,18 @@ pub mod ffmpeg; pub mod pandoc; pub mod poppler; pub mod spawning; +use regex::{Regex, RegexSet}; +use std::io::BufRead; +use std::io::Write; +use std::path::Path; +use std::rc::Rc; +use failure::*; //pub use ffmpeg::FffmpegAdapter; -use regex::{Regex, RegexSet}; -use std::collections::HashMap; -use std::ffi::OsString; -use std::io::BufRead; -use std::io::Write; -use std::rc::Rc; - pub enum Matcher { // MimeType(Regex), - FileName(Regex), + FileExtension(String), } pub struct AdapterMeta { @@ -34,22 +33,24 @@ pub trait GetMetadata { fn metadata<'a>(&'a self) -> &'a AdapterMeta; } pub trait FileAdapter: GetMetadata { - 
fn adapt(&self, inp_fname: &str, oup: &mut dyn Write) -> std::io::Result<()>; + fn adapt(&self, inp_fname: &Path, oup: &mut dyn Write) -> Fallible<()>; } -pub fn ExtensionMatcher(extension: &str) -> Matcher { - let regex = Regex::new(&format!(".*\\.{}", &regex::escape(extension))) - .expect("we know this regex compiles"); - Matcher::FileName(regex) +pub fn extension_to_regex(extension: &str) -> Regex { + Regex::new(&format!(".*\\.{}", &regex::escape(extension))).expect("we know this regex compiles") } -pub fn init_adapters() -> Result Option>, regex::Error> { +pub fn get_adapters() -> Vec> { let adapters: Vec> = vec![ Rc::new(crate::adapters::ffmpeg::FFmpegAdapter::new()), Rc::new(crate::adapters::pandoc::PandocAdapter::new()), Rc::new(crate::adapters::poppler::PopplerAdapter::new()), ]; + adapters +} +pub fn adapter_matcher() -> Result Option>, regex::Error> { + let adapters = get_adapters(); let mut fname_regexes = vec![]; //let mut mime_regexes = vec![]; for adapter in adapters.into_iter() { @@ -57,14 +58,16 @@ pub fn init_adapters() -> Result Option for matcher in &metadata.matchers { match matcher { //Matcher::MimeType(re) => mime_regexes.push((re.clone(), adapter.clone())), - Matcher::FileName(re) => fname_regexes.push((re.clone(), adapter.clone())), + Matcher::FileExtension(re) => { + fname_regexes.push((extension_to_regex(re), adapter.clone())) + } }; } } let fname_regex_set = RegexSet::new(fname_regexes.iter().map(|p| p.0.as_str()))?; //let mime_regex_set = RegexSet::new(mime_regexes.iter().map(|p| p.0.as_str()))?; return Ok(move |meta: FileMeta| { - // todo: handle multiple matches + // todo: handle multiple conflicting matches for m in fname_regex_set.matches(&meta.lossy_filename) { return Some(fname_regexes[m].1.clone()); } diff --git a/src/adapters/ffmpeg.rs b/src/adapters/ffmpeg.rs index 9dc985b..6f8c306 100644 --- a/src/adapters/ffmpeg.rs +++ b/src/adapters/ffmpeg.rs @@ -1,43 +1,128 @@ use super::*; -use spawning::SpawningFileAdapter; -use std::io::Write; 
-use std::process::Command; +use super::spawning::map_exe_error; +use lazy_static::lazy_static; +use serde::{Deserialize, Serialize}; +use std::io::BufReader; +use std::process::*; +use failure::*; +// todo: +// maybe todo: read list of extensions from +//ffmpeg -demuxers | tail -n+5 | awk '{print $2}' | while read demuxer; do echo MUX=$demuxer; ffmpeg -h demuxer=$demuxer | grep 'Common extensions'; done 2>/dev/null +static EXTENSIONS: &[&str] = &["mkv", "mp4", "avi"]; -pub struct FFmpegAdapter { - _metadata: AdapterMeta, +lazy_static! { + static ref METADATA: AdapterMeta = AdapterMeta { + name: "ffmpeg".to_owned(), + version: 1, + matchers: EXTENSIONS + .iter() + .map(|s| Matcher::FileExtension(s.to_string())) + .collect(), + }; } -// maybe todo: read from -// ffmpeg -demuxers -// ffmpeg -h demuxer=xyz -static extensions: &[&str] = &["mkv", "mp4", "avi"]; + +pub struct FFmpegAdapter; impl FFmpegAdapter { pub fn new() -> FFmpegAdapter { - FFmpegAdapter { - _metadata: AdapterMeta { - name: "ffmpeg".to_owned(), - version: 1, - matchers: extensions.iter().map(|s| ExtensionMatcher(s)).collect(), - }, - } + FFmpegAdapter } } impl GetMetadata for FFmpegAdapter { fn metadata<'a>(&'a self) -> &'a AdapterMeta { - &self._metadata + &METADATA } } -impl SpawningFileAdapter for FFmpegAdapter { - fn command(&self, inp_fname: &str) -> Command { - let mut cmd = Command::new("ffmpeg"); - cmd.arg("-hide_banner") - .arg("-loglevel") - .arg("panic") - .arg("-i") - .arg(inp_fname) - .arg("-f") - .arg("webvtt") - .arg("-"); - cmd + +#[derive(Serialize, Deserialize)] +struct FFprobeOutput { + streams: Vec, +} +#[derive(Serialize, Deserialize)] +struct FFprobeStream { + codec_type: String, // video,audio,subtitle +} +impl FileAdapter for FFmpegAdapter { + fn adapt(&self, inp_fname: &Path, oup: &mut dyn Write) -> Fallible<()> { + let spawn_fail = |e| map_exe_error(e, "ffprobe", "Make sure you have ffmpeg installed."); + let has_subtitles = { + let probe = Command::new("ffprobe") + 
.args(vec![ + "-v", + "error", + "-select_streams", + "s", + "-of", + "json", + "-show_entries", + "stream=codec_type", + ]) + .arg("-i") + .arg(inp_fname) + .output().map_err(spawn_fail)?; + if !probe.status.success() { + return Err(format_err!("ffprobe failed: {:?}", probe.status)); + } + println!("{}", String::from_utf8_lossy(&probe.stdout)); + let p: FFprobeOutput = serde_json::from_slice(&probe.stdout)?; + (p.streams.iter().count() > 0) + }; + { + let mut probe = Command::new("ffprobe") + .args(vec![ + "-v", + "error", + "-show_format", + "-show_streams", + "-of", + "flat", + // "-show_data", + "-show_error", + "-show_programs", + "-show_chapters", + // "-count_frames", + //"-count_packets", + ]) + .arg("-i") + .arg(inp_fname) + .stdout(Stdio::piped()) + .spawn()?; + for line in BufReader::new(probe.stdout.as_mut().unwrap()).lines() { + writeln!(oup, "metadata: {}", line?)?; + } + let exit = probe.wait()?; + if !exit.success() { + return Err(format_err!("ffprobe failed: {:?}", exit)); + } + } + if has_subtitles { + let mut cmd = Command::new("ffmpeg"); + cmd.arg("-hide_banner") + .arg("-loglevel") + .arg("panic") + .arg("-i") + .arg(inp_fname) + .arg("-f") + .arg("webvtt") + .arg("-"); + let mut cmd = cmd.stdout(Stdio::piped()).spawn().map_err(spawn_fail)?; + let stdo = cmd.stdout.as_mut().expect("is piped"); + let time_re = Regex::new(r".*\d.*-->.*\d.*").unwrap(); + let mut time: String = "".to_owned(); + for line in BufReader::new(stdo).lines() { + let line = line?; + // 09:55.195 --> 09:56.730 + if time_re.is_match(&line) { + time = line.to_owned(); + } else { + if line.len() == 0 { + oup.write(b"\n")?; + } else { + writeln!(oup, "{}: {}", time, line)?; + } + } + } + } + Ok(()) } } diff --git a/src/adapters/pandoc.rs b/src/adapters/pandoc.rs index 6a62c45..1d373ff 100644 --- a/src/adapters/pandoc.rs +++ b/src/adapters/pandoc.rs @@ -1,7 +1,6 @@ use super::*; +use lazy_static::lazy_static; use spawning::SpawningFileAdapter; - -use std::io::Write; use 
std::process::Command; // from https://github.com/jgm/pandoc/blob/master/src/Text/Pandoc/App/FormatHeuristics.hs @@ -40,32 +39,35 @@ use std::process::Command; //"xhtml" -> Just "html" //"wiki" -> Just "mediawiki" -static extensions: &[&str] = &["epub", "odt", "docx", "pptx", "fb2", "icml", "rtf", "ipynb"]; +static EXTENSIONS: &[&str] = &["epub", "odt", "docx", "pptx", "fb2", "ipynb"]; -pub struct PandocAdapter { - _metadata: AdapterMeta, +lazy_static! { + static ref METADATA: AdapterMeta = AdapterMeta { + name: "pandoc".to_owned(), + version: 1, + matchers: EXTENSIONS + .iter() + .map(|s| Matcher::FileExtension(s.to_string())) + .collect(), + }; } +pub struct PandocAdapter; impl PandocAdapter { pub fn new() -> PandocAdapter { - PandocAdapter { - _metadata: AdapterMeta { - name: "pandoc".to_owned(), - version: 1, - // todo: read from ffmpeg -demuxers? - matchers: extensions.iter().map(|s| ExtensionMatcher(s)).collect(), - }, - } + PandocAdapter } } impl GetMetadata for PandocAdapter { fn metadata<'a>(&'a self) -> &'a AdapterMeta { - &self._metadata + &METADATA } } impl SpawningFileAdapter for PandocAdapter { - fn command(&self, inp_fname: &str) -> Command { - let mut cmd = Command::new("pandoc"); + fn get_exe(&self) -> &str { + "pandoc" + } + fn command(&self, inp_fname: &Path, mut cmd: Command) -> Command { cmd // simpler markown (with more information loss but plainer text) .arg("--to=commonmark-header_attributes-link_attributes-fenced_divs-markdown_in_html_blocks-raw_html-native_divs-native_spans-bracketed_spans") diff --git a/src/adapters/poppler.rs b/src/adapters/poppler.rs index b7058f9..301ffd1 100644 --- a/src/adapters/poppler.rs +++ b/src/adapters/poppler.rs @@ -1,36 +1,35 @@ use super::*; +use lazy_static::lazy_static; use spawning::SpawningFileAdapter; -use std::io::Read; -use std::io::Write; use std::process::Command; -use std::process::Stdio; -static extensions: &[&str] = &["pdf"]; -pub struct PopplerAdapter { - _metadata: AdapterMeta, +static 
EXTENSIONS: &[&str] = &["pdf"]; + +lazy_static! { + static ref METADATA: AdapterMeta = AdapterMeta { + name: "poppler".to_owned(), + version: 1, + matchers: EXTENSIONS.iter().map(|s| Matcher::FileExtension(s.to_string())).collect(), + }; } +pub struct PopplerAdapter; impl PopplerAdapter { pub fn new() -> PopplerAdapter { - PopplerAdapter { - _metadata: AdapterMeta { - name: "poppler".to_owned(), - version: 1, - // todo: read from ffmpeg -demuxers? - matchers: extensions.iter().map(|s| ExtensionMatcher(s)).collect(), - }, - } + PopplerAdapter } } impl GetMetadata for PopplerAdapter { fn metadata<'a>(&'a self) -> &'a AdapterMeta { - &self._metadata + &METADATA } } impl SpawningFileAdapter for PopplerAdapter { - fn command(&self, inp_fname: &str) -> Command { - let mut cmd = Command::new("pdftotext"); + fn get_exe(&self) -> &str { + "pdftotext" + } + fn command(&self, inp_fname: &Path, mut cmd: Command) -> Command { cmd.arg("-layout").arg("--").arg(inp_fname).arg("-"); cmd } diff --git a/src/adapters/spawning.rs b/src/adapters/spawning.rs index bad4430..bf69747 100644 --- a/src/adapters/spawning.rs +++ b/src/adapters/spawning.rs @@ -2,27 +2,39 @@ use super::*; use std::io::Write; use std::process::Command; use std::process::Stdio; +use failure::*; pub trait SpawningFileAdapter: GetMetadata { - fn command(&self, inp_fname: &str) -> Command; + fn get_exe(&self) -> &str; + fn command(&self, inp_fname: &Path, command: Command) -> Command; +} + +pub fn map_exe_error(err: std::io::Error, exe_name: &str, help: &str) -> Error { + use std::io::ErrorKind::*; + match err.kind() { + NotFound => format_err!("Could not find executable \"{}\". 
{}", exe_name, help), + _ => Error::from(err) + } +} + +pub fn pipe_output(mut cmd: Command, oup: &mut dyn Write, exe_name: &str, help: &str) -> Fallible<()> { + let mut cmd = cmd.stdout(Stdio::piped()).spawn().map_err(|e| map_exe_error(e, exe_name, help))?; + let stdo = cmd.stdout.as_mut().expect("is piped"); + std::io::copy(stdo, oup)?; + let status = cmd.wait()?; + if status.success() { + Ok(()) + } else { + Err(format_err!("subprocess failed: {:?}", status)) + } } impl FileAdapter for T -where - T: SpawningFileAdapter, + where + T: SpawningFileAdapter, { - fn adapt(&self, inp_fname: &str, oup: &mut dyn Write) -> std::io::Result<()> { - let mut cmd = self.command(inp_fname).stdout(Stdio::piped()).spawn()?; - let stdo = cmd.stdout.as_mut().expect("is piped"); - std::io::copy(stdo, oup)?; - let status = cmd.wait()?; - if status.success() { - Ok(()) - } else { - Err(std::io::Error::new( - std::io::ErrorKind::Other, - "subprocess failed", - )) - } + fn adapt(&self, inp_fname: &Path, oup: &mut dyn Write) -> Fallible<()> { + let cmd = Command::new(self.get_exe()); + pipe_output(self.command(inp_fname, cmd), oup, self.get_exe(), "") } } diff --git a/src/bin/rga-preproc.rs b/src/bin/rga-preproc.rs index a14009e..d8d2604 100644 --- a/src/bin/rga-preproc.rs +++ b/src/bin/rga-preproc.rs @@ -1,21 +1,13 @@ use path_clean::PathClean; use rga::adapters::*; use rga::CachingWriter; -use serde::{Deserialize, Serialize}; -use std::error::Error; -use std::fmt; -use std::io::Write; -use std::path::{Path, PathBuf}; -use tree_magic; +use failure::{Error, format_err}; -const max_db_blob_len: usize = 2000000; +// longest compressed conversion output to save in cache +const MAX_DB_BLOB_LEN: usize = 2000000; +const ZSTD_LEVEL: i32 = 12; -// lazy error -fn lerr(inp: impl AsRef) -> Box { - return inp.as_ref().into(); -} - -fn open_db() -> Result>, Box> { +fn open_db() -> Result>, Error> { let app_cache = cachedir::CacheDirConfig::new("rga").get_cache_dir()?; let db_arc = 
rkv::Manager::singleton() @@ -33,18 +25,20 @@ fn open_db() -> Result>, Box Result<(), Box> { +fn main() -> Result<(), Error> { //db. - let adapters = init_adapters()?; - let filepath = std::env::args() + let adapters = adapter_matcher()?; + let filepath = std::env::args_os() .skip(1) .next() - .ok_or(lerr("No filename specified"))?; - eprintln!("fname: {}", filepath); - let path = PathBuf::from(&filepath); + .ok_or(format_err!("No filename specified"))?; + eprintln!("inp fname: {:?}", filepath); + let path = std::env::current_dir()?.join(&filepath); + eprintln!("abs path: {:?}", path); + eprintln!("clean path: {:?}", path.clean()); let serialized_path: Vec = - bincode::serialize(&path.clean()).expect("could not serialize path"); - let filename = path.file_name().ok_or(lerr("Empty filename"))?; + bincode::serialize(&path.clean()).expect("could not serialize path"); // key in the cache database + let filename = path.file_name().ok_or(format_err!("Empty filename"))?; /*let mimetype = tree_magic::from_filepath(path).ok_or(lerr(format!( "File {} does not exist", @@ -64,32 +58,33 @@ fn main() -> Result<(), Box> { let db_env = db_arc.read().unwrap(); let db = db_env .open_single(db_name.as_str(), rkv::store::Options::create()) - .map_err(|p| lerr(format!("could not open db store: {:?}", p)))?; + .map_err(|p| format_err!("could not open db store: {:?}", p))?; let reader = db_env.read().expect("could not get reader"); match db .get(&reader, &serialized_path) - .map_err(|p| lerr(format!("could not read from db: {:?}", p)))? + .map_err(|p| format_err!("could not read from db: {:?}", p))? 
{ Some(rkv::Value::Blob(cached)) => { let stdouti = std::io::stdout(); zstd::stream::copy_decode(cached, stdouti.lock())?; Ok(()) } - Some(_) => Err(lerr("Integrity: value not blob")), + Some(_) => Err(format_err!("Integrity: value not blob")), None => { let stdouti = std::io::stdout(); - let mut compbuf = CachingWriter::new(stdouti.lock(), max_db_blob_len, 12)?; - ad.adapt(&filepath, &mut compbuf)?; + let mut compbuf = + CachingWriter::new(stdouti.lock(), MAX_DB_BLOB_LEN, ZSTD_LEVEL)?; + ad.adapt(&path, &mut compbuf)?; let compressed = compbuf.finish()?; if let Some(cached) = compressed { eprintln!("compressed len: {}", cached.len()); { let mut writer = db_env.write().map_err(|p| { - lerr(format!("could not open write handle to cache: {:?}", p)) + format_err!("could not open write handle to cache: {:?}", p) })?; db.put(&mut writer, &serialized_path, &rkv::Value::Blob(&cached)) - .map_err(|p| lerr(format!("could not write to cache: {:?}", p)))?; + .map_err(|p| format_err!("could not write to cache: {:?}", p))?; writer.commit().unwrap(); } } @@ -98,13 +93,18 @@ fn main() -> Result<(), Box> { } } None => { - eprintln!("no adapter for that file, running cat!"); - let stdini = std::io::stdin(); - let mut stdin = stdini.lock(); - let stdouti = std::io::stdout(); - let mut stdout = stdouti.lock(); - std::io::copy(&mut stdin, &mut stdout)?; - Ok(()) + let allow_cat = false; + if allow_cat { + eprintln!("no adapter for that file, running cat!"); + let stdini = std::io::stdin(); + let mut stdin = stdini.lock(); + let stdouti = std::io::stdout(); + let mut stdout = stdouti.lock(); + std::io::copy(&mut stdin, &mut stdout)?; + Ok(()) + } else { + Err(format_err!("No adapter found for file {:?}", filename)) + } } } } diff --git a/src/bin/rga.rs b/src/bin/rga.rs index 166c2c0..0d9c66c 100644 --- a/src/bin/rga.rs +++ b/src/bin/rga.rs @@ -1,13 +1,26 @@ -use rga::adapters; - +use rga::adapters::*; use std::process::Command; fn main() -> std::io::Result<()> { + let adapters = 
get_adapters(); + + let extensions = adapters + .iter() + .flat_map(|a| &a.metadata().matchers) + .filter_map(|m| match m { + Matcher::FileExtension(ext) => Some(ext as &str), + }) + .collect::>() + .join(","); + let exe = std::env::current_exe().expect("Could not get executable location"); let preproc_exe = exe.with_file_name("rga-preproc"); let mut child = Command::new("rg") + .arg("--no-line-number") .arg("--pre") .arg(preproc_exe) + .arg("--pre-glob") + .arg(format!("*.{{{}}}", extensions)) .args(std::env::args().skip(1)) .spawn()?; diff --git a/src/caching_writer.rs b/src/caching_writer.rs index bb1b74c..17c5173 100644 --- a/src/caching_writer.rs +++ b/src/caching_writer.rs @@ -1,8 +1,5 @@ use std::io::Write; - -enum Sta<'t> { - ToZstd(Vec, zstd::stream::write::Encoder<&'t mut Vec>), -} +use failure::Fallible; /** * wrap a writer so that it is passthrough, @@ -18,7 +15,7 @@ impl CachingWriter { out: W, max_cache_size: usize, compression_level: i32, - ) -> std::io::Result> { + ) -> Fallible> { Ok(CachingWriter { out, max_cache_size, @@ -48,7 +45,7 @@ impl Write for CachingWriter { Some(writer) => { let wrote = writer.write(buf)?; let compressed_len = writer.get_ref().len(); - eprintln!("wrote {} to zstd, len now {}", wrote, compressed_len); + //eprintln!("wrote {} to zstd, len now {}", wrote, compressed_len); if compressed_len > self.max_cache_size { eprintln!("cache longer than max, dropping"); //writer.finish(); diff --git a/src/errors.rs b/src/errors.rs new file mode 100644 index 0000000..e69de29 diff --git a/src/lib.rs b/src/lib.rs index b489eb7..c1855a6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,4 @@ pub mod adapters; mod caching_writer; +pub mod errors; pub use caching_writer::CachingWriter;