use failure crate, etc

This commit is contained in:
phiresky 2019-06-05 21:28:35 +02:00
parent e98c60001d
commit b48f456963
12 changed files with 312 additions and 138 deletions

59
Cargo.lock generated
View File

@ -26,6 +26,27 @@ name = "autocfg"
version = "0.1.4" version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "backtrace"
version = "0.3.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"autocfg 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
"backtrace-sys 0.1.28 (registry+https://github.com/rust-lang/crates.io-index)",
"cfg-if 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.57 (registry+https://github.com/rust-lang/crates.io-index)",
"rustc-demangle 0.1.15 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "backtrace-sys"
version = "0.1.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"cc 1.0.37 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.57 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]] [[package]]
name = "bincode" name = "bincode"
version = "1.1.4" version = "1.1.4"
@ -113,6 +134,7 @@ name = "failure"
version = "0.1.5" version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [ dependencies = [
"backtrace 0.3.30 (registry+https://github.com/rust-lang/crates.io-index)",
"failure_derive 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", "failure_derive 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
] ]
@ -157,6 +179,11 @@ dependencies = [
"unicode-normalization 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", "unicode-normalization 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)",
] ]
[[package]]
name = "itoa"
version = "0.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]] [[package]]
name = "lazy_static" name = "lazy_static"
version = "1.3.0" version = "1.3.0"
@ -471,10 +498,13 @@ version = "0.1.0"
dependencies = [ dependencies = [
"bincode 1.1.4 (registry+https://github.com/rust-lang/crates.io-index)", "bincode 1.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
"cachedir 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", "cachedir 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"failure 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
"path-clean 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "path-clean 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 1.1.6 (registry+https://github.com/rust-lang/crates.io-index)", "regex 1.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
"rkv 0.9.5 (registry+https://github.com/rust-lang/crates.io-index)", "rkv 0.9.5 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.92 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.92 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_json 1.0.39 (registry+https://github.com/rust-lang/crates.io-index)",
"tree_magic_fork 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", "tree_magic_fork 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
"zstd 0.4.24+zstd.1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "zstd 0.4.24+zstd.1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
] ]
@ -496,6 +526,11 @@ dependencies = [
"uuid 0.7.4 (registry+https://github.com/rust-lang/crates.io-index)", "uuid 0.7.4 (registry+https://github.com/rust-lang/crates.io-index)",
] ]
[[package]]
name = "rustc-demangle"
version = "0.1.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]] [[package]]
name = "rustc_version" name = "rustc_version"
version = "0.2.3" version = "0.2.3"
@ -504,6 +539,11 @@ dependencies = [
"semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", "semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)",
] ]
[[package]]
name = "ryu"
version = "0.2.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]] [[package]]
name = "scopeguard" name = "scopeguard"
version = "0.3.3" version = "0.3.3"
@ -531,6 +571,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
name = "serde" name = "serde"
version = "1.0.92" version = "1.0.92"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"serde_derive 1.0.92 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]] [[package]]
name = "serde_derive" name = "serde_derive"
@ -542,6 +585,16 @@ dependencies = [
"syn 0.15.34 (registry+https://github.com/rust-lang/crates.io-index)", "syn 0.15.34 (registry+https://github.com/rust-lang/crates.io-index)",
] ]
[[package]]
name = "serde_json"
version = "1.0.39"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"itoa 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)",
"ryu 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.92 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]] [[package]]
name = "smallvec" name = "smallvec"
version = "0.6.9" version = "0.6.9"
@ -685,6 +738,8 @@ dependencies = [
"checksum arrayref 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "0d382e583f07208808f6b1249e60848879ba3543f57c32277bf52d69c2f0f0ee" "checksum arrayref 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "0d382e583f07208808f6b1249e60848879ba3543f57c32277bf52d69c2f0f0ee"
"checksum arrayvec 0.4.10 (registry+https://github.com/rust-lang/crates.io-index)" = "92c7fb76bc8826a8b33b4ee5bb07a247a81e76764ab4d55e8f73e3a4d8808c71" "checksum arrayvec 0.4.10 (registry+https://github.com/rust-lang/crates.io-index)" = "92c7fb76bc8826a8b33b4ee5bb07a247a81e76764ab4d55e8f73e3a4d8808c71"
"checksum autocfg 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "0e49efa51329a5fd37e7c79db4621af617cd4e3e5bc224939808d076077077bf" "checksum autocfg 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "0e49efa51329a5fd37e7c79db4621af617cd4e3e5bc224939808d076077077bf"
"checksum backtrace 0.3.30 (registry+https://github.com/rust-lang/crates.io-index)" = "ada4c783bb7e7443c14e0480f429ae2cc99da95065aeab7ee1b81ada0419404f"
"checksum backtrace-sys 0.1.28 (registry+https://github.com/rust-lang/crates.io-index)" = "797c830ac25ccc92a7f8a7b9862bde440715531514594a6154e3d4a54dd769b6"
"checksum bincode 1.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "9f04a5e50dc80b3d5d35320889053637d15011aed5e66b66b37ae798c65da6f7" "checksum bincode 1.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "9f04a5e50dc80b3d5d35320889053637d15011aed5e66b66b37ae798c65da6f7"
"checksum bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "228047a76f468627ca71776ecdebd732a3423081fcf5125585bcd7c49886ce12" "checksum bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "228047a76f468627ca71776ecdebd732a3423081fcf5125585bcd7c49886ce12"
"checksum byteorder 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a019b10a2a7cdeb292db131fc8113e57ea2a908f6e7894b0c3c671893b65dbeb" "checksum byteorder 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a019b10a2a7cdeb292db131fc8113e57ea2a908f6e7894b0c3c671893b65dbeb"
@ -703,6 +758,7 @@ dependencies = [
"checksum fuchsia-cprng 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba" "checksum fuchsia-cprng 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba"
"checksum glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "8be18de09a56b60ed0edf84bc9df007e30040691af7acd1c41874faac5895bfb" "checksum glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "8be18de09a56b60ed0edf84bc9df007e30040691af7acd1c41874faac5895bfb"
"checksum idna 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "38f09e0f0b1fb55fdee1f17470ad800da77af5186a1a76c026b679358b7e844e" "checksum idna 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "38f09e0f0b1fb55fdee1f17470ad800da77af5186a1a76c026b679358b7e844e"
"checksum itoa 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "501266b7edd0174f8530248f87f99c88fbe60ca4ef3dd486835b8d8d53136f7f"
"checksum lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bc5729f27f159ddd61f4df6228e827e86643d4d3e7c32183cb30a1c08f604a14" "checksum lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bc5729f27f159ddd61f4df6228e827e86643d4d3e7c32183cb30a1c08f604a14"
"checksum libc 0.2.57 (registry+https://github.com/rust-lang/crates.io-index)" = "a844cabbd5a77e60403a58af576f0a1baa83c3dd2670be63e615bd24fc58b82d" "checksum libc 0.2.57 (registry+https://github.com/rust-lang/crates.io-index)" = "a844cabbd5a77e60403a58af576f0a1baa83c3dd2670be63e615bd24fc58b82d"
"checksum lmdb-rkv 0.11.4 (registry+https://github.com/rust-lang/crates.io-index)" = "e25b4069789bf7ac069d6fd58229f18aec20c6f7cc9173cb731d11c10dbb6b6e" "checksum lmdb-rkv 0.11.4 (registry+https://github.com/rust-lang/crates.io-index)" = "e25b4069789bf7ac069d6fd58229f18aec20c6f7cc9173cb731d11c10dbb6b6e"
@ -742,13 +798,16 @@ dependencies = [
"checksum regex 1.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "8f0a0bcab2fd7d1d7c54fa9eae6f43eddeb9ce2e7352f8518a814a4f65d60c58" "checksum regex 1.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "8f0a0bcab2fd7d1d7c54fa9eae6f43eddeb9ce2e7352f8518a814a4f65d60c58"
"checksum regex-syntax 0.6.6 (registry+https://github.com/rust-lang/crates.io-index)" = "dcfd8681eebe297b81d98498869d4aae052137651ad7b96822f09ceb690d0a96" "checksum regex-syntax 0.6.6 (registry+https://github.com/rust-lang/crates.io-index)" = "dcfd8681eebe297b81d98498869d4aae052137651ad7b96822f09ceb690d0a96"
"checksum rkv 0.9.5 (registry+https://github.com/rust-lang/crates.io-index)" = "2c1b8d667bf149bfac7c47bb728dfb7246f35fdf61c2f16f9f588194f087d23c" "checksum rkv 0.9.5 (registry+https://github.com/rust-lang/crates.io-index)" = "2c1b8d667bf149bfac7c47bb728dfb7246f35fdf61c2f16f9f588194f087d23c"
"checksum rustc-demangle 0.1.15 (registry+https://github.com/rust-lang/crates.io-index)" = "a7f4dccf6f4891ebcc0c39f9b6eb1a83b9bf5d747cb439ec6fba4f3b977038af"
"checksum rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" "checksum rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a"
"checksum ryu 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "b96a9549dc8d48f2c283938303c4b5a77aa29bfbc5b54b084fb1630408899a8f"
"checksum scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "94258f53601af11e6a49f722422f6e3425c52b06245a5cf9bc09908b174f5e27" "checksum scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "94258f53601af11e6a49f722422f6e3425c52b06245a5cf9bc09908b174f5e27"
"checksum scopeguard 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b42e15e59b18a828bbf5c58ea01debb36b9b096346de35d941dcb89009f24a0d" "checksum scopeguard 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b42e15e59b18a828bbf5c58ea01debb36b9b096346de35d941dcb89009f24a0d"
"checksum semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" "checksum semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403"
"checksum semver-parser 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" "checksum semver-parser 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"
"checksum serde 1.0.92 (registry+https://github.com/rust-lang/crates.io-index)" = "32746bf0f26eab52f06af0d0aa1984f641341d06d8d673c693871da2d188c9be" "checksum serde 1.0.92 (registry+https://github.com/rust-lang/crates.io-index)" = "32746bf0f26eab52f06af0d0aa1984f641341d06d8d673c693871da2d188c9be"
"checksum serde_derive 1.0.92 (registry+https://github.com/rust-lang/crates.io-index)" = "46a3223d0c9ba936b61c0d2e3e559e3217dbfb8d65d06d26e8b3c25de38bae3e" "checksum serde_derive 1.0.92 (registry+https://github.com/rust-lang/crates.io-index)" = "46a3223d0c9ba936b61c0d2e3e559e3217dbfb8d65d06d26e8b3c25de38bae3e"
"checksum serde_json 1.0.39 (registry+https://github.com/rust-lang/crates.io-index)" = "5a23aa71d4a4d43fdbfaac00eff68ba8a06a51759a89ac3304323e800c4dd40d"
"checksum smallvec 0.6.9 (registry+https://github.com/rust-lang/crates.io-index)" = "c4488ae950c49d403731982257768f48fada354a5203fe81f9bb6f43ca9002be" "checksum smallvec 0.6.9 (registry+https://github.com/rust-lang/crates.io-index)" = "c4488ae950c49d403731982257768f48fada354a5203fe81f9bb6f43ca9002be"
"checksum syn 0.15.34 (registry+https://github.com/rust-lang/crates.io-index)" = "a1393e4a97a19c01e900df2aec855a29f71cf02c402e2f443b8d2747c25c5dbe" "checksum syn 0.15.34 (registry+https://github.com/rust-lang/crates.io-index)" = "a1393e4a97a19c01e900df2aec855a29f71cf02c402e2f443b8d2747c25c5dbe"
"checksum synstructure 0.10.2 (registry+https://github.com/rust-lang/crates.io-index)" = "02353edf96d6e4dc81aea2d8490a7e9db177bf8acb0e951c24940bf866cb313f" "checksum synstructure 0.10.2 (registry+https://github.com/rust-lang/crates.io-index)" = "02353edf96d6e4dc81aea2d8490a7e9db177bf8acb0e951c24940bf866cb313f"

View File

@ -22,5 +22,8 @@ rkv = "0.9.5"
cachedir = "0.1.1" cachedir = "0.1.1"
path-clean = "0.1.0" path-clean = "0.1.0"
bincode = "1.1.4" bincode = "1.1.4"
serde = "1.0.92" serde = { version="1.0.92", features = ["derive"] }
zstd = "0.4.24" zstd = "0.4.24"
lazy_static = "1.3.0"
serde_json = "1.0.39"
failure = "0.1.5"

View File

@ -2,19 +2,18 @@ pub mod ffmpeg;
pub mod pandoc; pub mod pandoc;
pub mod poppler; pub mod poppler;
pub mod spawning; pub mod spawning;
use regex::{Regex, RegexSet};
use std::io::BufRead;
use std::io::Write;
use std::path::Path;
use std::rc::Rc;
use failure::*;
//pub use ffmpeg::FffmpegAdapter; //pub use ffmpeg::FffmpegAdapter;
use regex::{Regex, RegexSet};
use std::collections::HashMap;
use std::ffi::OsString;
use std::io::BufRead;
use std::io::Write;
use std::rc::Rc;
pub enum Matcher { pub enum Matcher {
// MimeType(Regex), // MimeType(Regex),
FileName(Regex), FileExtension(String),
} }
pub struct AdapterMeta { pub struct AdapterMeta {
@ -34,22 +33,24 @@ pub trait GetMetadata {
fn metadata<'a>(&'a self) -> &'a AdapterMeta; fn metadata<'a>(&'a self) -> &'a AdapterMeta;
} }
pub trait FileAdapter: GetMetadata { pub trait FileAdapter: GetMetadata {
fn adapt(&self, inp_fname: &str, oup: &mut dyn Write) -> std::io::Result<()>; fn adapt(&self, inp_fname: &Path, oup: &mut dyn Write) -> Fallible<()>;
} }
pub fn ExtensionMatcher(extension: &str) -> Matcher { pub fn extension_to_regex(extension: &str) -> Regex {
let regex = Regex::new(&format!(".*\\.{}", &regex::escape(extension))) Regex::new(&format!(".*\\.{}", &regex::escape(extension))).expect("we know this regex compiles")
.expect("we know this regex compiles");
Matcher::FileName(regex)
} }
pub fn init_adapters() -> Result<impl Fn(FileMeta) -> Option<Rc<dyn FileAdapter>>, regex::Error> { pub fn get_adapters() -> Vec<Rc<dyn FileAdapter>> {
let adapters: Vec<Rc<dyn FileAdapter>> = vec![ let adapters: Vec<Rc<dyn FileAdapter>> = vec![
Rc::new(crate::adapters::ffmpeg::FFmpegAdapter::new()), Rc::new(crate::adapters::ffmpeg::FFmpegAdapter::new()),
Rc::new(crate::adapters::pandoc::PandocAdapter::new()), Rc::new(crate::adapters::pandoc::PandocAdapter::new()),
Rc::new(crate::adapters::poppler::PopplerAdapter::new()), Rc::new(crate::adapters::poppler::PopplerAdapter::new()),
]; ];
adapters
}
pub fn adapter_matcher() -> Result<impl Fn(FileMeta) -> Option<Rc<dyn FileAdapter>>, regex::Error> {
let adapters = get_adapters();
let mut fname_regexes = vec![]; let mut fname_regexes = vec![];
//let mut mime_regexes = vec![]; //let mut mime_regexes = vec![];
for adapter in adapters.into_iter() { for adapter in adapters.into_iter() {
@ -57,14 +58,16 @@ pub fn init_adapters() -> Result<impl Fn(FileMeta) -> Option<Rc<dyn FileAdapter>
for matcher in &metadata.matchers { for matcher in &metadata.matchers {
match matcher { match matcher {
//Matcher::MimeType(re) => mime_regexes.push((re.clone(), adapter.clone())), //Matcher::MimeType(re) => mime_regexes.push((re.clone(), adapter.clone())),
Matcher::FileName(re) => fname_regexes.push((re.clone(), adapter.clone())), Matcher::FileExtension(re) => {
fname_regexes.push((extension_to_regex(re), adapter.clone()))
}
}; };
} }
} }
let fname_regex_set = RegexSet::new(fname_regexes.iter().map(|p| p.0.as_str()))?; let fname_regex_set = RegexSet::new(fname_regexes.iter().map(|p| p.0.as_str()))?;
//let mime_regex_set = RegexSet::new(mime_regexes.iter().map(|p| p.0.as_str()))?; //let mime_regex_set = RegexSet::new(mime_regexes.iter().map(|p| p.0.as_str()))?;
return Ok(move |meta: FileMeta| { return Ok(move |meta: FileMeta| {
// todo: handle multiple matches // todo: handle multiple conflicting matches
for m in fname_regex_set.matches(&meta.lossy_filename) { for m in fname_regex_set.matches(&meta.lossy_filename) {
return Some(fname_regexes[m].1.clone()); return Some(fname_regexes[m].1.clone());
} }

View File

@ -1,34 +1,101 @@
use super::*; use super::*;
use spawning::SpawningFileAdapter; use super::spawning::map_exe_error;
use std::io::Write; use lazy_static::lazy_static;
use std::process::Command; use serde::{Deserialize, Serialize};
use std::io::BufReader;
use std::process::*;
use failure::*;
// todo:
// maybe todo: read list of extensions from
//ffmpeg -demuxers | tail -n+5 | awk '{print $2}' | while read demuxer; do echo MUX=$demuxer; ffmpeg -h demuxer=$demuxer | grep 'Common extensions'; done 2>/dev/null
static EXTENSIONS: &[&str] = &["mkv", "mp4", "avi"];
pub struct FFmpegAdapter { lazy_static! {
_metadata: AdapterMeta, static ref METADATA: AdapterMeta = AdapterMeta {
name: "ffmpeg".to_owned(),
version: 1,
matchers: EXTENSIONS
.iter()
.map(|s| Matcher::FileExtension(s.to_string()))
.collect(),
};
} }
// maybe todo: read from
// ffmpeg -demuxers pub struct FFmpegAdapter;
// ffmpeg -h demuxer=xyz
static extensions: &[&str] = &["mkv", "mp4", "avi"];
impl FFmpegAdapter { impl FFmpegAdapter {
pub fn new() -> FFmpegAdapter { pub fn new() -> FFmpegAdapter {
FFmpegAdapter { FFmpegAdapter
_metadata: AdapterMeta {
name: "ffmpeg".to_owned(),
version: 1,
matchers: extensions.iter().map(|s| ExtensionMatcher(s)).collect(),
},
}
} }
} }
impl GetMetadata for FFmpegAdapter { impl GetMetadata for FFmpegAdapter {
fn metadata<'a>(&'a self) -> &'a AdapterMeta { fn metadata<'a>(&'a self) -> &'a AdapterMeta {
&self._metadata &METADATA
} }
} }
impl SpawningFileAdapter for FFmpegAdapter {
fn command(&self, inp_fname: &str) -> Command { #[derive(Serialize, Deserialize)]
/// Shape of the JSON document that `ffprobe -of json` prints to stdout.
/// `FFmpegAdapter::adapt` deserializes the probe's stdout into this type and
/// only looks at whether `streams` contains any entries.
struct FFprobeOutput {
// one element per stream that ffprobe reported
streams: Vec<FFprobeStream>,
}
/// One entry of the `streams` array in ffprobe's JSON output.
/// The probe is run with `-show_entries stream=codec_type`, so `codec_type`
/// is the only field declared here.
#[derive(Serialize, Deserialize)]
struct FFprobeStream {
// kind of stream as named by ffprobe
codec_type: String, // video,audio,subtitle
}
impl FileAdapter for FFmpegAdapter {
fn adapt(&self, inp_fname: &Path, oup: &mut dyn Write) -> Fallible<()> {
let spawn_fail = |e| map_exe_error(e, "ffprobe", "Make sure you have ffmpeg installed.");
let has_subtitles = {
let probe = Command::new("ffprobe")
.args(vec![
"-v",
"error",
"-select_streams",
"s",
"-of",
"json",
"-show_entries",
"stream=codec_type",
])
.arg("-i")
.arg(inp_fname)
.output().map_err(spawn_fail)?;
if !probe.status.success() {
return Err(format_err!("ffprobe failed: {:?}", probe.status));
}
println!("{}", String::from_utf8_lossy(&probe.stdout));
let p: FFprobeOutput = serde_json::from_slice(&probe.stdout)?;
(p.streams.iter().count() > 0)
};
{
let mut probe = Command::new("ffprobe")
.args(vec![
"-v",
"error",
"-show_format",
"-show_streams",
"-of",
"flat",
// "-show_data",
"-show_error",
"-show_programs",
"-show_chapters",
// "-count_frames",
//"-count_packets",
])
.arg("-i")
.arg(inp_fname)
.stdout(Stdio::piped())
.spawn()?;
for line in BufReader::new(probe.stdout.as_mut().unwrap()).lines() {
writeln!(oup, "metadata: {}", line?)?;
}
let exit = probe.wait()?;
if !exit.success() {
return Err(format_err!("ffprobe failed: {:?}", exit));
}
}
if has_subtitles {
let mut cmd = Command::new("ffmpeg"); let mut cmd = Command::new("ffmpeg");
cmd.arg("-hide_banner") cmd.arg("-hide_banner")
.arg("-loglevel") .arg("-loglevel")
@ -38,6 +105,24 @@ impl SpawningFileAdapter for FFmpegAdapter {
.arg("-f") .arg("-f")
.arg("webvtt") .arg("webvtt")
.arg("-"); .arg("-");
cmd let mut cmd = cmd.stdout(Stdio::piped()).spawn().map_err(spawn_fail)?;
let stdo = cmd.stdout.as_mut().expect("is piped");
let time_re = Regex::new(r".*\d.*-->.*\d.*").unwrap();
let mut time: String = "".to_owned();
for line in BufReader::new(stdo).lines() {
let line = line?;
// 09:55.195 --> 09:56.730
if time_re.is_match(&line) {
time = line.to_owned();
} else {
if line.len() == 0 {
oup.write(b"\n")?;
} else {
writeln!(oup, "{}: {}", time, line)?;
}
}
}
}
Ok(())
} }
} }

View File

@ -1,7 +1,6 @@
use super::*; use super::*;
use lazy_static::lazy_static;
use spawning::SpawningFileAdapter; use spawning::SpawningFileAdapter;
use std::io::Write;
use std::process::Command; use std::process::Command;
// from https://github.com/jgm/pandoc/blob/master/src/Text/Pandoc/App/FormatHeuristics.hs // from https://github.com/jgm/pandoc/blob/master/src/Text/Pandoc/App/FormatHeuristics.hs
@ -40,32 +39,35 @@ use std::process::Command;
//"xhtml" -> Just "html" //"xhtml" -> Just "html"
//"wiki" -> Just "mediawiki" //"wiki" -> Just "mediawiki"
static extensions: &[&str] = &["epub", "odt", "docx", "pptx", "fb2", "icml", "rtf", "ipynb"]; static EXTENSIONS: &[&str] = &["epub", "odt", "docx", "pptx", "fb2", "ipynb"];
pub struct PandocAdapter { lazy_static! {
_metadata: AdapterMeta, static ref METADATA: AdapterMeta = AdapterMeta {
name: "pandoc".to_owned(),
version: 1,
matchers: EXTENSIONS
.iter()
.map(|s| Matcher::FileExtension(s.to_string()))
.collect(),
};
} }
pub struct PandocAdapter;
impl PandocAdapter { impl PandocAdapter {
pub fn new() -> PandocAdapter { pub fn new() -> PandocAdapter {
PandocAdapter { PandocAdapter
_metadata: AdapterMeta {
name: "pandoc".to_owned(),
version: 1,
// todo: read from ffmpeg -demuxers?
matchers: extensions.iter().map(|s| ExtensionMatcher(s)).collect(),
},
}
} }
} }
impl GetMetadata for PandocAdapter { impl GetMetadata for PandocAdapter {
fn metadata<'a>(&'a self) -> &'a AdapterMeta { fn metadata<'a>(&'a self) -> &'a AdapterMeta {
&self._metadata &METADATA
} }
} }
impl SpawningFileAdapter for PandocAdapter { impl SpawningFileAdapter for PandocAdapter {
fn command(&self, inp_fname: &str) -> Command { fn get_exe(&self) -> &str {
let mut cmd = Command::new("pandoc"); "pandoc"
}
fn command(&self, inp_fname: &Path, mut cmd: Command) -> Command {
cmd cmd
// simpler markdown (with more information loss but plainer text)
.arg("--to=commonmark-header_attributes-link_attributes-fenced_divs-markdown_in_html_blocks-raw_html-native_divs-native_spans-bracketed_spans") .arg("--to=commonmark-header_attributes-link_attributes-fenced_divs-markdown_in_html_blocks-raw_html-native_divs-native_spans-bracketed_spans")

View File

@ -1,36 +1,35 @@
use super::*; use super::*;
use lazy_static::lazy_static;
use spawning::SpawningFileAdapter; use spawning::SpawningFileAdapter;
use std::io::Read;
use std::io::Write;
use std::process::Command; use std::process::Command;
use std::process::Stdio;
static extensions: &[&str] = &["pdf"];
pub struct PopplerAdapter { static EXTENSIONS: &[&str] = &["pdf"];
_metadata: AdapterMeta,
lazy_static! {
static ref METADATA: AdapterMeta = AdapterMeta {
name: "poppler".to_owned(),
version: 1,
matchers: EXTENSIONS.iter().map(|s| Matcher::FileExtension(s.to_string())).collect(),
};
} }
pub struct PopplerAdapter;
impl PopplerAdapter { impl PopplerAdapter {
pub fn new() -> PopplerAdapter { pub fn new() -> PopplerAdapter {
PopplerAdapter { PopplerAdapter
_metadata: AdapterMeta {
name: "poppler".to_owned(),
version: 1,
// todo: read from ffmpeg -demuxers?
matchers: extensions.iter().map(|s| ExtensionMatcher(s)).collect(),
},
}
} }
} }
impl GetMetadata for PopplerAdapter { impl GetMetadata for PopplerAdapter {
fn metadata<'a>(&'a self) -> &'a AdapterMeta { fn metadata<'a>(&'a self) -> &'a AdapterMeta {
&self._metadata &METADATA
} }
} }
impl SpawningFileAdapter for PopplerAdapter { impl SpawningFileAdapter for PopplerAdapter {
fn command(&self, inp_fname: &str) -> Command { fn get_exe(&self) -> &str {
let mut cmd = Command::new("pdftotext"); "pdftotext"
}
fn command(&self, inp_fname: &Path, mut cmd: Command) -> Command {
cmd.arg("-layout").arg("--").arg(inp_fname).arg("-"); cmd.arg("-layout").arg("--").arg(inp_fname).arg("-");
cmd cmd
} }

View File

@ -2,27 +2,39 @@ use super::*;
use std::io::Write; use std::io::Write;
use std::process::Command; use std::process::Command;
use std::process::Stdio; use std::process::Stdio;
use failure::*;
pub trait SpawningFileAdapter: GetMetadata { pub trait SpawningFileAdapter: GetMetadata {
fn command(&self, inp_fname: &str) -> Command; fn get_exe(&self) -> &str;
fn command(&self, inp_fname: &Path, command: Command) -> Command;
} }
impl<T> FileAdapter for T pub fn map_exe_error(err: std::io::Error, exe_name: &str, help: &str) -> Error {
where use std::io::ErrorKind::*;
T: SpawningFileAdapter, match err.kind() {
{ NotFound => format_err!("Could not find executable \"{}\". {}", exe_name, help),
fn adapt(&self, inp_fname: &str, oup: &mut dyn Write) -> std::io::Result<()> { _ => Error::from(err)
let mut cmd = self.command(inp_fname).stdout(Stdio::piped()).spawn()?; }
}
pub fn pipe_output(mut cmd: Command, oup: &mut dyn Write, exe_name: &str, help: &str) -> Fallible<()> {
let mut cmd = cmd.stdout(Stdio::piped()).spawn().map_err(|e| map_exe_error(e, exe_name, help))?;
let stdo = cmd.stdout.as_mut().expect("is piped"); let stdo = cmd.stdout.as_mut().expect("is piped");
std::io::copy(stdo, oup)?; std::io::copy(stdo, oup)?;
let status = cmd.wait()?; let status = cmd.wait()?;
if status.success() { if status.success() {
Ok(()) Ok(())
} else { } else {
Err(std::io::Error::new( Err(format_err!("subprocess failed: {:?}", status))
std::io::ErrorKind::Other,
"subprocess failed",
))
} }
} }
impl<T> FileAdapter for T
where
T: SpawningFileAdapter,
{
fn adapt(&self, inp_fname: &Path, oup: &mut dyn Write) -> Fallible<()> {
let cmd = Command::new(self.get_exe());
pipe_output(self.command(inp_fname, cmd), oup, self.get_exe(), "")
}
} }

View File

@ -1,21 +1,13 @@
use path_clean::PathClean; use path_clean::PathClean;
use rga::adapters::*; use rga::adapters::*;
use rga::CachingWriter; use rga::CachingWriter;
use serde::{Deserialize, Serialize}; use failure::{Error, format_err};
use std::error::Error;
use std::fmt;
use std::io::Write;
use std::path::{Path, PathBuf};
use tree_magic;
const max_db_blob_len: usize = 2000000; // longest compressed conversion output to save in cache
const MAX_DB_BLOB_LEN: usize = 2000000;
const ZSTD_LEVEL: i32 = 12;
// lazy error fn open_db() -> Result<std::sync::Arc<std::sync::RwLock<rkv::Rkv>>, Error> {
fn lerr(inp: impl AsRef<str>) -> Box<dyn Error> {
return inp.as_ref().into();
}
fn open_db() -> Result<std::sync::Arc<std::sync::RwLock<rkv::Rkv>>, Box<dyn Error>> {
let app_cache = cachedir::CacheDirConfig::new("rga").get_cache_dir()?; let app_cache = cachedir::CacheDirConfig::new("rga").get_cache_dir()?;
let db_arc = rkv::Manager::singleton() let db_arc = rkv::Manager::singleton()
@ -33,18 +25,20 @@ fn open_db() -> Result<std::sync::Arc<std::sync::RwLock<rkv::Rkv>>, Box<dyn Erro
Ok(db_arc) Ok(db_arc)
} }
fn main() -> Result<(), Box<dyn Error>> { fn main() -> Result<(), Error> {
//db. //db.
let adapters = init_adapters()?; let adapters = adapter_matcher()?;
let filepath = std::env::args() let filepath = std::env::args_os()
.skip(1) .skip(1)
.next() .next()
.ok_or(lerr("No filename specified"))?; .ok_or(format_err!("No filename specified"))?;
eprintln!("fname: {}", filepath); eprintln!("inp fname: {:?}", filepath);
let path = PathBuf::from(&filepath); let path = std::env::current_dir()?.join(&filepath);
eprintln!("abs path: {:?}", path);
eprintln!("clean path: {:?}", path.clean());
let serialized_path: Vec<u8> = let serialized_path: Vec<u8> =
bincode::serialize(&path.clean()).expect("could not serialize path"); bincode::serialize(&path.clean()).expect("could not serialize path"); // key in the cache database
let filename = path.file_name().ok_or(lerr("Empty filename"))?; let filename = path.file_name().ok_or(format_err!("Empty filename"))?;
/*let mimetype = tree_magic::from_filepath(path).ok_or(lerr(format!( /*let mimetype = tree_magic::from_filepath(path).ok_or(lerr(format!(
"File {} does not exist", "File {} does not exist",
@ -64,32 +58,33 @@ fn main() -> Result<(), Box<dyn Error>> {
let db_env = db_arc.read().unwrap(); let db_env = db_arc.read().unwrap();
let db = db_env let db = db_env
.open_single(db_name.as_str(), rkv::store::Options::create()) .open_single(db_name.as_str(), rkv::store::Options::create())
.map_err(|p| lerr(format!("could not open db store: {:?}", p)))?; .map_err(|p| format_err!("could not open db store: {:?}", p))?;
let reader = db_env.read().expect("could not get reader"); let reader = db_env.read().expect("could not get reader");
match db match db
.get(&reader, &serialized_path) .get(&reader, &serialized_path)
.map_err(|p| lerr(format!("could not read from db: {:?}", p)))? .map_err(|p| format_err!("could not read from db: {:?}", p))?
{ {
Some(rkv::Value::Blob(cached)) => { Some(rkv::Value::Blob(cached)) => {
let stdouti = std::io::stdout(); let stdouti = std::io::stdout();
zstd::stream::copy_decode(cached, stdouti.lock())?; zstd::stream::copy_decode(cached, stdouti.lock())?;
Ok(()) Ok(())
} }
Some(_) => Err(lerr("Integrity: value not blob")), Some(_) => Err(format_err!("Integrity: value not blob")),
None => { None => {
let stdouti = std::io::stdout(); let stdouti = std::io::stdout();
let mut compbuf = CachingWriter::new(stdouti.lock(), max_db_blob_len, 12)?; let mut compbuf =
ad.adapt(&filepath, &mut compbuf)?; CachingWriter::new(stdouti.lock(), MAX_DB_BLOB_LEN, ZSTD_LEVEL)?;
ad.adapt(&path, &mut compbuf)?;
let compressed = compbuf.finish()?; let compressed = compbuf.finish()?;
if let Some(cached) = compressed { if let Some(cached) = compressed {
eprintln!("compressed len: {}", cached.len()); eprintln!("compressed len: {}", cached.len());
{ {
let mut writer = db_env.write().map_err(|p| { let mut writer = db_env.write().map_err(|p| {
lerr(format!("could not open write handle to cache: {:?}", p)) format_err!("could not open write handle to cache: {:?}", p)
})?; })?;
db.put(&mut writer, &serialized_path, &rkv::Value::Blob(&cached)) db.put(&mut writer, &serialized_path, &rkv::Value::Blob(&cached))
.map_err(|p| lerr(format!("could not write to cache: {:?}", p)))?; .map_err(|p| format_err!("could not write to cache: {:?}", p))?;
writer.commit().unwrap(); writer.commit().unwrap();
} }
} }
@ -98,6 +93,8 @@ fn main() -> Result<(), Box<dyn Error>> {
} }
} }
None => { None => {
let allow_cat = false;
if allow_cat {
eprintln!("no adapter for that file, running cat!"); eprintln!("no adapter for that file, running cat!");
let stdini = std::io::stdin(); let stdini = std::io::stdin();
let mut stdin = stdini.lock(); let mut stdin = stdini.lock();
@ -105,6 +102,9 @@ fn main() -> Result<(), Box<dyn Error>> {
let mut stdout = stdouti.lock(); let mut stdout = stdouti.lock();
std::io::copy(&mut stdin, &mut stdout)?; std::io::copy(&mut stdin, &mut stdout)?;
Ok(()) Ok(())
} else {
Err(format_err!("No adapter found for file {:?}", filename))
}
} }
} }
} }

View File

@ -1,13 +1,26 @@
use rga::adapters; use rga::adapters::*;
use std::process::Command; use std::process::Command;
fn main() -> std::io::Result<()> { fn main() -> std::io::Result<()> {
let adapters = get_adapters();
let extensions = adapters
.iter()
.flat_map(|a| &a.metadata().matchers)
.filter_map(|m| match m {
Matcher::FileExtension(ext) => Some(ext as &str),
})
.collect::<Vec<_>>()
.join(",");
let exe = std::env::current_exe().expect("Could not get executable location"); let exe = std::env::current_exe().expect("Could not get executable location");
let preproc_exe = exe.with_file_name("rga-preproc"); let preproc_exe = exe.with_file_name("rga-preproc");
let mut child = Command::new("rg") let mut child = Command::new("rg")
.arg("--no-line-number")
.arg("--pre") .arg("--pre")
.arg(preproc_exe) .arg(preproc_exe)
.arg("--pre-glob")
.arg(format!("*.{{{}}}", extensions))
.args(std::env::args().skip(1)) .args(std::env::args().skip(1))
.spawn()?; .spawn()?;

View File

@ -1,8 +1,5 @@
use std::io::Write; use std::io::Write;
use failure::Fallible;
enum Sta<'t> {
ToZstd(Vec<u8>, zstd::stream::write::Encoder<&'t mut Vec<u8>>),
}
/** /**
* wrap a writer so that it is passthrough, * wrap a writer so that it is passthrough,
@ -18,7 +15,7 @@ impl<W: Write> CachingWriter<W> {
out: W, out: W,
max_cache_size: usize, max_cache_size: usize,
compression_level: i32, compression_level: i32,
) -> std::io::Result<CachingWriter<W>> { ) -> Fallible<CachingWriter<W>> {
Ok(CachingWriter { Ok(CachingWriter {
out, out,
max_cache_size, max_cache_size,
@ -48,7 +45,7 @@ impl<W: Write> Write for CachingWriter<W> {
Some(writer) => { Some(writer) => {
let wrote = writer.write(buf)?; let wrote = writer.write(buf)?;
let compressed_len = writer.get_ref().len(); let compressed_len = writer.get_ref().len();
eprintln!("wrote {} to zstd, len now {}", wrote, compressed_len); //eprintln!("wrote {} to zstd, len now {}", wrote, compressed_len);
if compressed_len > self.max_cache_size { if compressed_len > self.max_cache_size {
eprintln!("cache longer than max, dropping"); eprintln!("cache longer than max, dropping");
//writer.finish(); //writer.finish();

0
src/errors.rs Normal file
View File

View File

@ -1,3 +1,4 @@
pub mod adapters; pub mod adapters;
mod caching_writer; mod caching_writer;
pub mod errors;
pub use caching_writer::CachingWriter; pub use caching_writer::CachingWriter;