mirror of
https://github.com/FliegendeWurst/ripgrep-all.git
synced 2024-11-24 04:14:57 +00:00
use failure crate, etc
This commit is contained in:
parent
e98c60001d
commit
b48f456963
59
Cargo.lock
generated
59
Cargo.lock
generated
@ -26,6 +26,27 @@ name = "autocfg"
|
||||
version = "0.1.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "backtrace"
|
||||
version = "0.3.30"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"autocfg 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"backtrace-sys 0.1.28 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"cfg-if 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.57 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rustc-demangle 0.1.15 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "backtrace-sys"
|
||||
version = "0.1.28"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"cc 1.0.37 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.57 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bincode"
|
||||
version = "1.1.4"
|
||||
@ -113,6 +134,7 @@ name = "failure"
|
||||
version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"backtrace 0.3.30 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"failure_derive 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
@ -157,6 +179,11 @@ dependencies = [
|
||||
"unicode-normalization 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "itoa"
|
||||
version = "0.4.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "lazy_static"
|
||||
version = "1.3.0"
|
||||
@ -471,10 +498,13 @@ version = "0.1.0"
|
||||
dependencies = [
|
||||
"bincode 1.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"cachedir 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"failure 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"path-clean 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 1.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rkv 0.9.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde 1.0.92 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde_json 1.0.39 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"tree_magic_fork 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"zstd 0.4.24+zstd.1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
@ -496,6 +526,11 @@ dependencies = [
|
||||
"uuid 0.7.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustc-demangle"
|
||||
version = "0.1.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "rustc_version"
|
||||
version = "0.2.3"
|
||||
@ -504,6 +539,11 @@ dependencies = [
|
||||
"semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ryu"
|
||||
version = "0.2.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "scopeguard"
|
||||
version = "0.3.3"
|
||||
@ -531,6 +571,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
name = "serde"
|
||||
version = "1.0.92"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"serde_derive 1.0.92 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_derive"
|
||||
@ -542,6 +585,16 @@ dependencies = [
|
||||
"syn 0.15.34 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_json"
|
||||
version = "1.0.39"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"itoa 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"ryu 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde 1.0.92 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "smallvec"
|
||||
version = "0.6.9"
|
||||
@ -685,6 +738,8 @@ dependencies = [
|
||||
"checksum arrayref 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "0d382e583f07208808f6b1249e60848879ba3543f57c32277bf52d69c2f0f0ee"
|
||||
"checksum arrayvec 0.4.10 (registry+https://github.com/rust-lang/crates.io-index)" = "92c7fb76bc8826a8b33b4ee5bb07a247a81e76764ab4d55e8f73e3a4d8808c71"
|
||||
"checksum autocfg 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "0e49efa51329a5fd37e7c79db4621af617cd4e3e5bc224939808d076077077bf"
|
||||
"checksum backtrace 0.3.30 (registry+https://github.com/rust-lang/crates.io-index)" = "ada4c783bb7e7443c14e0480f429ae2cc99da95065aeab7ee1b81ada0419404f"
|
||||
"checksum backtrace-sys 0.1.28 (registry+https://github.com/rust-lang/crates.io-index)" = "797c830ac25ccc92a7f8a7b9862bde440715531514594a6154e3d4a54dd769b6"
|
||||
"checksum bincode 1.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "9f04a5e50dc80b3d5d35320889053637d15011aed5e66b66b37ae798c65da6f7"
|
||||
"checksum bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "228047a76f468627ca71776ecdebd732a3423081fcf5125585bcd7c49886ce12"
|
||||
"checksum byteorder 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a019b10a2a7cdeb292db131fc8113e57ea2a908f6e7894b0c3c671893b65dbeb"
|
||||
@ -703,6 +758,7 @@ dependencies = [
|
||||
"checksum fuchsia-cprng 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba"
|
||||
"checksum glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "8be18de09a56b60ed0edf84bc9df007e30040691af7acd1c41874faac5895bfb"
|
||||
"checksum idna 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "38f09e0f0b1fb55fdee1f17470ad800da77af5186a1a76c026b679358b7e844e"
|
||||
"checksum itoa 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "501266b7edd0174f8530248f87f99c88fbe60ca4ef3dd486835b8d8d53136f7f"
|
||||
"checksum lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bc5729f27f159ddd61f4df6228e827e86643d4d3e7c32183cb30a1c08f604a14"
|
||||
"checksum libc 0.2.57 (registry+https://github.com/rust-lang/crates.io-index)" = "a844cabbd5a77e60403a58af576f0a1baa83c3dd2670be63e615bd24fc58b82d"
|
||||
"checksum lmdb-rkv 0.11.4 (registry+https://github.com/rust-lang/crates.io-index)" = "e25b4069789bf7ac069d6fd58229f18aec20c6f7cc9173cb731d11c10dbb6b6e"
|
||||
@ -742,13 +798,16 @@ dependencies = [
|
||||
"checksum regex 1.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "8f0a0bcab2fd7d1d7c54fa9eae6f43eddeb9ce2e7352f8518a814a4f65d60c58"
|
||||
"checksum regex-syntax 0.6.6 (registry+https://github.com/rust-lang/crates.io-index)" = "dcfd8681eebe297b81d98498869d4aae052137651ad7b96822f09ceb690d0a96"
|
||||
"checksum rkv 0.9.5 (registry+https://github.com/rust-lang/crates.io-index)" = "2c1b8d667bf149bfac7c47bb728dfb7246f35fdf61c2f16f9f588194f087d23c"
|
||||
"checksum rustc-demangle 0.1.15 (registry+https://github.com/rust-lang/crates.io-index)" = "a7f4dccf6f4891ebcc0c39f9b6eb1a83b9bf5d747cb439ec6fba4f3b977038af"
|
||||
"checksum rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a"
|
||||
"checksum ryu 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "b96a9549dc8d48f2c283938303c4b5a77aa29bfbc5b54b084fb1630408899a8f"
|
||||
"checksum scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "94258f53601af11e6a49f722422f6e3425c52b06245a5cf9bc09908b174f5e27"
|
||||
"checksum scopeguard 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b42e15e59b18a828bbf5c58ea01debb36b9b096346de35d941dcb89009f24a0d"
|
||||
"checksum semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403"
|
||||
"checksum semver-parser 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"
|
||||
"checksum serde 1.0.92 (registry+https://github.com/rust-lang/crates.io-index)" = "32746bf0f26eab52f06af0d0aa1984f641341d06d8d673c693871da2d188c9be"
|
||||
"checksum serde_derive 1.0.92 (registry+https://github.com/rust-lang/crates.io-index)" = "46a3223d0c9ba936b61c0d2e3e559e3217dbfb8d65d06d26e8b3c25de38bae3e"
|
||||
"checksum serde_json 1.0.39 (registry+https://github.com/rust-lang/crates.io-index)" = "5a23aa71d4a4d43fdbfaac00eff68ba8a06a51759a89ac3304323e800c4dd40d"
|
||||
"checksum smallvec 0.6.9 (registry+https://github.com/rust-lang/crates.io-index)" = "c4488ae950c49d403731982257768f48fada354a5203fe81f9bb6f43ca9002be"
|
||||
"checksum syn 0.15.34 (registry+https://github.com/rust-lang/crates.io-index)" = "a1393e4a97a19c01e900df2aec855a29f71cf02c402e2f443b8d2747c25c5dbe"
|
||||
"checksum synstructure 0.10.2 (registry+https://github.com/rust-lang/crates.io-index)" = "02353edf96d6e4dc81aea2d8490a7e9db177bf8acb0e951c24940bf866cb313f"
|
||||
|
@ -22,5 +22,8 @@ rkv = "0.9.5"
|
||||
cachedir = "0.1.1"
|
||||
path-clean = "0.1.0"
|
||||
bincode = "1.1.4"
|
||||
serde = "1.0.92"
|
||||
serde = { version="1.0.92", features = ["derive"] }
|
||||
zstd = "0.4.24"
|
||||
lazy_static = "1.3.0"
|
||||
serde_json = "1.0.39"
|
||||
failure = "0.1.5"
|
||||
|
@ -2,19 +2,18 @@ pub mod ffmpeg;
|
||||
pub mod pandoc;
|
||||
pub mod poppler;
|
||||
pub mod spawning;
|
||||
use regex::{Regex, RegexSet};
|
||||
use std::io::BufRead;
|
||||
use std::io::Write;
|
||||
use std::path::Path;
|
||||
use std::rc::Rc;
|
||||
use failure::*;
|
||||
|
||||
//pub use ffmpeg::FffmpegAdapter;
|
||||
|
||||
use regex::{Regex, RegexSet};
|
||||
use std::collections::HashMap;
|
||||
use std::ffi::OsString;
|
||||
use std::io::BufRead;
|
||||
use std::io::Write;
|
||||
use std::rc::Rc;
|
||||
|
||||
pub enum Matcher {
|
||||
// MimeType(Regex),
|
||||
FileName(Regex),
|
||||
FileExtension(String),
|
||||
}
|
||||
|
||||
pub struct AdapterMeta {
|
||||
@ -34,22 +33,24 @@ pub trait GetMetadata {
|
||||
fn metadata<'a>(&'a self) -> &'a AdapterMeta;
|
||||
}
|
||||
pub trait FileAdapter: GetMetadata {
|
||||
fn adapt(&self, inp_fname: &str, oup: &mut dyn Write) -> std::io::Result<()>;
|
||||
fn adapt(&self, inp_fname: &Path, oup: &mut dyn Write) -> Fallible<()>;
|
||||
}
|
||||
|
||||
pub fn ExtensionMatcher(extension: &str) -> Matcher {
|
||||
let regex = Regex::new(&format!(".*\\.{}", &regex::escape(extension)))
|
||||
.expect("we know this regex compiles");
|
||||
Matcher::FileName(regex)
|
||||
pub fn extension_to_regex(extension: &str) -> Regex {
|
||||
Regex::new(&format!(".*\\.{}", &regex::escape(extension))).expect("we know this regex compiles")
|
||||
}
|
||||
|
||||
pub fn init_adapters() -> Result<impl Fn(FileMeta) -> Option<Rc<dyn FileAdapter>>, regex::Error> {
|
||||
pub fn get_adapters() -> Vec<Rc<dyn FileAdapter>> {
|
||||
let adapters: Vec<Rc<dyn FileAdapter>> = vec![
|
||||
Rc::new(crate::adapters::ffmpeg::FFmpegAdapter::new()),
|
||||
Rc::new(crate::adapters::pandoc::PandocAdapter::new()),
|
||||
Rc::new(crate::adapters::poppler::PopplerAdapter::new()),
|
||||
];
|
||||
adapters
|
||||
}
|
||||
|
||||
pub fn adapter_matcher() -> Result<impl Fn(FileMeta) -> Option<Rc<dyn FileAdapter>>, regex::Error> {
|
||||
let adapters = get_adapters();
|
||||
let mut fname_regexes = vec![];
|
||||
//let mut mime_regexes = vec![];
|
||||
for adapter in adapters.into_iter() {
|
||||
@ -57,14 +58,16 @@ pub fn init_adapters() -> Result<impl Fn(FileMeta) -> Option<Rc<dyn FileAdapter>
|
||||
for matcher in &metadata.matchers {
|
||||
match matcher {
|
||||
//Matcher::MimeType(re) => mime_regexes.push((re.clone(), adapter.clone())),
|
||||
Matcher::FileName(re) => fname_regexes.push((re.clone(), adapter.clone())),
|
||||
Matcher::FileExtension(re) => {
|
||||
fname_regexes.push((extension_to_regex(re), adapter.clone()))
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
let fname_regex_set = RegexSet::new(fname_regexes.iter().map(|p| p.0.as_str()))?;
|
||||
//let mime_regex_set = RegexSet::new(mime_regexes.iter().map(|p| p.0.as_str()))?;
|
||||
return Ok(move |meta: FileMeta| {
|
||||
// todo: handle multiple matches
|
||||
// todo: handle multiple conflicting matches
|
||||
for m in fname_regex_set.matches(&meta.lossy_filename) {
|
||||
return Some(fname_regexes[m].1.clone());
|
||||
}
|
||||
|
@ -1,43 +1,128 @@
|
||||
use super::*;
|
||||
use spawning::SpawningFileAdapter;
|
||||
use std::io::Write;
|
||||
use std::process::Command;
|
||||
use super::spawning::map_exe_error;
|
||||
use lazy_static::lazy_static;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::io::BufReader;
|
||||
use std::process::*;
|
||||
use failure::*;
|
||||
// todo:
|
||||
// maybe todo: read list of extensions from
|
||||
//ffmpeg -demuxers | tail -n+5 | awk '{print $2}' | while read demuxer; do echo MUX=$demuxer; ffmpeg -h demuxer=$demuxer | grep 'Common extensions'; done 2>/dev/null
|
||||
static EXTENSIONS: &[&str] = &["mkv", "mp4", "avi"];
|
||||
|
||||
pub struct FFmpegAdapter {
|
||||
_metadata: AdapterMeta,
|
||||
lazy_static! {
|
||||
static ref METADATA: AdapterMeta = AdapterMeta {
|
||||
name: "ffmpeg".to_owned(),
|
||||
version: 1,
|
||||
matchers: EXTENSIONS
|
||||
.iter()
|
||||
.map(|s| Matcher::FileExtension(s.to_string()))
|
||||
.collect(),
|
||||
};
|
||||
}
|
||||
// maybe todo: read from
|
||||
// ffmpeg -demuxers
|
||||
// ffmpeg -h demuxer=xyz
|
||||
static extensions: &[&str] = &["mkv", "mp4", "avi"];
|
||||
|
||||
pub struct FFmpegAdapter;
|
||||
|
||||
impl FFmpegAdapter {
|
||||
pub fn new() -> FFmpegAdapter {
|
||||
FFmpegAdapter {
|
||||
_metadata: AdapterMeta {
|
||||
name: "ffmpeg".to_owned(),
|
||||
version: 1,
|
||||
matchers: extensions.iter().map(|s| ExtensionMatcher(s)).collect(),
|
||||
},
|
||||
}
|
||||
FFmpegAdapter
|
||||
}
|
||||
}
|
||||
impl GetMetadata for FFmpegAdapter {
|
||||
fn metadata<'a>(&'a self) -> &'a AdapterMeta {
|
||||
&self._metadata
|
||||
&METADATA
|
||||
}
|
||||
}
|
||||
impl SpawningFileAdapter for FFmpegAdapter {
|
||||
fn command(&self, inp_fname: &str) -> Command {
|
||||
let mut cmd = Command::new("ffmpeg");
|
||||
cmd.arg("-hide_banner")
|
||||
.arg("-loglevel")
|
||||
.arg("panic")
|
||||
.arg("-i")
|
||||
.arg(inp_fname)
|
||||
.arg("-f")
|
||||
.arg("webvtt")
|
||||
.arg("-");
|
||||
cmd
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
struct FFprobeOutput {
|
||||
streams: Vec<FFprobeStream>,
|
||||
}
|
||||
#[derive(Serialize, Deserialize)]
|
||||
struct FFprobeStream {
|
||||
codec_type: String, // video,audio,subtitle
|
||||
}
|
||||
impl FileAdapter for FFmpegAdapter {
|
||||
fn adapt(&self, inp_fname: &Path, oup: &mut dyn Write) -> Fallible<()> {
|
||||
let spawn_fail = |e| map_exe_error(e, "ffprobe", "Make sure you have ffmpeg installed.");
|
||||
let has_subtitles = {
|
||||
let probe = Command::new("ffprobe")
|
||||
.args(vec![
|
||||
"-v",
|
||||
"error",
|
||||
"-select_streams",
|
||||
"s",
|
||||
"-of",
|
||||
"json",
|
||||
"-show_entries",
|
||||
"stream=codec_type",
|
||||
])
|
||||
.arg("-i")
|
||||
.arg(inp_fname)
|
||||
.output().map_err(spawn_fail)?;
|
||||
if !probe.status.success() {
|
||||
return Err(format_err!("ffprobe failed: {:?}", probe.status));
|
||||
}
|
||||
println!("{}", String::from_utf8_lossy(&probe.stdout));
|
||||
let p: FFprobeOutput = serde_json::from_slice(&probe.stdout)?;
|
||||
(p.streams.iter().count() > 0)
|
||||
};
|
||||
{
|
||||
let mut probe = Command::new("ffprobe")
|
||||
.args(vec![
|
||||
"-v",
|
||||
"error",
|
||||
"-show_format",
|
||||
"-show_streams",
|
||||
"-of",
|
||||
"flat",
|
||||
// "-show_data",
|
||||
"-show_error",
|
||||
"-show_programs",
|
||||
"-show_chapters",
|
||||
// "-count_frames",
|
||||
//"-count_packets",
|
||||
])
|
||||
.arg("-i")
|
||||
.arg(inp_fname)
|
||||
.stdout(Stdio::piped())
|
||||
.spawn()?;
|
||||
for line in BufReader::new(probe.stdout.as_mut().unwrap()).lines() {
|
||||
writeln!(oup, "metadata: {}", line?)?;
|
||||
}
|
||||
let exit = probe.wait()?;
|
||||
if !exit.success() {
|
||||
return Err(format_err!("ffprobe failed: {:?}", exit));
|
||||
}
|
||||
}
|
||||
if has_subtitles {
|
||||
let mut cmd = Command::new("ffmpeg");
|
||||
cmd.arg("-hide_banner")
|
||||
.arg("-loglevel")
|
||||
.arg("panic")
|
||||
.arg("-i")
|
||||
.arg(inp_fname)
|
||||
.arg("-f")
|
||||
.arg("webvtt")
|
||||
.arg("-");
|
||||
let mut cmd = cmd.stdout(Stdio::piped()).spawn().map_err(spawn_fail)?;
|
||||
let stdo = cmd.stdout.as_mut().expect("is piped");
|
||||
let time_re = Regex::new(r".*\d.*-->.*\d.*").unwrap();
|
||||
let mut time: String = "".to_owned();
|
||||
for line in BufReader::new(stdo).lines() {
|
||||
let line = line?;
|
||||
// 09:55.195 --> 09:56.730
|
||||
if time_re.is_match(&line) {
|
||||
time = line.to_owned();
|
||||
} else {
|
||||
if line.len() == 0 {
|
||||
oup.write(b"\n")?;
|
||||
} else {
|
||||
writeln!(oup, "{}: {}", time, line)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
@ -1,7 +1,6 @@
|
||||
use super::*;
|
||||
use lazy_static::lazy_static;
|
||||
use spawning::SpawningFileAdapter;
|
||||
|
||||
use std::io::Write;
|
||||
use std::process::Command;
|
||||
|
||||
// from https://github.com/jgm/pandoc/blob/master/src/Text/Pandoc/App/FormatHeuristics.hs
|
||||
@ -40,32 +39,35 @@ use std::process::Command;
|
||||
//"xhtml" -> Just "html"
|
||||
//"wiki" -> Just "mediawiki"
|
||||
|
||||
static extensions: &[&str] = &["epub", "odt", "docx", "pptx", "fb2", "icml", "rtf", "ipynb"];
|
||||
static EXTENSIONS: &[&str] = &["epub", "odt", "docx", "pptx", "fb2", "ipynb"];
|
||||
|
||||
pub struct PandocAdapter {
|
||||
_metadata: AdapterMeta,
|
||||
lazy_static! {
|
||||
static ref METADATA: AdapterMeta = AdapterMeta {
|
||||
name: "pandoc".to_owned(),
|
||||
version: 1,
|
||||
matchers: EXTENSIONS
|
||||
.iter()
|
||||
.map(|s| Matcher::FileExtension(s.to_string()))
|
||||
.collect(),
|
||||
};
|
||||
}
|
||||
pub struct PandocAdapter;
|
||||
|
||||
impl PandocAdapter {
|
||||
pub fn new() -> PandocAdapter {
|
||||
PandocAdapter {
|
||||
_metadata: AdapterMeta {
|
||||
name: "pandoc".to_owned(),
|
||||
version: 1,
|
||||
// todo: read from ffmpeg -demuxers?
|
||||
matchers: extensions.iter().map(|s| ExtensionMatcher(s)).collect(),
|
||||
},
|
||||
}
|
||||
PandocAdapter
|
||||
}
|
||||
}
|
||||
impl GetMetadata for PandocAdapter {
|
||||
fn metadata<'a>(&'a self) -> &'a AdapterMeta {
|
||||
&self._metadata
|
||||
&METADATA
|
||||
}
|
||||
}
|
||||
impl SpawningFileAdapter for PandocAdapter {
|
||||
fn command(&self, inp_fname: &str) -> Command {
|
||||
let mut cmd = Command::new("pandoc");
|
||||
fn get_exe(&self) -> &str {
|
||||
"pandoc"
|
||||
}
|
||||
fn command(&self, inp_fname: &Path, mut cmd: Command) -> Command {
|
||||
cmd
|
||||
// simpler markdown (with more information loss but plainer text)
|
||||
.arg("--to=commonmark-header_attributes-link_attributes-fenced_divs-markdown_in_html_blocks-raw_html-native_divs-native_spans-bracketed_spans")
|
||||
|
@ -1,36 +1,35 @@
|
||||
use super::*;
|
||||
use lazy_static::lazy_static;
|
||||
use spawning::SpawningFileAdapter;
|
||||
use std::io::Read;
|
||||
use std::io::Write;
|
||||
use std::process::Command;
|
||||
use std::process::Stdio;
|
||||
static extensions: &[&str] = &["pdf"];
|
||||
|
||||
pub struct PopplerAdapter {
|
||||
_metadata: AdapterMeta,
|
||||
static EXTENSIONS: &[&str] = &["pdf"];
|
||||
|
||||
lazy_static! {
|
||||
static ref METADATA: AdapterMeta = AdapterMeta {
|
||||
name: "poppler".to_owned(),
|
||||
version: 1,
|
||||
matchers: EXTENSIONS.iter().map(|s| Matcher::FileExtension(s.to_string())).collect(),
|
||||
};
|
||||
}
|
||||
pub struct PopplerAdapter;
|
||||
|
||||
impl PopplerAdapter {
|
||||
pub fn new() -> PopplerAdapter {
|
||||
PopplerAdapter {
|
||||
_metadata: AdapterMeta {
|
||||
name: "poppler".to_owned(),
|
||||
version: 1,
|
||||
// todo: read from ffmpeg -demuxers?
|
||||
matchers: extensions.iter().map(|s| ExtensionMatcher(s)).collect(),
|
||||
},
|
||||
}
|
||||
PopplerAdapter
|
||||
}
|
||||
}
|
||||
|
||||
impl GetMetadata for PopplerAdapter {
|
||||
fn metadata<'a>(&'a self) -> &'a AdapterMeta {
|
||||
&self._metadata
|
||||
&METADATA
|
||||
}
|
||||
}
|
||||
impl SpawningFileAdapter for PopplerAdapter {
|
||||
fn command(&self, inp_fname: &str) -> Command {
|
||||
let mut cmd = Command::new("pdftotext");
|
||||
fn get_exe(&self) -> &str {
|
||||
"pdftotext"
|
||||
}
|
||||
fn command(&self, inp_fname: &Path, mut cmd: Command) -> Command {
|
||||
cmd.arg("-layout").arg("--").arg(inp_fname).arg("-");
|
||||
cmd
|
||||
}
|
||||
|
@ -2,27 +2,39 @@ use super::*;
|
||||
use std::io::Write;
|
||||
use std::process::Command;
|
||||
use std::process::Stdio;
|
||||
use failure::*;
|
||||
|
||||
pub trait SpawningFileAdapter: GetMetadata {
|
||||
fn command(&self, inp_fname: &str) -> Command;
|
||||
fn get_exe(&self) -> &str;
|
||||
fn command(&self, inp_fname: &Path, command: Command) -> Command;
|
||||
}
|
||||
|
||||
pub fn map_exe_error(err: std::io::Error, exe_name: &str, help: &str) -> Error {
|
||||
use std::io::ErrorKind::*;
|
||||
match err.kind() {
|
||||
NotFound => format_err!("Could not find executable \"{}\". {}", exe_name, help),
|
||||
_ => Error::from(err)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn pipe_output(mut cmd: Command, oup: &mut dyn Write, exe_name: &str, help: &str) -> Fallible<()> {
|
||||
let mut cmd = cmd.stdout(Stdio::piped()).spawn().map_err(|e| map_exe_error(e, exe_name, help))?;
|
||||
let stdo = cmd.stdout.as_mut().expect("is piped");
|
||||
std::io::copy(stdo, oup)?;
|
||||
let status = cmd.wait()?;
|
||||
if status.success() {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(format_err!("subprocess failed: {:?}", status))
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> FileAdapter for T
|
||||
where
|
||||
T: SpawningFileAdapter,
|
||||
where
|
||||
T: SpawningFileAdapter,
|
||||
{
|
||||
fn adapt(&self, inp_fname: &str, oup: &mut dyn Write) -> std::io::Result<()> {
|
||||
let mut cmd = self.command(inp_fname).stdout(Stdio::piped()).spawn()?;
|
||||
let stdo = cmd.stdout.as_mut().expect("is piped");
|
||||
std::io::copy(stdo, oup)?;
|
||||
let status = cmd.wait()?;
|
||||
if status.success() {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(std::io::Error::new(
|
||||
std::io::ErrorKind::Other,
|
||||
"subprocess failed",
|
||||
))
|
||||
}
|
||||
fn adapt(&self, inp_fname: &Path, oup: &mut dyn Write) -> Fallible<()> {
|
||||
let cmd = Command::new(self.get_exe());
|
||||
pipe_output(self.command(inp_fname, cmd), oup, self.get_exe(), "")
|
||||
}
|
||||
}
|
||||
|
@ -1,21 +1,13 @@
|
||||
use path_clean::PathClean;
|
||||
use rga::adapters::*;
|
||||
use rga::CachingWriter;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::error::Error;
|
||||
use std::fmt;
|
||||
use std::io::Write;
|
||||
use std::path::{Path, PathBuf};
|
||||
use tree_magic;
|
||||
use failure::{Error, format_err};
|
||||
|
||||
const max_db_blob_len: usize = 2000000;
|
||||
// longest compressed conversion output to save in cache
|
||||
const MAX_DB_BLOB_LEN: usize = 2000000;
|
||||
const ZSTD_LEVEL: i32 = 12;
|
||||
|
||||
// lazy error
|
||||
fn lerr(inp: impl AsRef<str>) -> Box<dyn Error> {
|
||||
return inp.as_ref().into();
|
||||
}
|
||||
|
||||
fn open_db() -> Result<std::sync::Arc<std::sync::RwLock<rkv::Rkv>>, Box<dyn Error>> {
|
||||
fn open_db() -> Result<std::sync::Arc<std::sync::RwLock<rkv::Rkv>>, Error> {
|
||||
let app_cache = cachedir::CacheDirConfig::new("rga").get_cache_dir()?;
|
||||
|
||||
let db_arc = rkv::Manager::singleton()
|
||||
@ -33,18 +25,20 @@ fn open_db() -> Result<std::sync::Arc<std::sync::RwLock<rkv::Rkv>>, Box<dyn Erro
|
||||
Ok(db_arc)
|
||||
}
|
||||
|
||||
fn main() -> Result<(), Box<dyn Error>> {
|
||||
fn main() -> Result<(), Error> {
|
||||
//db.
|
||||
let adapters = init_adapters()?;
|
||||
let filepath = std::env::args()
|
||||
let adapters = adapter_matcher()?;
|
||||
let filepath = std::env::args_os()
|
||||
.skip(1)
|
||||
.next()
|
||||
.ok_or(lerr("No filename specified"))?;
|
||||
eprintln!("fname: {}", filepath);
|
||||
let path = PathBuf::from(&filepath);
|
||||
.ok_or(format_err!("No filename specified"))?;
|
||||
eprintln!("inp fname: {:?}", filepath);
|
||||
let path = std::env::current_dir()?.join(&filepath);
|
||||
eprintln!("abs path: {:?}", path);
|
||||
eprintln!("clean path: {:?}", path.clean());
|
||||
let serialized_path: Vec<u8> =
|
||||
bincode::serialize(&path.clean()).expect("could not serialize path");
|
||||
let filename = path.file_name().ok_or(lerr("Empty filename"))?;
|
||||
bincode::serialize(&path.clean()).expect("could not serialize path"); // key in the cache database
|
||||
let filename = path.file_name().ok_or(format_err!("Empty filename"))?;
|
||||
|
||||
/*let mimetype = tree_magic::from_filepath(path).ok_or(lerr(format!(
|
||||
"File {} does not exist",
|
||||
@ -64,32 +58,33 @@ fn main() -> Result<(), Box<dyn Error>> {
|
||||
let db_env = db_arc.read().unwrap();
|
||||
let db = db_env
|
||||
.open_single(db_name.as_str(), rkv::store::Options::create())
|
||||
.map_err(|p| lerr(format!("could not open db store: {:?}", p)))?;
|
||||
.map_err(|p| format_err!("could not open db store: {:?}", p))?;
|
||||
let reader = db_env.read().expect("could not get reader");
|
||||
match db
|
||||
.get(&reader, &serialized_path)
|
||||
.map_err(|p| lerr(format!("could not read from db: {:?}", p)))?
|
||||
.map_err(|p| format_err!("could not read from db: {:?}", p))?
|
||||
{
|
||||
Some(rkv::Value::Blob(cached)) => {
|
||||
let stdouti = std::io::stdout();
|
||||
zstd::stream::copy_decode(cached, stdouti.lock())?;
|
||||
Ok(())
|
||||
}
|
||||
Some(_) => Err(lerr("Integrity: value not blob")),
|
||||
Some(_) => Err(format_err!("Integrity: value not blob")),
|
||||
None => {
|
||||
let stdouti = std::io::stdout();
|
||||
let mut compbuf = CachingWriter::new(stdouti.lock(), max_db_blob_len, 12)?;
|
||||
ad.adapt(&filepath, &mut compbuf)?;
|
||||
let mut compbuf =
|
||||
CachingWriter::new(stdouti.lock(), MAX_DB_BLOB_LEN, ZSTD_LEVEL)?;
|
||||
ad.adapt(&path, &mut compbuf)?;
|
||||
let compressed = compbuf.finish()?;
|
||||
if let Some(cached) = compressed {
|
||||
eprintln!("compressed len: {}", cached.len());
|
||||
|
||||
{
|
||||
let mut writer = db_env.write().map_err(|p| {
|
||||
lerr(format!("could not open write handle to cache: {:?}", p))
|
||||
format_err!("could not open write handle to cache: {:?}", p)
|
||||
})?;
|
||||
db.put(&mut writer, &serialized_path, &rkv::Value::Blob(&cached))
|
||||
.map_err(|p| lerr(format!("could not write to cache: {:?}", p)))?;
|
||||
.map_err(|p| format_err!("could not write to cache: {:?}", p))?;
|
||||
writer.commit().unwrap();
|
||||
}
|
||||
}
|
||||
@ -98,13 +93,18 @@ fn main() -> Result<(), Box<dyn Error>> {
|
||||
}
|
||||
}
|
||||
None => {
|
||||
eprintln!("no adapter for that file, running cat!");
|
||||
let stdini = std::io::stdin();
|
||||
let mut stdin = stdini.lock();
|
||||
let stdouti = std::io::stdout();
|
||||
let mut stdout = stdouti.lock();
|
||||
std::io::copy(&mut stdin, &mut stdout)?;
|
||||
Ok(())
|
||||
let allow_cat = false;
|
||||
if allow_cat {
|
||||
eprintln!("no adapter for that file, running cat!");
|
||||
let stdini = std::io::stdin();
|
||||
let mut stdin = stdini.lock();
|
||||
let stdouti = std::io::stdout();
|
||||
let mut stdout = stdouti.lock();
|
||||
std::io::copy(&mut stdin, &mut stdout)?;
|
||||
Ok(())
|
||||
} else {
|
||||
Err(format_err!("No adapter found for file {:?}", filename))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,13 +1,26 @@
|
||||
use rga::adapters;
|
||||
|
||||
use rga::adapters::*;
|
||||
use std::process::Command;
|
||||
|
||||
fn main() -> std::io::Result<()> {
|
||||
let adapters = get_adapters();
|
||||
|
||||
let extensions = adapters
|
||||
.iter()
|
||||
.flat_map(|a| &a.metadata().matchers)
|
||||
.filter_map(|m| match m {
|
||||
Matcher::FileExtension(ext) => Some(ext as &str),
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join(",");
|
||||
|
||||
let exe = std::env::current_exe().expect("Could not get executable location");
|
||||
let preproc_exe = exe.with_file_name("rga-preproc");
|
||||
let mut child = Command::new("rg")
|
||||
.arg("--no-line-number")
|
||||
.arg("--pre")
|
||||
.arg(preproc_exe)
|
||||
.arg("--pre-glob")
|
||||
.arg(format!("*.{{{}}}", extensions))
|
||||
.args(std::env::args().skip(1))
|
||||
.spawn()?;
|
||||
|
||||
|
@ -1,8 +1,5 @@
|
||||
use std::io::Write;
|
||||
|
||||
enum Sta<'t> {
|
||||
ToZstd(Vec<u8>, zstd::stream::write::Encoder<&'t mut Vec<u8>>),
|
||||
}
|
||||
use failure::Fallible;
|
||||
|
||||
/**
|
||||
* wrap a writer so that it is passthrough,
|
||||
@ -18,7 +15,7 @@ impl<W: Write> CachingWriter<W> {
|
||||
out: W,
|
||||
max_cache_size: usize,
|
||||
compression_level: i32,
|
||||
) -> std::io::Result<CachingWriter<W>> {
|
||||
) -> Fallible<CachingWriter<W>> {
|
||||
Ok(CachingWriter {
|
||||
out,
|
||||
max_cache_size,
|
||||
@ -48,7 +45,7 @@ impl<W: Write> Write for CachingWriter<W> {
|
||||
Some(writer) => {
|
||||
let wrote = writer.write(buf)?;
|
||||
let compressed_len = writer.get_ref().len();
|
||||
eprintln!("wrote {} to zstd, len now {}", wrote, compressed_len);
|
||||
//eprintln!("wrote {} to zstd, len now {}", wrote, compressed_len);
|
||||
if compressed_len > self.max_cache_size {
|
||||
eprintln!("cache longer than max, dropping");
|
||||
//writer.finish();
|
||||
|
0
src/errors.rs
Normal file
0
src/errors.rs
Normal file
@ -1,3 +1,4 @@
|
||||
pub mod adapters;
|
||||
mod caching_writer;
|
||||
pub mod errors;
|
||||
pub use caching_writer::CachingWriter;
|
||||
|
Loading…
Reference in New Issue
Block a user