add zip support!

This commit is contained in:
phiresky 2019-06-06 11:00:13 +02:00
parent e0002a789d
commit 1a0bbc798e
13 changed files with 522 additions and 133 deletions

145
Cargo.lock generated
View File

@ -1,5 +1,10 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
[[package]]
name = "adler32"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "aho-corasick"
version = "0.7.3"
@ -67,6 +72,24 @@ name = "byteorder"
version = "1.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "bzip2"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"bzip2-sys 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.57 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "bzip2-sys"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"cc 1.0.37 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.57 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "cachedir"
version = "0.1.1"
@ -93,6 +116,36 @@ dependencies = [
"bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "crc32fast"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"cfg-if 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "crossbeam"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"cfg-if 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)",
"crossbeam-channel 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)",
"crossbeam-deque 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)",
"crossbeam-epoch 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)",
"crossbeam-queue 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
"crossbeam-utils 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "crossbeam-channel"
version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"crossbeam-utils 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)",
"smallvec 0.6.9 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "crossbeam-deque"
version = "0.2.0"
@ -102,6 +155,15 @@ dependencies = [
"crossbeam-utils 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "crossbeam-deque"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"crossbeam-epoch 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)",
"crossbeam-utils 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "crossbeam-epoch"
version = "0.3.1"
@ -116,6 +178,27 @@ dependencies = [
"scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "crossbeam-epoch"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"arrayvec 0.4.10 (registry+https://github.com/rust-lang/crates.io-index)",
"cfg-if 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)",
"crossbeam-utils 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
"memoffset 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "crossbeam-queue"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"crossbeam-utils 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "crossbeam-utils"
version = "0.2.2"
@ -124,6 +207,15 @@ dependencies = [
"cfg-if 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "crossbeam-utils"
version = "0.6.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"cfg-if 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "either"
version = "1.5.2"
@ -194,6 +286,16 @@ name = "libc"
version = "0.2.57"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "libflate"
version = "0.1.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"adler32 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
"byteorder 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
"crc32fast 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "lmdb-rkv"
version = "0.11.4"
@ -326,6 +428,11 @@ name = "pkg-config"
version = "0.3.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "podio"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "proc-macro2"
version = "0.4.30"
@ -498,6 +605,7 @@ version = "0.1.0"
dependencies = [
"bincode 1.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
"cachedir 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"crossbeam 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)",
"failure 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
"path-clean 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
@ -506,6 +614,7 @@ dependencies = [
"serde 1.0.92 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_json 1.0.39 (registry+https://github.com/rust-lang/crates.io-index)",
"tree_magic_fork 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
"zip 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)",
"zstd 0.4.24+zstd.1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
@ -629,6 +738,16 @@ dependencies = [
"lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "time"
version = "0.1.42"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"libc 0.2.57 (registry+https://github.com/rust-lang/crates.io-index)",
"redox_syscall 0.1.54 (registry+https://github.com/rust-lang/crates.io-index)",
"winapi 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "tree_magic_fork"
version = "0.2.2"
@ -706,6 +825,18 @@ name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "zip"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"bzip2 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
"crc32fast 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"libflate 0.1.23 (registry+https://github.com/rust-lang/crates.io-index)",
"podio 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
"time 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "zstd"
version = "0.4.24+zstd.1.4.0"
@ -734,6 +865,7 @@ dependencies = [
]
[metadata]
"checksum adler32 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "7e522997b529f05601e05166c07ed17789691f562762c7f3b987263d2dedee5c"
"checksum aho-corasick 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)" = "e6f484ae0c99fec2e858eb6134949117399f222608d84cadb3f58c1f97c2364c"
"checksum arrayref 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "0d382e583f07208808f6b1249e60848879ba3543f57c32277bf52d69c2f0f0ee"
"checksum arrayvec 0.4.10 (registry+https://github.com/rust-lang/crates.io-index)" = "92c7fb76bc8826a8b33b4ee5bb07a247a81e76764ab4d55e8f73e3a4d8808c71"
@ -743,13 +875,22 @@ dependencies = [
"checksum bincode 1.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "9f04a5e50dc80b3d5d35320889053637d15011aed5e66b66b37ae798c65da6f7"
"checksum bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "228047a76f468627ca71776ecdebd732a3423081fcf5125585bcd7c49886ce12"
"checksum byteorder 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a019b10a2a7cdeb292db131fc8113e57ea2a908f6e7894b0c3c671893b65dbeb"
"checksum bzip2 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "42b7c3cbf0fa9c1b82308d57191728ca0256cb821220f4e2fd410a72ade26e3b"
"checksum bzip2-sys 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "6584aa36f5ad4c9247f5323b0a42f37802b37a836f0ad87084d7a33961abe25f"
"checksum cachedir 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c06509d1f4ffa658939bd23f076cd929ef218241363796551528e7eec69128c8"
"checksum cc 1.0.37 (registry+https://github.com/rust-lang/crates.io-index)" = "39f75544d7bbaf57560d2168f28fd649ff9c76153874db88bdbdfd839b1a7e7d"
"checksum cfg-if 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "b486ce3ccf7ffd79fdeb678eac06a9e6c09fc88d33836340becb8fffe87c5e33"
"checksum cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f"
"checksum crc32fast 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ba125de2af0df55319f41944744ad91c71113bf74a4646efff39afe1f6842db1"
"checksum crossbeam 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b14492071ca110999a20bf90e3833406d5d66bfd93b4e52ec9539025ff43fe0d"
"checksum crossbeam-channel 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)" = "0f0ed1a4de2235cabda8558ff5840bffb97fcb64c97827f354a451307df5f72b"
"checksum crossbeam-deque 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f739f8c5363aca78cfb059edf753d8f0d36908c348f3d8d1503f03d8b75d9cf3"
"checksum crossbeam-deque 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b18cd2e169ad86297e6bc0ad9aa679aee9daa4f19e8163860faf7c164e4f5a71"
"checksum crossbeam-epoch 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "927121f5407de9956180ff5e936fe3cf4324279280001cd56b669d28ee7e9150"
"checksum crossbeam-epoch 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)" = "04c9e3102cc2d69cd681412141b390abd55a362afc1540965dad0ad4d34280b4"
"checksum crossbeam-queue 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7c979cd6cfe72335896575c6b5688da489e420d36a27a0b9eb0c73db574b4a4b"
"checksum crossbeam-utils 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "2760899e32a1d58d5abb31129f8fae5de75220bc2176e77ff7c627ae45c918d9"
"checksum crossbeam-utils 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)" = "f8306fcef4a7b563b76b7dd949ca48f52bc1141aa067d2ea09565f3e2652aa5c"
"checksum either 1.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "5527cfe0d098f36e3f8839852688e63c8fff1c90b2b405aef730615f9a7bcf7b"
"checksum failure 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "795bd83d3abeb9220f257e597aa0080a508b27533824adf336529648f6abf7e2"
"checksum failure_derive 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "ea1063915fd7ef4309e222a5a07cf9c319fb9c7836b1f89b85458672dbb127e1"
@ -761,6 +902,7 @@ dependencies = [
"checksum itoa 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "501266b7edd0174f8530248f87f99c88fbe60ca4ef3dd486835b8d8d53136f7f"
"checksum lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bc5729f27f159ddd61f4df6228e827e86643d4d3e7c32183cb30a1c08f604a14"
"checksum libc 0.2.57 (registry+https://github.com/rust-lang/crates.io-index)" = "a844cabbd5a77e60403a58af576f0a1baa83c3dd2670be63e615bd24fc58b82d"
"checksum libflate 0.1.23 (registry+https://github.com/rust-lang/crates.io-index)" = "76912aa0196b6f0e06d9c43ee877be45369157c06172ade12fe20ac3ee5ffa15"
"checksum lmdb-rkv 0.11.4 (registry+https://github.com/rust-lang/crates.io-index)" = "e25b4069789bf7ac069d6fd58229f18aec20c6f7cc9173cb731d11c10dbb6b6e"
"checksum lmdb-rkv-sys 0.8.3 (registry+https://github.com/rust-lang/crates.io-index)" = "1470e0168f1832e35afd6d0931ae60db625685332837b97aa156773ec9c5e393"
"checksum lock_api 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ed946d4529956a20f2d63ebe1b69996d5a2137c91913fe3ebbeff957f5bca7ff"
@ -779,6 +921,7 @@ dependencies = [
"checksum percent-encoding 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "31010dd2e1ac33d5b46a5b413495239882813e0369f8ed8a5e266f173602f831"
"checksum petgraph 0.4.13 (registry+https://github.com/rust-lang/crates.io-index)" = "9c3659d1ee90221741f65dd128d9998311b0e40c5d3c23a62445938214abce4f"
"checksum pkg-config 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)" = "676e8eb2b1b4c9043511a9b7bea0915320d7e502b0a079fb03f9635a5252b18c"
"checksum podio 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "780fb4b6698bbf9cf2444ea5d22411cef2953f0824b98f33cf454ec5615645bd"
"checksum proc-macro2 0.4.30 (registry+https://github.com/rust-lang/crates.io-index)" = "cf3d2011ab5c909338f7887f4fc896d35932e29146c12c8d01da6b22a80ba759"
"checksum quote 0.6.12 (registry+https://github.com/rust-lang/crates.io-index)" = "faf4799c5d274f3868a4aae320a0a182cbd2baee377b378f080e16a23e9d80db"
"checksum rand 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)" = "6d71dacdc3c88c1fde3885a3be3fbab9f35724e6ce99467f7d9c5026132184ca"
@ -812,6 +955,7 @@ dependencies = [
"checksum syn 0.15.34 (registry+https://github.com/rust-lang/crates.io-index)" = "a1393e4a97a19c01e900df2aec855a29f71cf02c402e2f443b8d2747c25c5dbe"
"checksum synstructure 0.10.2 (registry+https://github.com/rust-lang/crates.io-index)" = "02353edf96d6e4dc81aea2d8490a7e9db177bf8acb0e951c24940bf866cb313f"
"checksum thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b"
"checksum time 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)" = "db8dcfca086c1143c9270ac42a2bbd8a7ee477b78ac8e45b19abfb0cbede4b6f"
"checksum tree_magic_fork 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "aab921ca9b828f83389f3f3c5e77404612547081e5222eb3a23d06184f6813af"
"checksum ucd-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "535c204ee4d8434478593480b8f86ab45ec9aae0e83c568ca81abf0fd0e88f86"
"checksum unicode-bidi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "49f2bd0c6468a8230e1db229cff8029217cf623c767ea5d60bfbd42729ea54d5"
@ -823,6 +967,7 @@ dependencies = [
"checksum winapi 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)" = "f10e386af2b13e47c89e7236a7a14a086791a2b88ebad6df9bf42040195cf770"
"checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
"checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
"checksum zip 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "c18fc320faf909036e46ac785ea827f72e485304877faf1a3a39538d3714dbc3"
"checksum zstd 0.4.24+zstd.1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2c5a6414958b49ee80f2dd0042023ac8f37cfe1d31fbeec0b9749cf6f2c03683"
"checksum zstd-safe 1.4.9+zstd.1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1d98332212af687878b146a6549c188e9b72971972d23089c831472f938e6272"
"checksum zstd-sys 1.4.10+zstd.1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "46f433134fbd0c37c9eb5929733df5f34bcdff464722eb93155fcee93eb57652"

View File

@ -27,3 +27,5 @@ zstd = "0.4.24"
lazy_static = "1.3.0"
serde_json = "1.0.39"
failure = "0.1.5"
zip = "0.5.2"
crossbeam = "0.7.1"

Binary file not shown.

Binary file not shown.

View File

@ -2,12 +2,12 @@ pub mod ffmpeg;
pub mod pandoc;
pub mod poppler;
pub mod spawning;
pub mod zip;
use failure::*;
use regex::{Regex, RegexSet};
use std::io::BufRead;
use std::io::Write;
use std::io::prelude::*;
use std::path::Path;
use std::rc::Rc;
use failure::*;
//pub use ffmpeg::FffmpegAdapter;
@ -24,7 +24,7 @@ pub struct AdapterMeta {
pub struct FileMeta {
// filename is not actually a utf8 string, but since we can't do regex on OsStr and can't get a &[u8] from OsStr either,
// and since we probably only want to do matching on ascii stuff anyways, this is the filename as a string with non-valid bytes removed
// and since we probably only want to do only matching on ascii stuff anyways, this is the filename as a string with non-valid bytes removed
pub lossy_filename: String,
// pub mimetype: String,
}
@ -33,7 +33,14 @@ pub trait GetMetadata {
fn metadata<'a>(&'a self) -> &'a AdapterMeta;
}
pub trait FileAdapter: GetMetadata {
fn adapt(&self, inp_fname: &Path, oup: &mut dyn Write) -> Fallible<()>;
fn adapt(&self, a: AdaptInfo) -> Fallible<()>;
}
/// Bundle of everything handed to `FileAdapter::adapt` for one input.
///
/// All fields are borrowed for `'a`; the struct owns nothing.
pub struct AdaptInfo<'a> {
/// Path associated with the input. `rga_preproc` uses its file name to pick an adapter,
/// and spawning adapters receive it in `command`; the content itself comes from `inp`.
pub filepath_hint: &'a Path,
/// Stream the input content is read from.
pub inp: &'a mut dyn Read,
/// Sink the adapted output is written to.
pub oup: &'a mut (dyn Write + Send),
/// Text prepended to every output line by `SpawningFileAdapter::postproc`;
/// the zip adapter extends it with the entry name for each archive member.
pub line_prefix: &'a str,
// pub adapt_subobject: &'a dyn Fn(AdaptInfo) -> Fallible<()>,
}
pub fn extension_to_regex(extension: &str) -> Regex {
@ -42,9 +49,10 @@ pub fn extension_to_regex(extension: &str) -> Regex {
pub fn get_adapters() -> Vec<Rc<dyn FileAdapter>> {
let adapters: Vec<Rc<dyn FileAdapter>> = vec![
Rc::new(crate::adapters::ffmpeg::FFmpegAdapter::new()),
Rc::new(crate::adapters::pandoc::PandocAdapter::new()),
Rc::new(crate::adapters::poppler::PopplerAdapter::new()),
Rc::new(ffmpeg::FFmpegAdapter::new()),
Rc::new(pandoc::PandocAdapter::new()),
Rc::new(poppler::PopplerAdapter::new()),
Rc::new(zip::ZipAdapter::new()),
];
adapters
}

View File

@ -1,10 +1,10 @@
use super::*;
use super::spawning::map_exe_error;
use super::*;
use failure::*;
use lazy_static::lazy_static;
use serde::{Deserialize, Serialize};
use std::io::BufReader;
use std::process::*;
use failure::*;
// todo:
// maybe todo: read list of extensions from
//ffmpeg -demuxers | tail -n+5 | awk '{print $2}' | while read demuxer; do echo MUX=$demuxer; ffmpeg -h demuxer=$demuxer | grep 'Common extensions'; done 2>/dev/null
@ -43,8 +43,14 @@ struct FFprobeStream {
codec_type: String, // video,audio,subtitle
}
impl FileAdapter for FFmpegAdapter {
fn adapt(&self, inp_fname: &Path, oup: &mut dyn Write) -> Fallible<()> {
let spawn_fail = |e| map_exe_error(e, "ffprobe", "Make sure you have ffmpeg installed.");
fn adapt(&self, ai: AdaptInfo) -> Fallible<()> {
let AdaptInfo {
filepath_hint,
inp,
oup,
..
} = ai;
/*let spawn_fail = |e| map_exe_error(e, "ffprobe", "Make sure you have ffmpeg installed.");
let has_subtitles = {
let probe = Command::new("ffprobe")
.args(vec![
@ -122,7 +128,7 @@ impl FileAdapter for FFmpegAdapter {
}
}
}
}
}*/
Ok(())
}
}

View File

@ -67,14 +67,13 @@ impl SpawningFileAdapter for PandocAdapter {
fn get_exe(&self) -> &str {
"pandoc"
}
fn command(&self, inp_fname: &Path, mut cmd: Command) -> Command {
fn command(&self, filepath_hint: &Path, mut cmd: Command) -> Command {
cmd
.arg("--from").arg(filepath_hint.extension().unwrap())
// simpler markdown (with more information loss but plainer text)
.arg("--to=commonmark-header_attributes-link_attributes-fenced_divs-markdown_in_html_blocks-raw_html-native_divs-native_spans-bracketed_spans")
.arg("--wrap=none")
.arg("--atx-headers")
.arg("--")
.arg(inp_fname);
.arg("--atx-headers");
cmd
}
}

View File

@ -9,7 +9,10 @@ lazy_static! {
static ref METADATA: AdapterMeta = AdapterMeta {
name: "poppler".to_owned(),
version: 1,
matchers: EXTENSIONS.iter().map(|s| Matcher::FileExtension(s.to_string())).collect(),
matchers: EXTENSIONS
.iter()
.map(|s| Matcher::FileExtension(s.to_string()))
.collect(),
};
}
pub struct PopplerAdapter;
@ -29,8 +32,8 @@ impl SpawningFileAdapter for PopplerAdapter {
fn get_exe(&self) -> &str {
"pdftotext"
}
fn command(&self, inp_fname: &Path, mut cmd: Command) -> Command {
cmd.arg("-layout").arg("--").arg(inp_fname).arg("-");
fn command(&self, filepath_hint: &Path, mut cmd: Command) -> Command {
cmd.arg("-layout").arg("-").arg("-");
cmd
}
}

View File

@ -1,26 +1,93 @@
use super::*;
use std::io::Write;
use failure::*;
use std::io::prelude::*;
use std::io::BufReader;
use std::process::Command;
use std::process::Stdio;
use failure::*;
use std::thread;
pub trait SpawningFileAdapter: GetMetadata {
fn get_exe(&self) -> &str;
fn command(&self, inp_fname: &Path, command: Command) -> Command;
fn command(&self, filepath_hint: &Path, command: Command) -> Command;
fn postproc(line_prefix: &str, inp: &mut Read, oup: &mut Write) -> Fallible<()> {
//std::io::copy(inp, oup)?;
for line in BufReader::new(inp).lines() {
oup.write_all(format!("{}{}\n", line_prefix, line?).as_bytes())?;
}
Ok(())
}
}
pub fn map_exe_error(err: std::io::Error, exe_name: &str, help: &str) -> Error {
use std::io::ErrorKind::*;
match err.kind() {
NotFound => format_err!("Could not find executable \"{}\". {}", exe_name, help),
_ => Error::from(err)
_ => Error::from(err),
}
}
pub fn pipe_output(mut cmd: Command, oup: &mut dyn Write, exe_name: &str, help: &str) -> Fallible<()> {
let mut cmd = cmd.stdout(Stdio::piped()).spawn().map_err(|e| map_exe_error(e, exe_name, help))?;
let stdo = cmd.stdout.as_mut().expect("is piped");
std::io::copy(stdo, oup)?;
/*fn pipe(a: &mut dyn Read, b: &mut dyn Write, c: &mut dyn Read, d: &mut dyn Write) {
let mut buf = vec![0u8; 2 << 13];
loop {
match a.read(&buf) {
}
}
}*/
/*pub fn copy<R: ?Sized, W: ?Sized>(
name: &str,
reader: &mut R,
writer: &mut W,
) -> std::io::Result<u64>
where
R: Read,
W: Write,
{
eprintln!("START COPY {}", name);
let mut zz = vec![0; 1 << 13];
let mut buf: &mut [u8] = zz.as_mut();
let mut written = 0;
loop {
let r = reader.read(buf);
eprintln!("{}read: {:?}", name, r);
let len = match r {
Ok(0) => return Ok(written),
Ok(len) => len,
Err(ref e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
Err(e) => return Err(e),
};
writer.write_all(&buf[..len])?;
written += len as u64;
}
}*/
pub fn pipe_output(
line_prefix: &str,
mut cmd: Command,
inp: &mut (dyn Read),
oup: &mut (dyn Write + Send),
exe_name: &str,
help: &str,
cp: fn(line_prefix: &str, &mut dyn Read, &mut dyn Write) -> Fallible<()>,
) -> Fallible<()> {
let mut cmd = cmd
.stdin(Stdio::piped())
.stdout(Stdio::piped())
.spawn()
.map_err(|e| map_exe_error(e, exe_name, help))?;
let mut stdi = cmd.stdin.take().expect("is piped");
let mut stdo = cmd.stdout.take().expect("is piped");
crossbeam::scope(|s| -> Fallible<()> {
s.spawn(|_| cp(line_prefix, &mut stdo, oup).unwrap()); // errors?
std::io::copy(inp, &mut stdi)?;
drop(stdi); // NEEDED! otherwise deadlock
Ok(())
})
.unwrap()?;
let status = cmd.wait()?;
if status.success() {
Ok(())
@ -33,8 +100,23 @@ impl<T> FileAdapter for T
where
T: SpawningFileAdapter,
{
fn adapt(&self, inp_fname: &Path, oup: &mut dyn Write) -> Fallible<()> {
fn adapt(&self, ai: AdaptInfo) -> Fallible<()> {
let AdaptInfo {
filepath_hint,
inp,
oup,
line_prefix,
..
} = ai;
let cmd = Command::new(self.get_exe());
pipe_output(self.command(inp_fname, cmd), oup, self.get_exe(), "")
pipe_output(
line_prefix,
self.command(filepath_hint, cmd),
inp,
oup,
self.get_exe(),
"",
Self::postproc,
)
}
}

72
src/adapters/zip.rs Normal file
View File

@ -0,0 +1,72 @@
use super::*;
use crate::preproc::rga_preproc;
use failure::*;
use lazy_static::lazy_static;
use std::fs::File;
// todo:
// maybe todo: read list of extensions from
//ffmpeg -demuxers | tail -n+5 | awk '{print $2}' | while read demuxer; do echo MUX=$demuxer; ffmpeg -h demuxer=$demuxer | grep 'Common extensions'; done 2>/dev/null
/// File extensions (without the leading dot) claimed by the zip adapter.
static EXTENSIONS: &[&str] = &["zip"];

lazy_static! {
    /// Adapter description for the zip adapter: its name, its version and one
    /// file-extension matcher per entry of `EXTENSIONS`.
    static ref METADATA: AdapterMeta = {
        let matchers = EXTENSIONS
            .iter()
            .map(|ext| Matcher::FileExtension(ext.to_string()))
            .collect();
        AdapterMeta {
            name: String::from("zip"),
            version: 1,
            matchers,
        }
    };
}
/// Adapter for zip archives; a stateless unit struct.
pub struct ZipAdapter;

impl ZipAdapter {
    /// Creates the (stateless) zip adapter.
    pub fn new() -> Self {
        Self
    }
}
impl GetMetadata for ZipAdapter {
    /// Returns the shared, lazily initialised `METADATA` of the zip adapter.
    fn metadata(&self) -> &AdapterMeta {
        &*METADATA
    }
}
impl FileAdapter for ZipAdapter {
    /// Reads the zip archive in `ai.inp` entry by entry and runs `rga_preproc`
    /// (without a cache) on each entry.
    ///
    /// Every entry is processed with the incoming `line_prefix` extended by
    /// `"<entry name>:/"`. The first error, either from reading the archive or
    /// from processing an entry, aborts the whole archive and is returned.
    fn adapt(&self, ai: AdaptInfo) -> Fallible<()> {
        use std::io::prelude::*;
        let AdaptInfo {
            filepath_hint: archive_path,
            inp: mut stream,
            oup: sink,
            line_prefix: outer_prefix,
            ..
        } = ai;
        // `Ok(None)` from the zip reader marks the end of the archive.
        while let Some(mut entry) = ::zip::read::read_zipfile_from_stream(&mut stream)? {
            eprintln!(
                "{}|{}: {} bytes ({} bytes packed)",
                archive_path.to_string_lossy(),
                entry.name(),
                entry.size(),
                entry.compressed_size()
            );
            let entry_prefix = format!("{}{}:/", outer_prefix, entry.name());
            let entry_path = entry.sanitized_name();
            // NOTE(review): an entry that `rga_preproc` rejects (e.g. no adapter for
            // its name) ends processing of the entire archive -- confirm this is intended.
            rga_preproc(
                AdaptInfo {
                    filepath_hint: &entry_path,
                    inp: &mut entry,
                    oup: sink,
                    line_prefix: &entry_prefix,
                },
                None,
            )?;
        }
        Ok(())
    }
}

View File

@ -1,110 +1,30 @@
use failure::{format_err, Error};
use path_clean::PathClean;
use rga::adapters::*;
use rga::preproc::*;
use rga::CachingWriter;
use failure::{Error, format_err};
// longest compressed conversion output to save in cache
const MAX_DB_BLOB_LEN: usize = 2000000;
const ZSTD_LEVEL: i32 = 12;
fn open_db() -> Result<std::sync::Arc<std::sync::RwLock<rkv::Rkv>>, Error> {
let app_cache = cachedir::CacheDirConfig::new("rga").get_cache_dir()?;
let db_arc = rkv::Manager::singleton()
.write()
.expect("could not write db manager")
.get_or_create(app_cache.as_path(), |p| {
let mut builder = rkv::Rkv::environment_builder();
builder
.set_flags(rkv::EnvironmentFlags::NO_SYNC | rkv::EnvironmentFlags::WRITE_MAP) // not durable
.set_map_size(2 * 1024 * 1024 * 1024)
.set_max_dbs(100);
rkv::Rkv::from_env(p, builder)
})
.expect("could not get/create db");
Ok(db_arc)
}
use std::fs::File;
use std::path::PathBuf;
use std::rc::Rc;
fn main() -> Result<(), Error> {
//db.
let adapters = adapter_matcher()?;
let path = {
let filepath = std::env::args_os()
.skip(1)
.next()
.ok_or(format_err!("No filename specified"))?;
eprintln!("inp fname: {:?}", filepath);
let path = std::env::current_dir()?.join(&filepath);
std::env::current_dir()?.join(&filepath)
};
eprintln!("abs path: {:?}", path);
eprintln!("clean path: {:?}", path.clean());
let serialized_path: Vec<u8> =
bincode::serialize(&path.clean()).expect("could not serialize path"); // key in the cache database
let filename = path.file_name().ok_or(format_err!("Empty filename"))?;
/*let mimetype = tree_magic::from_filepath(path).ok_or(lerr(format!(
"File {} does not exist",
filename.to_string_lossy()
)))?;
println!("mimetype: {:?}", mimetype);*/
let adapter = adapters(FileMeta {
// mimetype,
lossy_filename: filename.to_string_lossy().to_string(),
});
match adapter {
Some(ad) => {
let meta = ad.metadata();
eprintln!("adapter: {}", &meta.name);
let db_name = format!("{}.v{}", meta.name, meta.version);
let db_arc = open_db()?;
let db_env = db_arc.read().unwrap();
let db = db_env
.open_single(db_name.as_str(), rkv::store::Options::create())
.map_err(|p| format_err!("could not open db store: {:?}", p))?;
let reader = db_env.read().expect("could not get reader");
match db
.get(&reader, &serialized_path)
.map_err(|p| format_err!("could not read from db: {:?}", p))?
{
Some(rkv::Value::Blob(cached)) => {
let stdouti = std::io::stdout();
zstd::stream::copy_decode(cached, stdouti.lock())?;
Ok(())
}
Some(_) => Err(format_err!("Integrity: value not blob")),
None => {
let stdouti = std::io::stdout();
let mut compbuf =
CachingWriter::new(stdouti.lock(), MAX_DB_BLOB_LEN, ZSTD_LEVEL)?;
ad.adapt(&path, &mut compbuf)?;
let compressed = compbuf.finish()?;
if let Some(cached) = compressed {
eprintln!("compressed len: {}", cached.len());
let ai = AdaptInfo {
inp: &mut File::open(&path)?,
filepath_hint: &path,
oup: &mut std::io::stdout(),
line_prefix: "",
};
{
let mut writer = db_env.write().map_err(|p| {
format_err!("could not open write handle to cache: {:?}", p)
})?;
db.put(&mut writer, &serialized_path, &rkv::Value::Blob(&cached))
.map_err(|p| format_err!("could not write to cache: {:?}", p))?;
writer.commit().unwrap();
}
}
Ok(())
}
}
}
None => {
let allow_cat = false;
if allow_cat {
eprintln!("no adapter for that file, running cat!");
let stdini = std::io::stdin();
let mut stdin = stdini.lock();
let stdouti = std::io::stdout();
let mut stdout = stdouti.lock();
std::io::copy(&mut stdin, &mut stdout)?;
Ok(())
} else {
Err(format_err!("No adapter found for file {:?}", filename))
}
}
}
rga_preproc(ai, Some(open_cache_db()?))
}

View File

@ -1,4 +1,5 @@
pub mod adapters;
mod caching_writer;
pub mod errors;
pub mod preproc;
pub use caching_writer::CachingWriter;

151
src/preproc.rs Normal file
View File

@ -0,0 +1,151 @@
use crate::adapters::*;
use crate::CachingWriter;
use failure::{format_err, Error};
use path_clean::PathClean;
use std::io::Read;
use std::path::Path;
use std::path::PathBuf;
use std::rc::Rc;
// longest compressed conversion output to save in cache
const MAX_DB_BLOB_LEN: usize = 2000000;
const ZSTD_LEVEL: i32 = 12;
pub fn open_cache_db() -> Result<std::sync::Arc<std::sync::RwLock<rkv::Rkv>>, Error> {
let app_cache = cachedir::CacheDirConfig::new("rga").get_cache_dir()?;
let db_arc = rkv::Manager::singleton()
.write()
.expect("could not write db manager")
.get_or_create(app_cache.as_path(), |p| {
let mut builder = rkv::Rkv::environment_builder();
builder
.set_flags(rkv::EnvironmentFlags::NO_SYNC | rkv::EnvironmentFlags::WRITE_MAP) // not durable
.set_map_size(2 * 1024 * 1024 * 1024)
.set_max_dbs(100);
rkv::Rkv::from_env(p, builder)
})
.expect("could not get/create db");
Ok(db_arc)
}
pub fn rga_preproc(
ai: AdaptInfo,
mb_db_arc: Option<std::sync::Arc<std::sync::RwLock<rkv::Rkv>>>,
) -> Result<(), Error> {
let adapters = adapter_matcher()?;
let AdaptInfo {
filepath_hint,
inp,
oup,
line_prefix,
..
} = ai;
let filename = filepath_hint
.file_name()
.ok_or(format_err!("Empty filename"))?;
eprintln!("abs path: {:?}", filepath_hint);
/*let mimetype = tree_magic::from_filepath(path).ok_or(lerr(format!(
"File {} does not exist",
filename.to_string_lossy()
)))?;
println!("mimetype: {:?}", mimetype);*/
let adapter = adapters(FileMeta {
// mimetype,
lossy_filename: filename.to_string_lossy().to_string(),
});
match adapter {
Some(ad) => {
let meta = ad.metadata();
eprintln!("adapter: {}", &meta.name);
let db_name = format!("{}.v{}", meta.name, meta.version);
if let Some(db_arc) = mb_db_arc {
let cache_key: Vec<u8> = {
let clean_path = filepath_hint.to_owned().clean();
eprintln!("clean path: {:?}", clean_path);
let meta = std::fs::metadata(&filepath_hint)?;
let key = (
clean_path,
meta.modified().expect("weird OS that can't into mtime"),
);
eprintln!("cache key: {:?}", key);
bincode::serialize(&key).expect("could not serialize path") // key in the cache database
};
let db_env = db_arc.read().unwrap();
let db = db_env
.open_single(db_name.as_str(), rkv::store::Options::create())
.map_err(|p| format_err!("could not open db store: {:?}", p))?;
let reader = db_env.read().expect("could not get reader");
let cached = db
.get(&reader, &cache_key)
.map_err(|p| format_err!("could not read from db: {:?}", p))?;
match cached {
Some(rkv::Value::Blob(cached)) => {
let stdouti = std::io::stdout();
zstd::stream::copy_decode(cached, stdouti.lock())?;
Ok(())
}
Some(_) => Err(format_err!("Integrity: value not blob")),
None => {
let mut compbuf = CachingWriter::new(oup, MAX_DB_BLOB_LEN, ZSTD_LEVEL)?;
// start dupe
eprintln!("adapting...");
ad.adapt(AdaptInfo {
line_prefix,
filepath_hint,
inp,
oup: &mut compbuf,
})?;
// end dupe
let compressed = compbuf.finish()?;
if let Some(cached) = compressed {
eprintln!("compressed len: {}", cached.len());
{
let mut writer = db_env.write().map_err(|p| {
format_err!("could not open write handle to cache: {:?}", p)
})?;
db.put(&mut writer, &cache_key, &rkv::Value::Blob(&cached))
.map_err(|p| {
format_err!("could not write to cache: {:?}", p)
})?;
writer.commit().unwrap();
}
}
Ok(())
}
}
} else {
// todo: duplicate code
// start dupe
eprintln!("adapting...");
ad.adapt(AdaptInfo {
line_prefix,
filepath_hint,
inp,
oup,
})?;
// end dupe
Ok(())
}
}
None => {
let allow_cat = false;
if allow_cat {
eprintln!("no adapter for that file, running cat!");
let stdini = std::io::stdin();
let mut stdin = stdini.lock();
let stdouti = std::io::stdout();
let mut stdout = stdouti.lock();
std::io::copy(&mut stdin, &mut stdout)?;
Ok(())
} else {
Err(format_err!("No adapter found for file {:?}", filename))
}
}
}
}