initial working version

This commit is contained in:
phiresky 2019-06-04 20:08:26 +02:00
commit 16b4277d36
20 changed files with 776 additions and 0 deletions

2
.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
/target
**/*.rs.bk

3
.vscode/settings.json vendored Normal file
View File

@ -0,0 +1,3 @@
{
"editor.formatOnSave": true
}

376
Cargo.lock generated Normal file
View File

@ -0,0 +1,376 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
[[package]]
name = "aho-corasick"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"memchr 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "autocfg"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "bitflags"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "cfg-if"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "cloudabi"
version = "0.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "fixedbitset"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "fnv"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "fuchsia-cprng"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "lazy_static"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "libc"
version = "0.2.57"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "lock_api"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"scopeguard 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "memchr"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "nom"
version = "2.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "ordermap"
version = "0.3.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "parking_lot"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"lock_api 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"parking_lot_core 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
"rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "parking_lot_core"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"cfg-if 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)",
"cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.57 (registry+https://github.com/rust-lang/crates.io-index)",
"rand 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)",
"redox_syscall 0.1.54 (registry+https://github.com/rust-lang/crates.io-index)",
"rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
"smallvec 0.6.9 (registry+https://github.com/rust-lang/crates.io-index)",
"winapi 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "petgraph"
version = "0.4.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"fixedbitset 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)",
"ordermap 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rand"
version = "0.6.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"autocfg 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.57 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_chacha 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_hc 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_isaac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_jitter 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_os 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_pcg 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_xorshift 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"winapi 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rand_chacha"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"autocfg 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rand_core"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"rand_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rand_core"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "rand_hc"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rand_isaac"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rand_jitter"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"libc 0.2.57 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"winapi 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rand_os"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
"fuchsia-cprng 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.57 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"rdrand 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"winapi 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rand_pcg"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"autocfg 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rand_xorshift"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rdrand"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "redox_syscall"
version = "0.1.54"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "regex"
version = "1.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"aho-corasick 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)",
"memchr 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"regex-syntax 0.6.6 (registry+https://github.com/rust-lang/crates.io-index)",
"thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
"utf8-ranges 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "regex-syntax"
version = "0.6.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"ucd-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rga"
version = "0.1.0"
dependencies = [
"regex 1.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
"tree_magic 0.2.1 (git+https://github.com/phiresky/tree_magic)",
]
[[package]]
name = "rustc_version"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "scopeguard"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "semver"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"semver-parser 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "semver-parser"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "smallvec"
version = "0.6.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "thread_local"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "tree_magic"
version = "0.2.1"
source = "git+https://github.com/phiresky/tree_magic#4b1c62f96ac0a6a81896620206e3393c084a7228"
dependencies = [
"fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
"nom 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"parking_lot 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"petgraph 0.4.13 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "ucd-util"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "utf8-ranges"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "winapi"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[metadata]
"checksum aho-corasick 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)" = "e6f484ae0c99fec2e858eb6134949117399f222608d84cadb3f58c1f97c2364c"
"checksum autocfg 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "0e49efa51329a5fd37e7c79db4621af617cd4e3e5bc224939808d076077077bf"
"checksum bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "228047a76f468627ca71776ecdebd732a3423081fcf5125585bcd7c49886ce12"
"checksum cfg-if 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "b486ce3ccf7ffd79fdeb678eac06a9e6c09fc88d33836340becb8fffe87c5e33"
"checksum cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f"
"checksum fixedbitset 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "86d4de0081402f5e88cdac65c8dcdcc73118c1a7a465e2a05f0da05843a8ea33"
"checksum fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "2fad85553e09a6f881f739c29f0b00b0f01357c743266d478b68951ce23285f3"
"checksum fuchsia-cprng 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba"
"checksum lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bc5729f27f159ddd61f4df6228e827e86643d4d3e7c32183cb30a1c08f604a14"
"checksum libc 0.2.57 (registry+https://github.com/rust-lang/crates.io-index)" = "a844cabbd5a77e60403a58af576f0a1baa83c3dd2670be63e615bd24fc58b82d"
"checksum lock_api 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ed946d4529956a20f2d63ebe1b69996d5a2137c91913fe3ebbeff957f5bca7ff"
"checksum memchr 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2efc7bc57c883d4a4d6e3246905283d8dae951bb3bd32f49d6ef297f546e1c39"
"checksum nom 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cf51a729ecf40266a2368ad335a5fdde43471f545a967109cd62146ecf8b66ff"
"checksum ordermap 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "a86ed3f5f244b372d6b1a00b72ef7f8876d0bc6a78a4c9985c53614041512063"
"checksum parking_lot 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fa7767817701cce701d5585b9c4db3cdd02086398322c1d7e8bf5094a96a2ce7"
"checksum parking_lot_core 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "cb88cb1cb3790baa6776844f968fea3be44956cf184fa1be5a03341f5491278c"
"checksum petgraph 0.4.13 (registry+https://github.com/rust-lang/crates.io-index)" = "9c3659d1ee90221741f65dd128d9998311b0e40c5d3c23a62445938214abce4f"
"checksum rand 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)" = "6d71dacdc3c88c1fde3885a3be3fbab9f35724e6ce99467f7d9c5026132184ca"
"checksum rand_chacha 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "556d3a1ca6600bfcbab7c7c91ccb085ac7fbbcd70e008a98742e7847f4f7bcef"
"checksum rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7a6fdeb83b075e8266dcc8762c22776f6877a63111121f5f8c7411e5be7eed4b"
"checksum rand_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d0e7a549d590831370895ab7ba4ea0c1b6b011d106b5ff2da6eee112615e6dc0"
"checksum rand_hc 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7b40677c7be09ae76218dc623efbf7b18e34bced3f38883af07bb75630a21bc4"
"checksum rand_isaac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ded997c9d5f13925be2a6fd7e66bf1872597f759fd9dd93513dd7e92e5a5ee08"
"checksum rand_jitter 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "1166d5c91dc97b88d1decc3285bb0a99ed84b05cfd0bc2341bdf2d43fc41e39b"
"checksum rand_os 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "7b75f676a1e053fc562eafbb47838d67c84801e38fc1ba459e8f180deabd5071"
"checksum rand_pcg 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "abf9b09b01790cfe0364f52bf32995ea3c39f4d2dd011eac241d2914146d0b44"
"checksum rand_xorshift 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cbf7e9e623549b0e21f6e97cf8ecf247c1a8fd2e8a992ae265314300b2455d5c"
"checksum rdrand 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2"
"checksum redox_syscall 0.1.54 (registry+https://github.com/rust-lang/crates.io-index)" = "12229c14a0f65c4f1cb046a3b52047cdd9da1f4b30f8a39c5063c8bae515e252"
"checksum regex 1.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "8f0a0bcab2fd7d1d7c54fa9eae6f43eddeb9ce2e7352f8518a814a4f65d60c58"
"checksum regex-syntax 0.6.6 (registry+https://github.com/rust-lang/crates.io-index)" = "dcfd8681eebe297b81d98498869d4aae052137651ad7b96822f09ceb690d0a96"
"checksum rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a"
"checksum scopeguard 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b42e15e59b18a828bbf5c58ea01debb36b9b096346de35d941dcb89009f24a0d"
"checksum semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403"
"checksum semver-parser 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"
"checksum smallvec 0.6.9 (registry+https://github.com/rust-lang/crates.io-index)" = "c4488ae950c49d403731982257768f48fada354a5203fe81f9bb6f43ca9002be"
"checksum thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b"
"checksum tree_magic 0.2.1 (git+https://github.com/phiresky/tree_magic)" = "<none>"
"checksum ucd-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "535c204ee4d8434478593480b8f86ab45ec9aae0e83c568ca81abf0fd0e88f86"
"checksum utf8-ranges 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "796f7e48bef87609f7ade7e06495a87d5cd06c7866e6a5cbfceffc558a243737"
"checksum winapi 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)" = "f10e386af2b13e47c89e7236a7a14a086791a2b88ebad6df9bf42040195cf770"
"checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
"checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"

15
Cargo.toml Normal file
View File

@ -0,0 +1,15 @@
cargo-features = ["default-run"]
[package]
name = "rga"
version = "0.1.0"
authors = ["phiresky <phireskyde+git@gmail.com>"]
edition = "2018"
default-run = "rga"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
tree_magic = { git = "https://github.com/phiresky/tree_magic" }
regex = "1.1.6"

1
README.md Normal file
View File

@ -0,0 +1 @@
similar: https://gist.github.com/ColonolBuendia/314826e37ec35c616d70506c38dc65aa

BIN
exampledir/formatting.epub Normal file

Binary file not shown.

BIN
exampledir/wasteland.docx Normal file

Binary file not shown.

BIN
exampledir/wasteland.epub Normal file

Binary file not shown.

BIN
exampledir/wasteland.mkv Normal file

Binary file not shown.

BIN
exampledir/wasteland.pdf Normal file

Binary file not shown.

33
muxers Normal file
View File

@ -0,0 +1,33 @@
MUX=ass
Muxer ass [SSA (SubStation Alpha) subtitle]:
Common extensions: ass,ssa.
Mime type: text/x-ass.
Default subtitle codec: ass.
ass muxer AVOptions:
-ignore_readorder <boolean> E........ write events immediately, even if they're out-of-order (default false)
MUX=jacosub
Muxer jacosub [JACOsub subtitle format]:
Common extensions: jss,js.
Mime type: text/x-jacosub.
Default subtitle codec: jacosub.
MUX=microdvd
Muxer microdvd [MicroDVD subtitle format]:
Common extensions: sub.
Mime type: text/x-microdvd.
Default subtitle codec: microdvd.
MUX=srt
Muxer srt [SubRip subtitle]:
Common extensions: srt.
Mime type: application/x-subrip.
Default subtitle codec: subrip.
MUX=sup
Muxer sup [raw HDMV Presentation Graphic Stream subtitles]:
Common extensions: sup.
Mime type: application/x-pgs.
Default subtitle codec: hdmv_pgs_subtitle.
MUX=webvtt
Muxer webvtt [WebVTT subtitle]:
Common extensions: vtt.
Mime type: text/vtt.
Default subtitle codec: webvtt.

1
rustfmt.toml Normal file
View File

@ -0,0 +1 @@
edition = "2018"

76
src/adapters.rs Normal file
View File

@ -0,0 +1,76 @@
pub mod ffmpeg;
pub mod pandoc;
pub mod poppler;
pub mod spawning;
//pub use ffmpeg::FffmpegAdapter;
use regex::{Regex, RegexSet};
use std::collections::HashMap;
use std::ffi::OsString;
use std::io::BufRead;
use std::io::Write;
use std::rc::Rc;
pub enum Matcher {
MimeType(Regex), // todo: generic pattern?
FileName(Regex),
}
pub struct AdapterMeta {
pub name: String,
pub version: i32,
pub matchers: Vec<Matcher>,
}
pub struct FileMeta {
// filename is not actually a utf8 string, but since we can't do regex on OsStr and can't get a &[u8] from OsStr either,
// and since we probably only want to do matching on ascii stuff anyways, this is the filename as a string with non-valid bytes removed
pub lossy_filename: String,
pub mimetype: String,
}
pub trait GetMetadata {
fn metadata<'a>(&'a self) -> &'a AdapterMeta;
}
pub trait FileAdapter: GetMetadata {
fn adapt(&self, inp_fname: &str, oup: &mut dyn Write) -> std::io::Result<()>;
}
pub fn ExtensionMatcher(extension: &str) -> Matcher {
let regex = Regex::new(&format!(".*\\.{}", &regex::escape(extension)))
.expect("we know this regex compiles");
Matcher::FileName(regex)
}
pub fn init_adapters() -> Result<impl Fn(FileMeta) -> Option<Rc<dyn FileAdapter>>, regex::Error> {
let adapters: Vec<Rc<dyn FileAdapter>> = vec![
Rc::new(crate::adapters::ffmpeg::FFmpegAdapter::new()),
Rc::new(crate::adapters::pandoc::PandocAdapter::new()),
Rc::new(crate::adapters::poppler::PopplerAdapter::new()),
];
let mut fname_regexes = vec![];
let mut mime_regexes = vec![];
for adapter in adapters.into_iter() {
let metadata = adapter.metadata();
for matcher in &metadata.matchers {
match matcher {
Matcher::MimeType(re) => mime_regexes.push((re.clone(), adapter.clone())),
Matcher::FileName(re) => fname_regexes.push((re.clone(), adapter.clone())),
};
}
}
let fname_regex_set = RegexSet::new(fname_regexes.iter().map(|p| p.0.as_str()))?;
let mime_regex_set = RegexSet::new(mime_regexes.iter().map(|p| p.0.as_str()))?;
return Ok(move |meta: FileMeta| {
// todo: handle multiple matches
for m in fname_regex_set.matches(&meta.lossy_filename) {
return Some(fname_regexes[m].1.clone());
}
for m in mime_regex_set.matches(&meta.mimetype) {
return Some(mime_regexes[m].1.clone());
}
return None;
});
}

43
src/adapters/ffmpeg.rs Normal file
View File

@ -0,0 +1,43 @@
use super::*;
use spawning::SpawningFileAdapter;
use std::io::Write;
use std::process::Command;
pub struct FFmpegAdapter {
_metadata: AdapterMeta,
}
// maybe todo: read from
// ffmpeg -demuxers
// ffmpeg -h demuxer=xyz
static extensions: &[&str] = &["mkv", "mp4", "avi"];
impl FFmpegAdapter {
pub fn new() -> FFmpegAdapter {
FFmpegAdapter {
_metadata: AdapterMeta {
name: "FFmpeg".to_owned(),
version: 1,
matchers: extensions.iter().map(|s| ExtensionMatcher(s)).collect(),
},
}
}
}
impl GetMetadata for FFmpegAdapter {
fn metadata<'a>(&'a self) -> &'a AdapterMeta {
&self._metadata
}
}
impl SpawningFileAdapter for FFmpegAdapter {
fn command(&self, inp_fname: &str) -> Command {
let mut cmd = Command::new("ffmpeg");
cmd.arg("-hide_banner")
.arg("-loglevel")
.arg("panic")
.arg("-i")
.arg(inp_fname)
.arg("-f")
.arg("webvtt")
.arg("-");
cmd
}
}

77
src/adapters/pandoc.rs Normal file
View File

@ -0,0 +1,77 @@
use super::*;
use spawning::SpawningFileAdapter;
use std::io::Write;
use std::process::Command;
// from https://github.com/jgm/pandoc/blob/master/src/Text/Pandoc/App/FormatHeuristics.hs
// excluding formats that could cause problems (db = sqlite) or that are already text formats (e.g. xml-based)
//"db" -> Just "docbook"
//"adoc" -> Just "asciidoc"
//"asciidoc" -> Just "asciidoc"
//"context" -> Just "context"
//"ctx" -> Just "context"
//"dokuwiki" -> Just "dokuwiki"
//"htm" -> Just "html"
//"html" -> Just "html"
//"json" -> Just "json"
//"latex" -> Just "latex"
//"lhs" -> Just "markdown+lhs"
//"ltx" -> Just "latex"
//"markdown" -> Just "markdown"
//"md" -> Just "markdown"
//"ms" -> Just "ms"
//"muse" -> Just "muse"
//"native" -> Just "native"
//"opml" -> Just "opml"
//"org" -> Just "org"
//"roff" -> Just "ms"
//"rst" -> Just "rst"
//"s5" -> Just "s5"
//"t2t" -> Just "t2t"
//"tei" -> Just "tei"
//"tei.xml" -> Just "tei"
//"tex" -> Just "latex"
//"texi" -> Just "texinfo"
//"texinfo" -> Just "texinfo"
//"textile" -> Just "textile"
//"text" -> Just "markdown"
//"txt" -> Just "markdown"
//"xhtml" -> Just "html"
//"wiki" -> Just "mediawiki"
static extensions: &[&str] = &["epub", "odt", "docx", "pptx", "fb2", "icml", "rtf", "ipynb"];
pub struct PandocAdapter {
_metadata: AdapterMeta,
}
impl PandocAdapter {
pub fn new() -> PandocAdapter {
PandocAdapter {
_metadata: AdapterMeta {
name: "pandoc".to_owned(),
version: 1,
// todo: read from ffmpeg -demuxers?
matchers: extensions.iter().map(|s| ExtensionMatcher(s)).collect(),
},
}
}
}
impl GetMetadata for PandocAdapter {
fn metadata<'a>(&'a self) -> &'a AdapterMeta {
&self._metadata
}
}
impl SpawningFileAdapter for PandocAdapter {
fn command(&self, inp_fname: &str) -> Command {
let mut cmd = Command::new("pandoc");
cmd
// simpler markown (with more information loss but plainer text)
.arg("--to=markdown-header_attributes-link_attributes-fenced_divs-markdown_in_html_blocks-raw_html-native_divs-native_spans-bracketed_spans")
.arg("--wrap=none")
.arg("--atx-headers")
.arg("--")
.arg(inp_fname);
cmd
}
}

37
src/adapters/poppler.rs Normal file
View File

@ -0,0 +1,37 @@
use super::*;
use spawning::SpawningFileAdapter;
use std::io::Read;
use std::io::Write;
use std::process::Command;
use std::process::Stdio;
static extensions: &[&str] = &["pdf"];
pub struct PopplerAdapter {
_metadata: AdapterMeta,
}
impl PopplerAdapter {
pub fn new() -> PopplerAdapter {
PopplerAdapter {
_metadata: AdapterMeta {
name: "poppler pdftotext".to_owned(),
version: 1,
// todo: read from ffmpeg -demuxers?
matchers: extensions.iter().map(|s| ExtensionMatcher(s)).collect(),
},
}
}
}
impl GetMetadata for PopplerAdapter {
fn metadata<'a>(&'a self) -> &'a AdapterMeta {
&self._metadata
}
}
impl SpawningFileAdapter for PopplerAdapter {
fn command(&self, inp_fname: &str) -> Command {
let mut cmd = Command::new("pdftotext");
cmd.arg("-layout").arg("--").arg(inp_fname).arg("-");
cmd
}
}

28
src/adapters/spawning.rs Normal file
View File

@ -0,0 +1,28 @@
use super::*;
use std::io::Write;
use std::process::Command;
use std::process::Stdio;
pub trait SpawningFileAdapter: GetMetadata {
fn command(&self, inp_fname: &str) -> Command;
}
impl<T> FileAdapter for T
where
T: SpawningFileAdapter,
{
fn adapt(&self, inp_fname: &str, oup: &mut dyn Write) -> std::io::Result<()> {
let mut cmd = self.command(inp_fname).stdout(Stdio::piped()).spawn()?;
let stdo = cmd.stdout.as_mut().expect("is piped");
std::io::copy(stdo, oup)?;
let status = cmd.wait()?;
if status.success() {
Ok(())
} else {
Err(std::io::Error::new(
std::io::ErrorKind::Other,
"subprocess failed",
))
}
}
}

67
src/bin/rga-preproc.rs Normal file
View File

@ -0,0 +1,67 @@
use rga::adapters::*;
use std::error::Error;
use std::fmt;
use std::path::Path;
use tree_magic;
#[derive(Debug)]
struct ShittyError;
impl Error for ShittyError {}
impl fmt::Display for ShittyError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "ShittyError")
}
}
fn main() -> Result<(), Box<dyn Error>> {
let adapters = init_adapters()?;
//let ad = &adapters[0];
//let z: &str = &ad.metadata().name;
// todo: how to make this less indenty?
match std::env::args().skip(1).next() {
Some(filepath) => {
println!("fname: {}", filepath);
let path = Path::new(&filepath);
let maybe_filename = path.file_name();
match maybe_filename {
Some(filename) => {
let result = tree_magic::from_filepath(path);
match result {
Some(mimetype) => {
println!("mimetype: {:?}", mimetype);
let adapter = adapters(FileMeta {
mimetype,
lossy_filename: filename.to_string_lossy().to_string(),
});
match adapter {
Some(ad) => {
println!("adapter: {}", &ad.metadata().name);
let stdouti = std::io::stdout();
let mut stdout = stdouti.lock();
ad.adapt(&filepath, &mut stdout)?;
Ok(())
}
None => {
eprintln!("no adapter for that file, running cat!");
let stdini = std::io::stdin();
let mut stdin = stdini.lock();
let stdouti = std::io::stdout();
let mut stdout = stdouti.lock();
std::io::copy(&mut stdin, &mut stdout)?;
Ok(())
}
}
}
None => Err("file does not exist".into()),
}
}
None => Err("Empty filename".into()),
}
}
None => Err("No filename specified".into()),
}
}

16
src/bin/rga.rs Normal file
View File

@ -0,0 +1,16 @@
use rga::adapters;
use std::process::Command;
fn main() -> std::io::Result<()> {
let exe = std::env::current_exe().expect("Could not get executable location");
let preproc_exe = exe.with_file_name("rga-preproc");
let mut child = Command::new("rg")
.arg("--pre")
.arg(preproc_exe)
.args(std::env::args().skip(1))
.spawn()?;
child.wait()?;
Ok(())
}

1
src/lib.rs Normal file
View File

@ -0,0 +1 @@
pub mod adapters;