From bcc01f7a62fea3f5cc49247fc8cc510fe30441ff Mon Sep 17 00:00:00 2001 From: phiresky Date: Thu, 10 Sep 2020 17:18:11 +0200 Subject: [PATCH] refactoring (partial) --- CHANGELOG.md | 1 + Cargo.lock | 403 +++++++++++++++++-------------------- Cargo.toml | 38 ++-- src/adapters.rs | 43 ++-- src/adapters/custom.rs | 9 +- src/adapters/decompress.rs | 3 +- src/adapters/ffmpeg.rs | 3 +- src/adapters/fns.rs | 18 -- src/adapters/pdfpages.rs | 7 +- src/adapters/poppler.rs | 29 --- src/adapters/spawning.rs | 10 +- src/adapters/sqlite.rs | 1 + src/adapters/tar.rs | 24 ++- src/adapters/tesseract.rs | 5 +- src/adapters/writing.rs | 16 +- src/adapters/zip.rs | 32 +-- src/caching_writer.rs | 2 +- src/preproc.rs | 9 +- src/preproc_cache.rs | 9 +- 19 files changed, 309 insertions(+), 353 deletions(-) delete mode 100644 src/adapters/poppler.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index 47c273e..e0e0936 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ - add cross-platform rga-fzf binary - add a config file (~/.config/ripgrep-all) that is generated on first use, including schema - change adapter interface from `(&Read, &Write) -> ()` to `Read -> Read` to allow chaining of adapters + - this means that all adapters are now run in their own thread, data passed via a pipe. might cause performance regressions # 0.9.6 (2020-05-19) diff --git a/Cargo.lock b/Cargo.lock index 388e8ec..00adc34 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,15 +2,15 @@ # It is not intended for manual editing. [[package]] name = "adler32" -version = "1.0.4" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d2e7343e7fc9de883d1b0341e0b13970f764c14101234857d2ddafa1cb1cac2" +checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234" [[package]] name = "aho-corasick" -version = "0.7.10" +version = "0.7.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8716408b8bc624ed7f65d223ddb9ac2d044c0547b6fa4b0d554f3a9540496ada" +checksum = "043164d8ba5c4c3035fec9bbee8647c0261d788f3474306f93bb65901cae0e86" dependencies = [ "memchr", ] @@ -26,9 +26,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.31" +version = "1.0.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85bb70cc08ec97ca5450e6eba421deeea5f172c0fc61f78b5357b2a8e8be195f" +checksum = "6b602bfe940d21c130f3895acd65221e8a61270debe89d628b9cb4e3ccb8569b" [[package]] name = "arrayref" @@ -36,15 +36,6 @@ version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a4c527152e37cf757a3f78aae5a06fbeefdb07ccc535c980a3208ee3060dd544" -[[package]] -name = "arrayvec" -version = "0.4.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd9fd44efafa8690358b7408d253adf110036b88f55672a933f01d616ad9b1b9" -dependencies = [ - "nodrop", -] - [[package]] name = "arrayvec" version = "0.5.1" @@ -64,21 +55,21 @@ dependencies = [ [[package]] name = "autocfg" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8aac770f1885fd7e387acedd76065302551364496e46b3dd00860b2f8359b9d" +checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" [[package]] name = "base64" -version = "0.11.0" +version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b41b7ea54a0c9d92199de89e20e58d49f02f8e699814ef3fdf266f6f748d15c7" +checksum = "3441f0f7b02788e948e47f457ca01f1d7e6d92c693bc132c22b087d3141c03ff" [[package]] name = "bincode" -version = "1.2.1" +version = "1.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5753e2a71534719bf3f4e57006c3a4f0d2c672a4b676eec84161f763eca87dbf" +checksum = "f30d3a39baa26f9651f17b375061f3233dde33424a8b72b0dbe93a68a0bc896d" dependencies = [ "byteorder", "serde", @@ -97,7 +88,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d8fb2d74254a3a0b5cac33ac9f8ed0e44aa50378d9dbb2e5d83bd21ed1dc2c8a" dependencies = [ "arrayref", - "arrayvec 0.5.1", + "arrayvec", "constant_time_eq", ] @@ -117,6 +108,16 @@ dependencies = [ "libc", ] +[[package]] +name = "bzip2" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "275d84fe348b838dc49477d39770682839b3e73e21a3eadc07b12924f1a9fcbe" +dependencies = [ + "bzip2-sys", + "libc", +] + [[package]] name = "bzip2-sys" version = "0.1.9+1.0.8" @@ -130,9 +131,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.0.54" +version = "1.0.59" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7bbb73db36c1246e9034e307d0fba23f9a2e251faa47ade70c1bd252220c8311" +checksum = "66120af515773fb005778dc07c261bd201ec8ce50bd6e7144c927753fe013381" dependencies = [ "jobserver", ] @@ -145,9 +146,9 @@ checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" [[package]] name = "chrono" -version = "0.4.11" +version = "0.4.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80094f509cf8b5ae86a4966a39b3ff66cd7e2a3e594accec3743ff3fabeab5b2" +checksum = "942f72db697d8767c22d46a598e01f2d3b475501ea43d0db4f16d90259182d0b" dependencies = [ "num-integer", "num-traits", @@ -156,9 +157,9 @@ dependencies = [ [[package]] name = "clap" -version = "2.33.1" +version = "2.33.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bdfa80d47f954d53a35a64987ca1422f495b8d6483c0fe9f7117b36c2a792129" +checksum = "37e58ac78573c40708d45522f0d80fa2f01cc4f9b4e2bf749807255454312002" dependencies = [ "ansi_term", "atty", @@ -201,9 +202,9 @@ dependencies = [ [[package]] name = "crossbeam-channel" -version = "0.4.2" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cced8691919c02aac3cb0a1bc2e9b73d89e832bf9a06fc579d4e71b68a2da061" +checksum = "b153fe7cbef478c567df0f972e02e6d736db11affe43dfc9c56a9374d1adfb87" dependencies = [ "crossbeam-utils", "maybe-uninit", @@ -237,12 +238,13 @@ dependencies = [ [[package]] name = "crossbeam-queue" -version = "0.2.2" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab6bffe714b6bb07e42f201352c34f51fefd355ace793f9e638ebd52d23f98d2" +checksum = "774ba60a54c213d409d5353bda12d49cd68d14e45036a285234c8d6f91f92570" dependencies = [ "cfg-if", "crossbeam-utils", + "maybe-uninit", ] [[package]] @@ -258,9 +260,9 @@ dependencies = [ [[package]] name = "derive_more" -version = "0.99.7" +version = "0.99.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2127768764f1556535c01b5326ef94bd60ff08dcfbdc544d53e69ed155610f5d" +checksum = "298998b1cf6b5b2c8a7b023dfd45821825ce3ba8a8af55c921a0e734e4653f76" dependencies = [ "proc-macro2", "quote", @@ -311,21 +313,21 @@ dependencies = [ [[package]] name = "dyn-clone" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3ec9c7fb9a2ce708751c98e31ccbae74b6ab194f5c8e30cfb7ed62e38b70866" +checksum = "4c53dc3a653e0f64081026e4bf048d48fec9fce90c66e8326ca7292df0ff2d82" [[package]] name = "either" -version = "1.5.3" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb1f6b1ce1c140482ea30ddd3335fc0024ac7ee112895426e0a629a6c20adfe3" +checksum = "cd56b59865bce947ac5958779cfa508f6c3b9497cc762b7e24a12d11ccde2c4f" [[package]] name = "encoding_rs" -version = "0.8.23" +version = "0.8.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8ac63f94732332f44fe654443c46f6375d1939684c17b0afb6cb56b0456e171" +checksum = "a51b8cf747471cb9499b6d59e59b0444f4c90eba8968c4e44874e92b5b64ace2" dependencies = [ "cfg-if", ] @@ -387,9 +389,9 @@ checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" [[package]] name = "filetime" -version = "0.2.10" +version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "affc17579b132fc2461adf7c575cc6e8b134ebca52c51f5411388965227dc695" +checksum = "3ed85775dcc68644b5c950ac06a2b23768d3bc9390464151aaf27136998dcf9e" dependencies = [ "cfg-if", "libc", @@ -441,13 +443,13 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.1.14" +version = "0.1.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7abc8dd8451921606d809ba32e95b6111925cd2906060d2dcc29c070220503eb" +checksum = "fc587bc0ec293155d5bfa6b9891ec18a1e330c234f896ea47fbada4cadbe47e6" dependencies = [ "cfg-if", "libc", - "wasi", + "wasi 0.9.0+wasi-snapshot-preview1", ] [[package]] @@ -456,6 +458,12 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" +[[package]] +name = "hashbrown" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00d63df3d41950fb462ed38308eea019113ad1508da725bbedcd0fa5a85ef5f7" + [[package]] name = "heck" version = "0.3.1" @@ -467,9 +475,9 @@ dependencies = [ [[package]] name = "hermit-abi" -version = "0.1.13" +version = "0.1.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91780f809e750b0a89f5544be56617ff6b1227ee485bcb06ebe10cdf89bd3b71" +checksum = "3deed196b6e7f9e44a2ae8d94225d80302d81208b1bb673fd21fe634645c85a9" dependencies = [ "libc", ] @@ -496,11 +504,12 @@ dependencies = [ [[package]] name = "indexmap" -version = "1.4.0" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c398b2b113b55809ceb9ee3e753fcbac793f1956663f3c36549c1346015c2afe" +checksum = "55e2e4c765aa53a0424761bf9f41aa7a6ac1efa87238f59560640e27fca028f2" dependencies = [ "autocfg", + "hashbrown", "serde", ] @@ -515,9 +524,9 @@ dependencies = [ [[package]] name = "itoa" -version = "0.4.5" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8b7a7c0c47db5545ed3fef7468ee7bb5b74691498139e4b3f6a20685dc6dd8e" +checksum = "dc6f3ad7b9d11a0c00842ff8de1b60ee58661048eb8049ed33c73594f359d7e6" [[package]] name = "jobserver" @@ -536,28 +545,28 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "lexical-core" -version = "0.6.2" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7043aa5c05dd34fb73b47acb8c3708eac428de4545ea3682ed2f11293ebd890" +checksum = "db65c6da02e61f55dae90a0ae427b2a5f6b3e8db09f58d10efab23af92592616" dependencies = [ - "arrayvec 0.4.12", + "arrayvec", + "bitflags", "cfg-if", - "rustc_version", "ryu", "static_assertions", ] [[package]] name = "libc" -version = "0.2.71" +version = "0.2.77" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9457b06509d27052635f90d6466700c65095fdf75409b3fbdd903e988b886f49" +checksum = "f2f96b10ec2560088a8e76961b00d47107b3a625fecb76dedb29ee7ccbf98235" [[package]] name = "libsqlite3-sys" -version = "0.18.0" +version = "0.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e704a02bcaecd4a08b93a23f6be59d0bd79cd161e0963e9499165a0a35df7bd" +checksum = "e3a245984b1b06c291f46e27ebda9f369a94a1ab8461d0e845e23f9ced01f5db" dependencies = [ "cc", "pkg-config", @@ -595,9 +604,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.8" +version = "0.4.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14b6052be84e6b71ab17edffc2eeabf5c2c3ae1fdb464aae35ac50c67a44e1f7" +checksum = "4fabed175da42fed1fa0746b0ea71f412aa9d35e76e95e59b192c64b9dc2bf8b" dependencies = [ "cfg-if", ] @@ -642,33 +651,27 @@ checksum = "3728d817d99e5ac407411fa471ff9800a778d88a24685968b36824eaf4bee400" [[package]] name = "memoffset" -version = "0.5.4" +version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4fc2c02a7e374099d4ee95a193111f72d2110197fe200272371758f6c3643d8" +checksum = "c198b026e1bbf08a937e94c6c60f9ec4a2267f5b0d2eec9c1b21b061ce2be55f" dependencies = [ "autocfg", ] [[package]] name = "miniz_oxide" -version = "0.3.6" +version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa679ff6578b1cddee93d7e82e263b94a575e0bfced07284eb0c037c1d2416a5" +checksum = "791daaae1ed6889560f8c4359194f56648355540573244a5448a83ba1ecc7435" dependencies = [ "adler32", ] -[[package]] -name = "nodrop" -version = "0.1.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb" - [[package]] name = "nom" -version = "5.1.1" +version = "5.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b471253da97532da4b61552249c521e01e736071f71c1a4f7ebbfbf0a06aad6" +checksum = "ffb4262d26ed83a1c0a33a38fe2bb15797329c85770da05e6b828ddb782627af" dependencies = [ "lexical-core", "memchr", @@ -700,9 +703,9 @@ dependencies = [ [[package]] name = "num-integer" -version = "0.1.42" +version = "0.1.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f6ea62e9d81a77cd3ee9a2a5b9b609447857f3d358704331e4ef39eb247fcba" +checksum = "8d59457e662d541ba17869cf51cf177c0b5f0cbf476c66bdc90bf1edac4f875b" dependencies = [ "autocfg", "num-traits", @@ -710,9 +713,9 @@ dependencies = [ [[package]] name = "num-iter" -version = "0.1.40" +version = "0.1.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfb0800a0291891dd9f4fe7bd9c19384f98f7fbe0cd0f39a2c6b88b9868bbc00" +checksum = "7a6e6b7c748f995c4c29c5f5ae0248536e04a5739927c74ec0fa564805094b9f" dependencies = [ "autocfg", "num-integer", @@ -732,43 +735,27 @@ dependencies = [ [[package]] name = "num-traits" -version = "0.2.11" +version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c62be47e61d1842b9170f0fdeec8eba98e60e90e5446449a0545e5152acd7096" +checksum = "ac267bcc07f48ee5f8935ab0d24f316fb722d7a1292e2913f0cc196b29ffd611" dependencies = [ "autocfg", ] [[package]] name = "ordered-float" -version = "1.0.2" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18869315e81473c951eb56ad5558bbc56978562d3ecfb87abb7a1e944cea4518" +checksum = "3741934be594d77de1c8461ebcbbe866f585ea616a9753aa78f2bdc69f0e4579" dependencies = [ "num-traits", ] [[package]] name = "paste" -version = "0.1.16" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d508492eeb1e5c38ee696371bf7b9fc33c83d46a7d451606b96458fbbbdc2dec" -dependencies = [ - "paste-impl", - "proc-macro-hack", -] - -[[package]] -name = "paste-impl" -version = "0.1.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84f328a6a63192b333fce5fbb4be79db6758a4d518dfac6d54412f1492f72d32" -dependencies = [ - "proc-macro-hack", - "proc-macro2", - "quote", - "syn", -] +checksum = "f6ddc8e145de01d9180ac7b78b9676f95a9c2447f6a88b2c2a04702211bc5d71" [[package]] name = "path-clean" @@ -794,21 +781,15 @@ dependencies = [ [[package]] name = "pkg-config" -version = "0.3.17" +version = "0.3.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05da548ad6865900e60eaba7f589cc0783590a92e940c26953ff81ddbab2d677" - -[[package]] -name = "podio" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b18befed8bc2b61abc79a457295e7e838417326da1586050b919414073977f19" +checksum = "d36492546b6af1463394d46f0c834346f31548646f6ba10849802c9c9a27ac33" [[package]] name = "ppv-lite86" -version = "0.2.8" +version = "0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "237a5ed80e274dbc66f86bd59c1e25edc039660be53194b5fe0a482e0f2612ea" +checksum = "c36fa947111f5c62a733b652544dd0016a43ce89619538a8ef92724a6f501a20" [[package]] name = "pretty-bytes" @@ -822,9 +803,9 @@ dependencies = [ [[package]] name = "proc-macro-error" -version = "1.0.2" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98e9e4b82e0ef281812565ea4751049f1bdcdfccda7d3f459f2e138a40c08678" +checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" dependencies = [ "proc-macro-error-attr", "proc-macro2", @@ -835,28 +816,20 @@ dependencies = [ [[package]] name = "proc-macro-error-attr" -version = "1.0.2" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f5444ead4e9935abd7f27dc51f7e852a0569ac888096d5ec2499470794e2e53" +checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" dependencies = [ "proc-macro2", "quote", - "syn", - "syn-mid", "version_check", ] -[[package]] -name = "proc-macro-hack" -version = "0.5.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e0456befd48169b9f13ef0f0ad46d492cf9d2dbb918bcf38e01eed4ce3ec5e4" - [[package]] name = "proc-macro2" -version = "1.0.18" +version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "beae6331a816b1f65d04c45b078fd8e6c93e8071771f41b8163255bbd8d7c8fa" +checksum = "36e28516df94f3dd551a587da5357459d9b36d945a7c37c3557928c1c2ff2a2c" dependencies = [ "unicode-xid", ] @@ -919,15 +892,15 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.1.56" +version = "0.1.57" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2439c63f3f6139d1b57529d16bc3b8bb855230c8efcc5d3a896c8bea7c3b1e84" +checksum = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce" [[package]] name = "redox_users" -version = "0.3.4" +version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09b23093265f8d200fa7b4c2c76297f47e681c655f6f1285a8780d6a022f7431" +checksum = "de0737333e7a9502c789a36d7c7fa6092a49895d4faa31ca5df163857ded2e9d" dependencies = [ "getrandom", "redox_syscall", @@ -954,9 +927,9 @@ checksum = "26412eb97c6b088a6997e05f69403a802a92d520de2f8e63c2b65f9e0f47c4e8" [[package]] name = "remove_dir_all" -version = "0.5.2" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a83fa3702a688b9359eccba92d153ac33fd2e8462f9e0e3fdf155239ea7792e" +checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" dependencies = [ "winapi", ] @@ -967,7 +940,7 @@ version = "0.9.7-alpha.0" dependencies = [ "anyhow", "bincode", - "bzip2", + "bzip2 0.4.1", "chrono", "clap", "crossbeam", @@ -1025,9 +998,9 @@ dependencies = [ [[package]] name = "rusqlite" -version = "0.23.1" +version = "0.24.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45d0fd62e1df63d254714e6cb40d0a0e82e7a1623e7a27f679d851af092ae58b" +checksum = "4c78c3275d9d6eb684d2db4b2388546b32fdae0586c20a82f3905d21ea78b9ef" dependencies = [ "bitflags", "fallible-iterator", @@ -1037,14 +1010,13 @@ dependencies = [ "lru-cache", "memchr", "smallvec", - "time", ] [[package]] name = "rust-argon2" -version = "0.7.0" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2bc8af4bda8e1ff4932523b94d3dd20ee30a87232323eda55903ffd71d2fb017" +checksum = "9dab61250775933275e84053ac235621dfb739556d5c54a2f2e9313b7cf43a19" dependencies = [ "base64", "blake2b_simd", @@ -1052,15 +1024,6 @@ dependencies = [ "crossbeam-utils", ] -[[package]] -name = "rustc_version" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" -dependencies = [ - "semver", -] - [[package]] name = "ryu" version = "1.0.5" @@ -1069,9 +1032,9 @@ checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" [[package]] name = "schemars" -version = "0.8.0-alpha-2" +version = "0.8.0-alpha-4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0d3111dca36beaa5be680b8d031d2416e5d0e66aac8118893d42792a6ea8996" +checksum = "fb19de085c2896c0f4ac42cb2af046ec769be3fdcaf8e93a599f5cbbdf543ffa" dependencies = [ "dyn-clone", "indexmap", @@ -1082,9 +1045,9 @@ dependencies = [ [[package]] name = "schemars_derive" -version = "0.8.0-alpha-2" +version = "0.8.0-alpha-4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e066c77ba237124b99881dfb3022cd7f4b477e19abcdfffd264c6693929a0a5" +checksum = "1fd0c2d87acadcb53176cee5cb10eb3d4024de3d3619dd38d0041ce53c601748" dependencies = [ "proc-macro2", "quote", @@ -1098,35 +1061,20 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" -[[package]] -name = "semver" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" -dependencies = [ - "semver-parser", -] - -[[package]] -name = "semver-parser" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" - [[package]] name = "serde" -version = "1.0.111" +version = "1.0.115" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c9124df5b40cbd380080b2cc6ab894c040a3070d995f5c9dc77e18c34a8ae37d" +checksum = "e54c9a88f2da7238af84b5101443f0c0d0a3bbdc455e34a5c9497b1903ed55d5" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.111" +version = "1.0.115" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f2c3ac8e6ca1e9c80b8be1023940162bf81ae3cffbb1809474152f2ce1eb250" +checksum = "609feed1d0a73cc36a0182a840a9b37b4a82f0b1150369f0536a9e3f2a31dc48" dependencies = [ "proc-macro2", "quote", @@ -1146,9 +1094,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.53" +version = "1.0.57" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "993948e75b189211a9b31a7528f950c6adc21f9720b6438ff80a7fa2f864cea2" +checksum = "164eacbdb13512ec2745fb09d51fd5b22b0d65ed294a1dcf7285a360c80a675c" dependencies = [ "itoa", "ryu", @@ -1167,15 +1115,15 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.4.0" +version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7cb5678e1615754284ec264d9bb5b4c27d2018577fd90ac0ceb578591ed5ee4" +checksum = "fbee7696b84bbf3d89a1c2eccff0850e3047ed46bfcd2e92c29a2d074d57e252" [[package]] name = "static_assertions" -version = "0.3.4" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f3eb36b47e512f8f1c9e3d10c2c1965bc992bd9cdb024fa581e2194501c83d3" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" [[package]] name = "strsim" @@ -1185,9 +1133,9 @@ checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" [[package]] name = "structopt" -version = "0.3.14" +version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "863246aaf5ddd0d6928dfeb1a9ca65f505599e4e1b399935ef7e75107516b4ef" +checksum = "6cc388d94ffabf39b5ed5fadddc40147cb21e605f53db6f8f36a625d27489ac5" dependencies = [ "clap", "lazy_static", @@ -1196,9 +1144,9 @@ dependencies = [ [[package]] name = "structopt-derive" -version = "0.4.7" +version = "0.4.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d239ca4b13aee7a2142e6795cbd69e457665ff8037aed33b3effdc430d2f927a" +checksum = "5e2513111825077552a6751dfad9e11ce0fba07d7276a3943a037d7e93e64c5f" dependencies = [ "heck", "proc-macro-error", @@ -1209,26 +1157,15 @@ dependencies = [ [[package]] name = "syn" -version = "1.0.30" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93a56fabc59dce20fe48b6c832cc249c713e7ed88fa28b0ee0a3bfcaae5fe4e2" +checksum = "963f7d3cc59b59b9325165add223142bbf1df27655d07789f109896d353d8350" dependencies = [ "proc-macro2", "quote", "unicode-xid", ] -[[package]] -name = "syn-mid" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7be3539f6c128a931cf19dcee741c1af532c7fd387baa739c03dd2e96479338a" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "synstructure" version = "0.12.4" @@ -1243,9 +1180,9 @@ dependencies = [ [[package]] name = "tar" -version = "0.4.28" +version = "0.4.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c058ad0bd6ccb84faa24cc44d4fc99bee8a5d7ba9ff33aa4d993122d1aeeac2" +checksum = "489997b7557e9a43e192c527face4feacc78bfbe6eed67fd55c4c9e381cba290" dependencies = [ "filetime", "libc", @@ -1296,6 +1233,26 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "thiserror" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dfdd070ccd8ccb78f4ad66bf1982dc37f620ef696c6b5028fe2ed83dd3d0d08" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd80fc12f73063ac132ac92aceea36734f04a1d93c1240c6944e23a3b8841793" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "thread_local" version = "1.0.1" @@ -1307,14 +1264,21 @@ dependencies = [ [[package]] name = "time" -version = "0.1.43" +version = "0.1.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca8a50ef2360fbd1eeb0ecd46795a87a19024eb4b53c5dc916ca1fd95fe62438" +checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255" dependencies = [ "libc", + "wasi 0.10.0+wasi-snapshot-preview1", "winapi", ] +[[package]] +name = "tinyvec" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "238ce071d267c5710f9d31451efec16c5ee22de34df17cc05e56cbc92e967117" + [[package]] name = "tree_magic_mini" version = "1.0.0" @@ -1344,11 +1308,11 @@ dependencies = [ [[package]] name = "unicode-normalization" -version = "0.1.12" +version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5479532badd04e128284890390c1e876ef7a993d0570b3597ae43dfa1d59afa4" +checksum = "6fb19cf769fa8c6a80a162df694621ebeb4dafb606470b2b2fce0be40a98a977" dependencies = [ - "smallvec", + "tinyvec", ] [[package]] @@ -1359,15 +1323,15 @@ checksum = "e83e153d1053cbb5a118eeff7fd5be06ed99153f00dbcd8ae310c5fb2b22edc0" [[package]] name = "unicode-width" -version = "0.1.7" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "caaa9d531767d1ff2150b9332433f32a24622147e5ebb1f26409d5da67afd479" +checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3" [[package]] name = "unicode-xid" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c" +checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564" [[package]] name = "url" @@ -1388,9 +1352,9 @@ checksum = "9fde2f6a4bea1d6e007c4ad38c6839fa71cbb63b6dbf5b595aa38dc9b1093c11" [[package]] name = "vcpkg" -version = "0.2.9" +version = "0.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55d1e41d56121e07f1e223db0a4def204e45c85425f6a16d462fd07c8d10d74c" +checksum = "6454029bf181f092ad1b853286f23e2c507d8e8194d01d92da4a55c274a5508c" [[package]] name = "vec_map" @@ -1411,10 +1375,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" [[package]] -name = "winapi" -version = "0.3.8" +name = "wasi" +version = "0.10.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8093091eeb260906a183e6ae1abdba2ef5ef2257a21801128899c3fc699229c6" +checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" dependencies = [ "winapi-i686-pc-windows-gnu", "winapi-x86_64-pc-windows-gnu", @@ -1461,31 +1431,30 @@ dependencies = [ [[package]] name = "zip" -version = "0.5.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6df134e83b8f0f8153a094c7b0fd79dfebe437f1d76e7715afa18ed95ebe2fd7" +version = "0.5.8" dependencies = [ - "bzip2", + "byteorder", + "bzip2 0.3.3", "crc32fast", "flate2", - "podio", + "thiserror", "time", ] [[package]] name = "zstd" -version = "0.5.2+zstd.1.4.5" +version = "0.5.3+zstd.1.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "644352b10ce7f333d6e0af85bd4f5322dc449416dc1211c6308e95bca8923db4" +checksum = "01b32eaf771efa709e8308605bbf9319bf485dc1503179ec0469b611937c0cd8" dependencies = [ "zstd-safe", ] [[package]] name = "zstd-safe" -version = "2.0.4+zstd.1.4.5" +version = "2.0.5+zstd.1.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7113c0c9aed2c55181f2d9f5b0a36e7d2c0183b11c058ab40b35987479efe4d7" +checksum = "1cfb642e0d27f64729a639c52db457e0ae906e7bc6f5fe8f5c453230400f1055" dependencies = [ "libc", "zstd-sys", @@ -1493,9 +1462,9 @@ dependencies = [ [[package]] name = "zstd-sys" -version = "1.4.16+zstd.1.4.5" +version = "1.4.17+zstd.1.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c442965efc45353be5a9b9969c9b0872fff6828c7e06d118dda2cb2d0bb11d5a" +checksum = "b89249644df056b522696b1bb9e7c18c87e8ffa3e2f0dc3b0155875d6498f01b" dependencies = [ "cc", "glob", diff --git a/Cargo.toml b/Cargo.toml index cddb686..6168b24 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,35 +20,35 @@ tree_magic = { package = "tree_magic_mini", version = "1.0.0" } regex = "1.3.9" rkv = "0.10.4" path-clean = "0.1.0" -bincode = "1.2.1" -serde = { version = "1.0.111", features = ["derive"] } -zstd = "0.5.2" +bincode = "1.3.1" +serde = { version = "1.0.115", features = ["derive"] } +zstd = "0.5.3" lazy_static = "1.4.0" -serde_json = "1.0.53" -zip = "0.5.5" +serde_json = "1.0.57" +zip = {path="../zip-rs"} crossbeam = "0.7.3" -clap = { version = "2.33.1", features = ["wrap_help"] } -log = "0.4.8" +clap = { version = "2.33.3", features = ["wrap_help"] } +log = "0.4.11" env_logger = "0.7.1" xz2 = "0.1.6" flate2 = "1.0.14" -bzip2 = "0.3.3" -tar = "0.4.28" -chrono = "0.4.11" -encoding_rs = "0.8.23" +bzip2 = "0.4.1" +tar = "0.4.30" +chrono = "0.4.15" +encoding_rs = "0.8.24" encoding_rs_io = "0.1.7" -rusqlite = { version = "0.23.1", features = ["vtab", "bundled"] } +rusqlite = { version = "0.24.0", features = ["vtab", "bundled"] } size_format = "1.0.2" -structopt = "0.3.14" -paste = "0.1.16" +structopt = "0.3.17" +paste = "1.0.0" tempfile = "3.1.0" glob = "0.3.0" -anyhow = "1.0.31" -schemars = {version = "0.8.0-alpha-2", features = ["preserve_order"]} +anyhow = "1.0.32" +schemars = { version = "0.8.0-alpha-4", features = ["preserve_order"] } directories-next = "1.0.1" -derive_more = "0.99.7" +derive_more = "0.99.9" pretty-bytes = "0.2.2" memchr = "2.3.3" -crossbeam-channel = "0.4.2" -dyn-clone = "1.0.1" +crossbeam-channel = "0.4.4" +dyn-clone = "1.0.2" dyn-clonable = "0.9.0" diff --git a/src/adapters.rs b/src/adapters.rs index 980c5f5..feddc85 100644 --- a/src/adapters.rs +++ b/src/adapters.rs @@ -2,14 +2,13 @@ pub mod custom; pub mod decompress; pub mod ffmpeg; pub mod fns; -//pub mod pdfpages; -pub mod poppler; +// pub mod pdfpages; pub mod spawning; pub mod sqlite; -//pub mod tar; -//pub mod tesseract; +// pub mod tar; +// pub mod tesseract; pub mod writing; -// pub mod zip; +pub mod zip; use crate::{config::RgaConfig, matching::*}; use anyhow::*; use custom::builtin_spawning_adapters; @@ -23,7 +22,7 @@ use std::iter::Iterator; use std::path::{Path, PathBuf}; use std::rc::Rc; -pub type ReadBox = Box; +pub type ReadBox<'a> = Box; pub struct AdapterMeta { /// unique short name of this adapter (a-z0-9 only) @@ -38,6 +37,10 @@ pub struct AdapterMeta { /// list of matchers when we have mime type detection active (interpreted as ORed) /// warning: this *overrides* the fast matchers pub slow_matchers: Option>, + /// if true, slow_matchers is merged with fast matchers if accurate is enabled + /// for example, in sqlite you want this disabled since the db extension can mean other things and the mime type matching is very accurate for sqlite. + /// but for tar you want it enabled, since the tar extension is very accurate but the tar mime matcher can have false negatives + pub keep_fast_matchers_if_accurate: bool, // if true, adapter is only used when user lists it in `--rga-adapters` pub disabled_by_default: bool, } @@ -47,9 +50,21 @@ impl AdapterMeta { &'a self, slow: bool, ) -> Box> + 'a> { - match (slow, &self.slow_matchers) { - (true, Some(ref sm)) => Box::new(sm.iter().map(|e| Cow::Borrowed(e))), - (_, _) => Box::new( + match ( + slow, + self.keep_fast_matchers_if_accurate, + &self.slow_matchers, + ) { + (true, false, Some(ref sm)) => Box::new(sm.iter().map(|e| Cow::Borrowed(e))), + (true, true, Some(ref sm)) => Box::new( + sm.iter().map(|e| Cow::Borrowed(e)).chain( + self.fast_matchers + .iter() + .map(|e| Cow::Owned(FileMatcher::Fast(e.clone()))), + ), + ), + // don't have slow matchers or slow matching disabled + (true, _, None) | (false, _, _) => Box::new( self.fast_matchers .iter() .map(|e| Cow::Owned(FileMatcher::Fast(e.clone()))), @@ -65,9 +80,9 @@ pub trait FileAdapter: GetMetadata { /// adapt a file. /// /// detection_reason is the Matcher that was used to identify this file. Unless --rga-accurate was given, it is always a FastMatcher - fn adapt(&self, a: AdaptInfo, detection_reason: &FileMatcher) -> Result; + fn adapt<'a>(&self, a: AdaptInfo<'a>, detection_reason: &FileMatcher) -> Result>; } -pub struct AdaptInfo { +pub struct AdaptInfo<'a> { /// file path. May not be an actual file on the file system (e.g. in an archive). Used for matching file extensions. pub filepath_hint: PathBuf, /// true if filepath_hint is an actual file on the file system @@ -75,7 +90,7 @@ pub struct AdaptInfo { /// depth at which this file is in archives. 0 for real filesystem pub archive_recursion_depth: i32, /// stream to read the file from. can be from a file or from some decoder - pub inp: ReadBox, + pub inp: ReadBox<'a>, /// prefix every output line with this string to better indicate the file's location if it is in some archive pub line_prefix: String, pub config: RgaConfig, @@ -95,12 +110,12 @@ pub fn get_all_adapters(custom_adapters: Option>) -> Ad let internal_adapters: Vec> = vec![ Rc::new(ffmpeg::FFmpegAdapter::new()), - //Rc::new(zip::ZipAdapter::new()), + Rc::new(zip::ZipAdapter::new()), Rc::new(decompress::DecompressAdapter::new()), // Rc::new(tar::TarAdapter::new()), Rc::new(sqlite::SqliteAdapter::new()), // Rc::new(pdfpages::PdfPagesAdapter::new()), - //Rc::new(tesseract::TesseractAdapter::new()), + // Rc::new(tesseract::TesseractAdapter::new()), ]; adapters.extend( builtin_spawning_adapters diff --git a/src/adapters/custom.rs b/src/adapters/custom.rs index 47c1647..3ecd6be 100644 --- a/src/adapters/custom.rs +++ b/src/adapters/custom.rs @@ -3,7 +3,7 @@ use super::{ AdapterMeta, GetMetadata, }; use crate::matching::{FastFileMatcher, FileMatcher}; -use anyhow::{Context, Result}; +use anyhow::Result; use lazy_static::lazy_static; use regex::{Captures, Regex}; use schemars::JsonSchema; @@ -25,6 +25,8 @@ pub struct CustomAdapterConfig { pub extensions: Vec, /// if not null and --rga-accurate is enabled, mime type matching is used instead of file name matching pub mimetypes: Option>, + /// if --rga-accurate, only match by mime types, ignore extensions completely + pub match_only_by_mime: Option, /// the name or path of the binary to run pub binary: String, /// The arguments to run the program with. Placeholders: @@ -89,7 +91,8 @@ lazy_static! { "--wrap=none", "--atx-headers" ]), - disabled_by_default: None + disabled_by_default: None, + match_only_by_mime: None }, CustomAdapterConfig { name: "poppler".to_owned(), @@ -103,6 +106,7 @@ lazy_static! { binary: "pdftotext".to_string(), args: strs(&["-", "-"]), disabled_by_default: None, + match_only_by_mime: None // postprocessors: [{name: "add_page_numbers_by_pagebreaks"}] } ]; @@ -199,6 +203,7 @@ impl CustomAdapterConfig { .map(|s| FileMatcher::MimeType(s.to_string())) .collect() }), + keep_fast_matchers_if_accurate: !self.match_only_by_mime.unwrap_or(false), disabled_by_default: self.disabled_by_default.unwrap_or(false), }, }; diff --git a/src/adapters/decompress.rs b/src/adapters/decompress.rs index 85c2bdd..e9dcc1e 100644 --- a/src/adapters/decompress.rs +++ b/src/adapters/decompress.rs @@ -30,7 +30,8 @@ lazy_static! { .map(|s| FileMatcher::MimeType(s.to_string())) .collect() ), - disabled_by_default: false + disabled_by_default: false, + keep_fast_matchers_if_accurate: true }; } #[derive(Default)] diff --git a/src/adapters/ffmpeg.rs b/src/adapters/ffmpeg.rs index f9097c0..a45c7ff 100644 --- a/src/adapters/ffmpeg.rs +++ b/src/adapters/ffmpeg.rs @@ -24,7 +24,8 @@ lazy_static! { .map(|s| FastFileMatcher::FileExtension(s.to_string())) .collect(), slow_matchers: None, - disabled_by_default: false + disabled_by_default: false, + keep_fast_matchers_if_accurate: true }; } diff --git a/src/adapters/fns.rs b/src/adapters/fns.rs index 47cf93c..29f74a1 100644 --- a/src/adapters/fns.rs +++ b/src/adapters/fns.rs @@ -9,24 +9,6 @@ use std::{ io::{Read, Write}, }; -fn postproc(line_prefix: &str, inp: &mut dyn Read, oup: &mut dyn Write) -> Result<()> { - // prepend Page X to each line - let mut page = 1; - for line in BufReader::new(inp).lines() { - let mut line = line?; - if line.contains('\x0c') { - // page break - line = line.replace('\x0c', ""); - page += 1; - if line.is_empty() { - continue; - } - } - oup.write_all(format!("{}Page {}: {}\n", line_prefix, page, line).as_bytes())?; - } - Ok(()) -} - struct ByteReplacer where R: Read, diff --git a/src/adapters/pdfpages.rs b/src/adapters/pdfpages.rs index e4bafa4..2fa572b 100644 --- a/src/adapters/pdfpages.rs +++ b/src/adapters/pdfpages.rs @@ -18,11 +18,12 @@ lazy_static! { recurses: true, fast_matchers: EXTENSIONS .iter() - .map(|s| FastMatcher::FileExtension(s.to_string())) + .map(|s| FastFileMatcher::FileExtension(s.to_string())) .collect(), - slow_matchers: Some(vec![SlowMatcher::MimeType( + slow_matchers: Some(vec![FileMatcher::MimeType( "application/pdf".to_owned() )]), + keep_fast_matchers_if_accurate: true, disabled_by_default: true }; } @@ -44,7 +45,7 @@ impl GetMetadata for PdfPagesAdapter { /// A pdf is basically converted to a zip that has Page X.png files. /// This way, something like tesseract can process the pages individually impl FileAdapter for PdfPagesAdapter { - fn adapt(&self, ai: AdaptInfo, _detection_reason: &SlowMatcher) -> Result<()> { + fn adapt(&self, ai: AdaptInfo, _detection_reason: &FileMatcher) -> Result<()> { let AdaptInfo { filepath_hint, is_real_file, diff --git a/src/adapters/poppler.rs b/src/adapters/poppler.rs deleted file mode 100644 index 1439eb8..0000000 --- a/src/adapters/poppler.rs +++ /dev/null @@ -1,29 +0,0 @@ - - - - - -/* -static EXTENSIONS: &[&str] = &["pdf"]; - - - postproc: "add_lines" - fn postproc(line_prefix: &str, inp: &mut dyn Read, oup: &mut dyn Write) -> Result<()> { - // prepend Page X to each line - let mut page = 1; - for line in BufReader::new(inp).lines() { - let mut line = line?; - if line.contains('\x0c') { - // page break - line = line.replace('\x0c', ""); - page += 1; - if line.is_empty() { - continue; - } - } - oup.write_all(format!("{}Page {}: {}\n", line_prefix, page, line).as_bytes())?; - } - Ok(()) - } -} -*/ diff --git a/src/adapters/spawning.rs b/src/adapters/spawning.rs index 18fd1d1..7327c65 100644 --- a/src/adapters/spawning.rs +++ b/src/adapters/spawning.rs @@ -2,7 +2,7 @@ use super::*; use anyhow::*; use encoding_rs_io::DecodeReaderBytesBuilder; use log::*; -use regex::Regex; + use std::io::prelude::*; use std::io::BufReader; use std::process::Command; @@ -111,13 +111,13 @@ impl Read for ProcWaitReader { } } } -pub fn pipe_output( +pub fn pipe_output<'a>( _line_prefix: &str, mut cmd: Command, - inp: &mut (dyn Read), + inp: &mut (dyn Read + 'a), exe_name: &str, help: &str, -) -> Result { +) -> Result> { let mut cmd = cmd .stdin(Stdio::piped()) .stdout(Stdio::piped()) @@ -138,7 +138,7 @@ pub fn pipe_output( } impl FileAdapter for SpawningFileAdapter { - fn adapt(&self, ai: AdaptInfo, _detection_reason: &FileMatcher) -> Result { + fn adapt<'a>(&self, ai: AdaptInfo<'a>, _detection_reason: &FileMatcher) -> Result> { let AdaptInfo { filepath_hint, mut inp, diff --git a/src/adapters/sqlite.rs b/src/adapters/sqlite.rs index a302bc6..2e42441 100644 --- a/src/adapters/sqlite.rs +++ b/src/adapters/sqlite.rs @@ -24,6 +24,7 @@ lazy_static! { slow_matchers: Some(vec![FileMatcher::MimeType( "application/x-sqlite3".to_owned() )]), + keep_fast_matchers_if_accurate: false, disabled_by_default: false }; } diff --git a/src/adapters/tar.rs b/src/adapters/tar.rs index 9ec6efc..0f08cfd 100644 --- a/src/adapters/tar.rs +++ b/src/adapters/tar.rs @@ -5,6 +5,7 @@ use anyhow::*; use lazy_static::lazy_static; use log::*; use std::path::PathBuf; +use writing::{WritingFileAdapter, WritingFileAdapterTrait}; static EXTENSIONS: &[&str] = &["tar"]; @@ -16,18 +17,19 @@ lazy_static! { recurses: true, fast_matchers: EXTENSIONS .iter() - .map(|s| FastMatcher::FileExtension(s.to_string())) + .map(|s| FastFileMatcher::FileExtension(s.to_string())) .collect(), slow_matchers: None, + keep_fast_matchers_if_accurate: true, disabled_by_default: false }; } -#[derive(Default)] +#[derive(Default, Clone)] pub struct TarAdapter; impl TarAdapter { - pub fn new() -> TarAdapter { - TarAdapter + pub fn new() -> WritingFileAdapter { + WritingFileAdapter::new(Box::new(TarAdapter)) } } impl GetMetadata for TarAdapter { @@ -36,12 +38,16 @@ impl GetMetadata for TarAdapter { } } -impl FileAdapter for TarAdapter { - fn adapt(&self, ai: AdaptInfo, _detection_reason: &SlowMatcher) -> Result<()> { +impl WritingFileAdapterTrait for TarAdapter { + fn adapt_write( + &self, + ai: AdaptInfo, + _detection_reason: &FileMatcher, + oup: &mut dyn Write, + ) -> Result<()> { let AdaptInfo { filepath_hint, mut inp, - oup, line_prefix, archive_recursion_depth, config, @@ -60,10 +66,10 @@ impl FileAdapter for TarAdapter { ); let line_prefix = &format!("{}{}: ", line_prefix, path.display()); let ai2: AdaptInfo = AdaptInfo { - filepath_hint: &path, + filepath_hint: path, is_real_file: false, archive_recursion_depth: archive_recursion_depth + 1, - inp: &mut file, + inp: Box::new(file), oup, line_prefix, config: config.clone(), diff --git a/src/adapters/tesseract.rs b/src/adapters/tesseract.rs index 88ecd4c..6dc8cfe 100644 --- a/src/adapters/tesseract.rs +++ b/src/adapters/tesseract.rs @@ -13,9 +13,10 @@ lazy_static! { recurses: false, fast_matchers: EXTENSIONS .iter() - .map(|s| FastMatcher::FileExtension(s.to_string())) + .map(|s| FastFileMatcher::FileExtension(s.to_string())) .collect(), slow_matchers: None, + keep_fast_matchers_if_accurate: true, disabled_by_default: true }; } @@ -40,6 +41,6 @@ impl SpawningFileAdapterTrait for TesseractAdapter { fn command(&self, _filepath_hint: &Path, mut cmd: Command) -> Command { // rg already does threading cmd.env("OMP_THREAD_LIMIT", "1").arg("-").arg("-"); - cmd + Some(cmd) } } diff --git a/src/adapters/writing.rs b/src/adapters/writing.rs index afeea9b..6824e02 100644 --- a/src/adapters/writing.rs +++ b/src/adapters/writing.rs @@ -2,14 +2,14 @@ use super::{FileAdapter, GetMetadata, ReadBox}; use anyhow::Result; use std::io::Write; -// this trait / struct split is necessary because of "conflicting trait implementation" otherwise with SpawningFileAdapter +// this trait / struct split is ugly but necessary because of "conflicting trait implementation" otherwise with SpawningFileAdapter #[dyn_clonable::clonable] pub trait WritingFileAdapterTrait: GetMetadata + Send + Clone { - fn adapt_write( + fn adapt_write<'a>( &self, - a: super::AdaptInfo, + a: super::AdaptInfo<'a>, detection_reason: &crate::matching::FileMatcher, - oup: &mut dyn Write, + oup: &mut (dyn Write + 'a), ) -> Result<()>; } @@ -29,17 +29,17 @@ impl GetMetadata for WritingFileAdapter { } impl FileAdapter for WritingFileAdapter { - fn adapt( + fn adapt<'a>( &self, - a: super::AdaptInfo, + ai_outer: super::AdaptInfo<'a>, detection_reason: &crate::matching::FileMatcher, - ) -> anyhow::Result { + ) -> anyhow::Result> { let (r, w) = crate::pipe::pipe(); let cc = self.inner.clone(); let detc = detection_reason.clone(); std::thread::spawn(move || { let mut oup = w; - let ai = a; + let ai = ai_outer; let res = cc.adapt_write(ai, &detc, &mut oup); if let Err(e) = res { oup.write_err(std::io::Error::new(std::io::ErrorKind::Other, e)) diff --git a/src/adapters/zip.rs b/src/adapters/zip.rs index f43b4c2..1d41179 100644 --- a/src/adapters/zip.rs +++ b/src/adapters/zip.rs @@ -4,6 +4,7 @@ use ::zip::read::ZipFile; use anyhow::*; use lazy_static::lazy_static; use log::*; +use writing::{WritingFileAdapter, WritingFileAdapterTrait}; // todo: // maybe todo: read list of extensions from @@ -18,18 +19,19 @@ lazy_static! { recurses: true, fast_matchers: EXTENSIONS .iter() - .map(|s| FastMatcher::FileExtension(s.to_string())) + .map(|s| FastFileMatcher::FileExtension(s.to_string())) .collect(), - slow_matchers: Some(vec![SlowMatcher::MimeType("application/zip".to_owned())]), + slow_matchers: Some(vec![FileMatcher::MimeType("application/zip".to_owned())]), + keep_fast_matchers_if_accurate: false, disabled_by_default: false }; } -#[derive(Default)] +#[derive(Default, Clone)] pub struct ZipAdapter; impl ZipAdapter { - pub fn new() -> ZipAdapter { - ZipAdapter + pub fn new() -> WritingFileAdapter { + WritingFileAdapter::new(Box::new(ZipAdapter)) } } impl GetMetadata for ZipAdapter { @@ -47,12 +49,16 @@ fn is_dir(f: &ZipFile) -> bool { .map_or(false, |c| c == '/' || c == '\\') } -impl FileAdapter for ZipAdapter { - fn adapt(&self, ai: AdaptInfo, _detection_reason: &SlowMatcher) -> Result<()> { +impl WritingFileAdapterTrait for ZipAdapter { + fn adapt_write<'a>( + &self, + ai: AdaptInfo<'a>, + _detection_reason: &FileMatcher, + oup: &mut (dyn Write + 'a), + ) -> Result<()> { let AdaptInfo { filepath_hint, mut inp, - oup, line_prefix, archive_recursion_depth, config, @@ -73,16 +79,18 @@ impl FileAdapter for ZipAdapter { print_bytes(file.size() as f64), print_bytes(file.compressed_size() as f64) ); - let line_prefix = &format!("{}{}: ", line_prefix, file.name()); - rga_preproc(AdaptInfo { - filepath_hint: &file.sanitized_name(), + let line_prefix = format!("{}{}: ", line_prefix, file.name()); + let mut rd = rga_preproc(AdaptInfo { + filepath_hint: file.sanitized_name().clone(), is_real_file: false, inp: &mut file, - oup, line_prefix, archive_recursion_depth: archive_recursion_depth + 1, config: config.clone(), })?; + // copy read stream from inner file to output + std::io::copy(&mut rd, oup); + drop(rd); } Err(e) => return Err(e.into()), } diff --git a/src/caching_writer.rs b/src/caching_writer.rs index e71e591..6e34c27 100644 --- a/src/caching_writer.rs +++ b/src/caching_writer.rs @@ -1,6 +1,6 @@ use anyhow::Result; use log::*; -use std::io::{BufReader, Read, Write}; +use std::io::{Read, Write}; /** * wrap a writer so that it is passthrough, diff --git a/src/preproc.rs b/src/preproc.rs index 4dce0b2..231b3d4 100644 --- a/src/preproc.rs +++ b/src/preproc.rs @@ -1,7 +1,6 @@ use crate::adapters::*; use crate::matching::*; use crate::{ - config::RgaConfig, preproc_cache::{LmdbCache, PreprocCache}, print_bytes, print_dur, CachingReader, }; @@ -12,7 +11,7 @@ use std::convert::TryInto; use std::io::{BufRead, BufReader}; -use std::{path::PathBuf, rc::Rc, time::Instant}; +use std::{rc::Rc, time::Instant}; /** * preprocess a file as defined in `ai`. * @@ -88,12 +87,12 @@ pub fn rga_preproc(ai: AdaptInfo) -> Result { } } -fn run_adapter( - ai: AdaptInfo, +fn run_adapter<'a>( + ai: AdaptInfo<'a>, adapter: Rc, detection_reason: FileMatcher, filtered_adapters: &Vec>, -) -> Result { +) -> Result> { let AdaptInfo { filepath_hint, is_real_file, diff --git a/src/preproc_cache.rs b/src/preproc_cache.rs index 4f6e37c..5ee1daa 100644 --- a/src/preproc_cache.rs +++ b/src/preproc_cache.rs @@ -1,12 +1,7 @@ -use crate::{config::CacheConfig, print_bytes, print_dur, project_dirs}; +use crate::{config::CacheConfig, print_bytes, print_dur}; use anyhow::{format_err, Context, Result}; use log::*; -use std::{ - fmt::Display, - path::Path, - sync::{Arc, RwLock}, - time::Instant, -}; +use std::{fmt::Display, path::Path, time::Instant}; pub trait PreprocCache: Send + Sync { /*/// gets cache at specified key.