mirror of
https://github.com/FliegendeWurst/ripgrep-all.git
synced 2024-11-08 22:10:37 +00:00
partial move to async
This commit is contained in:
parent
94f06fba37
commit
cde0e209d2
272
Cargo.lock
generated
272
Cargo.lock
generated
@ -70,6 +70,41 @@ version = "0.3.6"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "a4c527152e37cf757a3f78aae5a06fbeefdb07ccc535c980a3208ee3060dd544"
|
checksum = "a4c527152e37cf757a3f78aae5a06fbeefdb07ccc535c980a3208ee3060dd544"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "async-compression"
|
||||||
|
version = "0.3.15"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "942c7cd7ae39e91bde4820d74132e9862e62c2f386c3aa90ccf55949f5bad63a"
|
||||||
|
dependencies = [
|
||||||
|
"futures-core",
|
||||||
|
"memchr",
|
||||||
|
"pin-project-lite",
|
||||||
|
"tokio",
|
||||||
|
"zstd",
|
||||||
|
"zstd-safe",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "async-stream"
|
||||||
|
version = "0.3.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "dad5c83079eae9969be7fadefe640a1c566901f05ff91ab221de4b6f68d9507e"
|
||||||
|
dependencies = [
|
||||||
|
"async-stream-impl",
|
||||||
|
"futures-core",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "async-stream-impl"
|
||||||
|
version = "0.3.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "10f203db73a71dfa2fb6dd22763990fa26f3d2625a6da2da900d23b87d26be27"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "atty"
|
name = "atty"
|
||||||
version = "0.2.14"
|
version = "0.2.14"
|
||||||
@ -135,6 +170,12 @@ version = "1.4.3"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610"
|
checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "bytes"
|
||||||
|
version = "1.2.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ec8a7b6a70fde80372154c65702f00a0f56f3e1c36abbc6c440484be248856db"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "bzip2"
|
name = "bzip2"
|
||||||
version = "0.4.3"
|
version = "0.4.3"
|
||||||
@ -598,6 +639,55 @@ dependencies = [
|
|||||||
"percent-encoding",
|
"percent-encoding",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "futures-core"
|
||||||
|
version = "0.3.25"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "04909a7a7e4633ae6c4a9ab280aeb86da1236243a77b694a49eacd659a4bd3ac"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "futures-io"
|
||||||
|
version = "0.3.25"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "00f5fb52a06bdcadeb54e8d3671f8888a39697dcb0b81b23b55174030427f4eb"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "futures-macro"
|
||||||
|
version = "0.3.25"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "bdfb8ce053d86b91919aad980c220b1fb8401a9394410e1c289ed7e66b61835d"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "futures-sink"
|
||||||
|
version = "0.3.25"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "39c15cf1a4aa79df40f1bb462fb39676d0ad9e366c2a33b590d7c66f4f81fcf9"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "futures-task"
|
||||||
|
version = "0.3.25"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "2ffb393ac5d9a6eaa9d3fdf37ae2776656b706e200c8e16b1bdb227f5198e6ea"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "futures-util"
|
||||||
|
version = "0.3.25"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "197676987abd2f9cadff84926f410af1c183608d36641465df73ae8211dc65d6"
|
||||||
|
dependencies = [
|
||||||
|
"futures-core",
|
||||||
|
"futures-macro",
|
||||||
|
"futures-task",
|
||||||
|
"pin-project-lite",
|
||||||
|
"pin-utils",
|
||||||
|
"slab",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "generic-array"
|
name = "generic-array"
|
||||||
version = "0.12.4"
|
version = "0.12.4"
|
||||||
@ -845,6 +935,16 @@ dependencies = [
|
|||||||
"pkg-config",
|
"pkg-config",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "lock_api"
|
||||||
|
version = "0.4.9"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "435011366fe56583b16cf956f9df0095b405b82d76425bc8981c0e22e60ec4df"
|
||||||
|
dependencies = [
|
||||||
|
"autocfg",
|
||||||
|
"scopeguard",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "log"
|
name = "log"
|
||||||
version = "0.4.17"
|
version = "0.4.17"
|
||||||
@ -895,6 +995,18 @@ dependencies = [
|
|||||||
"adler",
|
"adler",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "mio"
|
||||||
|
version = "0.8.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e5d732bc30207a6423068df043e3d02e0735b155ad7ce1a6f76fe2baa5b158de"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
"log",
|
||||||
|
"wasi 0.11.0+wasi-snapshot-preview1",
|
||||||
|
"windows-sys 0.42.0",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "nom"
|
name = "nom"
|
||||||
version = "7.1.1"
|
version = "7.1.1"
|
||||||
@ -969,6 +1081,16 @@ dependencies = [
|
|||||||
"autocfg",
|
"autocfg",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "num_cpus"
|
||||||
|
version = "1.13.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "19e64526ebdee182341572e50e9ad03965aa510cd94427a4549448f285e957a1"
|
||||||
|
dependencies = [
|
||||||
|
"hermit-abi",
|
||||||
|
"libc",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "num_threads"
|
name = "num_threads"
|
||||||
version = "0.1.6"
|
version = "0.1.6"
|
||||||
@ -1014,6 +1136,29 @@ dependencies = [
|
|||||||
"stable_deref_trait",
|
"stable_deref_trait",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "parking_lot"
|
||||||
|
version = "0.12.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f"
|
||||||
|
dependencies = [
|
||||||
|
"lock_api",
|
||||||
|
"parking_lot_core",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "parking_lot_core"
|
||||||
|
version = "0.9.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "4dc9e0dc2adc1c69d09143aff38d3d30c5c3f0df0dad82e6d25547af174ebec0"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
"libc",
|
||||||
|
"redox_syscall",
|
||||||
|
"smallvec",
|
||||||
|
"windows-sys 0.42.0",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "password-hash"
|
name = "password-hash"
|
||||||
version = "0.4.2"
|
version = "0.4.2"
|
||||||
@ -1065,6 +1210,18 @@ dependencies = [
|
|||||||
"indexmap",
|
"indexmap",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pin-project-lite"
|
||||||
|
version = "0.2.9"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pin-utils"
|
||||||
|
version = "0.1.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "pkg-config"
|
name = "pkg-config"
|
||||||
version = "0.3.26"
|
version = "0.3.26"
|
||||||
@ -1180,7 +1337,10 @@ name = "ripgrep_all"
|
|||||||
version = "0.9.7-alpha.0"
|
version = "0.9.7-alpha.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
|
"async-compression",
|
||||||
|
"async-stream",
|
||||||
"bincode",
|
"bincode",
|
||||||
|
"bytes",
|
||||||
"bzip2",
|
"bzip2",
|
||||||
"chrono",
|
"chrono",
|
||||||
"clap 4.0.18",
|
"clap 4.0.18",
|
||||||
@ -1213,6 +1373,9 @@ dependencies = [
|
|||||||
"structopt",
|
"structopt",
|
||||||
"tar",
|
"tar",
|
||||||
"tempfile",
|
"tempfile",
|
||||||
|
"tokio",
|
||||||
|
"tokio-stream",
|
||||||
|
"tokio-util",
|
||||||
"tree_magic_mini",
|
"tree_magic_mini",
|
||||||
"xz2",
|
"xz2",
|
||||||
"zip",
|
"zip",
|
||||||
@ -1392,6 +1555,15 @@ dependencies = [
|
|||||||
"digest",
|
"digest",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "signal-hook-registry"
|
||||||
|
version = "1.4.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e51e73328dc4ac0c7ccbda3a494dfa03df1de2f46018127f60c693f2648455b0"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "size_format"
|
name = "size_format"
|
||||||
version = "1.0.2"
|
version = "1.0.2"
|
||||||
@ -1402,12 +1574,31 @@ dependencies = [
|
|||||||
"num",
|
"num",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "slab"
|
||||||
|
version = "0.4.7"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "4614a76b2a8be0058caa9dbbaf66d988527d86d003c11a94fbd335d7661edcef"
|
||||||
|
dependencies = [
|
||||||
|
"autocfg",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "smallvec"
|
name = "smallvec"
|
||||||
version = "1.10.0"
|
version = "1.10.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0"
|
checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "socket2"
|
||||||
|
version = "0.4.7"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "02e2d2db9033d13a1567121ddd7a095ee144db4e1ca1b1bda3419bc0da294ebd"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
"winapi",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "stable_deref_trait"
|
name = "stable_deref_trait"
|
||||||
version = "1.2.0"
|
version = "1.2.0"
|
||||||
@ -1595,6 +1786,87 @@ version = "0.1.0"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c"
|
checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "tokio"
|
||||||
|
version = "1.21.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a9e03c497dc955702ba729190dc4aac6f2a0ce97f913e5b1b5912fc5039d9099"
|
||||||
|
dependencies = [
|
||||||
|
"autocfg",
|
||||||
|
"bytes",
|
||||||
|
"libc",
|
||||||
|
"memchr",
|
||||||
|
"mio",
|
||||||
|
"num_cpus",
|
||||||
|
"parking_lot",
|
||||||
|
"pin-project-lite",
|
||||||
|
"signal-hook-registry",
|
||||||
|
"socket2",
|
||||||
|
"tokio-macros",
|
||||||
|
"winapi",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "tokio-macros"
|
||||||
|
version = "1.8.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9724f9a975fb987ef7a3cd9be0350edcbe130698af5b8f7a631e23d42d052484"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "tokio-stream"
|
||||||
|
version = "0.1.11"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d660770404473ccd7bc9f8b28494a811bc18542b915c0855c51e8f419d5223ce"
|
||||||
|
dependencies = [
|
||||||
|
"futures-core",
|
||||||
|
"pin-project-lite",
|
||||||
|
"tokio",
|
||||||
|
"tokio-util",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "tokio-util"
|
||||||
|
version = "0.7.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0bb2e075f03b3d66d8d8785356224ba688d2906a371015e225beeb65ca92c740"
|
||||||
|
dependencies = [
|
||||||
|
"bytes",
|
||||||
|
"futures-core",
|
||||||
|
"futures-io",
|
||||||
|
"futures-sink",
|
||||||
|
"futures-util",
|
||||||
|
"hashbrown",
|
||||||
|
"pin-project-lite",
|
||||||
|
"slab",
|
||||||
|
"tokio",
|
||||||
|
"tracing",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "tracing"
|
||||||
|
version = "0.1.37"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8ce8c33a8d48bd45d624a6e523445fd21ec13d3653cd51f681abf67418f54eb8"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
"pin-project-lite",
|
||||||
|
"tracing-core",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "tracing-core"
|
||||||
|
version = "0.1.30"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "24eb03ba0eab1fd845050058ce5e616558e8f8d8fca633e6b163fe25c797213a"
|
||||||
|
dependencies = [
|
||||||
|
"once_cell",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "tree_magic_mini"
|
name = "tree_magic_mini"
|
||||||
version = "3.0.3"
|
version = "3.0.3"
|
||||||
|
@ -53,6 +53,12 @@ dyn-clone = "1.0.2"
|
|||||||
dyn-clonable = "0.9.0"
|
dyn-clonable = "0.9.0"
|
||||||
zip = "0.6.3"
|
zip = "0.6.3"
|
||||||
owning_ref = "0.4.1"
|
owning_ref = "0.4.1"
|
||||||
|
tokio = { version = "1.21.2", features = ["full"] }
|
||||||
|
async-compression = { version = "0.3.15", features = ["tokio", "zstd"] }
|
||||||
|
tokio-stream = { version = "0.1.11", features = ["io-util", "tokio-util"] }
|
||||||
|
async-stream = "0.3.3"
|
||||||
|
bytes = "1.2.1"
|
||||||
|
tokio-util = { version = "0.7.4", features = ["io", "full"] }
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
ctor = "0.1.20"
|
ctor = "0.1.20"
|
||||||
|
@ -1,28 +1,29 @@
|
|||||||
pub mod custom;
|
pub mod custom;
|
||||||
// pub mod decompress;
|
// pub mod decompress;
|
||||||
// pub mod ffmpeg;
|
// pub mod ffmpeg;
|
||||||
pub mod postproc;
|
// pub mod postproc;
|
||||||
// pub mod pdfpages;
|
// pub mod pdfpages;
|
||||||
pub mod spawning;
|
pub mod spawning;
|
||||||
// pub mod sqlite;
|
// pub mod sqlite;
|
||||||
// pub mod tar;
|
// pub mod tar;
|
||||||
// pub mod tesseract;
|
// pub mod tesseract;
|
||||||
// pub mod writing;
|
// pub mod writing;
|
||||||
pub mod zip;
|
// pub mod zip;
|
||||||
use crate::{adapted_iter::AdaptedFilesIterBox, config::RgaConfig, matching::*};
|
use crate::{adapted_iter::AdaptedFilesIterBox, config::RgaConfig, matching::*};
|
||||||
use anyhow::*;
|
use anyhow::*;
|
||||||
use custom::builtin_spawning_adapters;
|
use custom::builtin_spawning_adapters;
|
||||||
use custom::CustomAdapterConfig;
|
use custom::CustomAdapterConfig;
|
||||||
use log::*;
|
use log::*;
|
||||||
|
use tokio::io::AsyncRead;
|
||||||
|
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::io::prelude::*;
|
|
||||||
use std::iter::Iterator;
|
use std::iter::Iterator;
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
|
use std::pin::Pin;
|
||||||
use std::rc::Rc;
|
use std::rc::Rc;
|
||||||
|
|
||||||
pub type ReadBox<'a> = Box<dyn Read + 'a>;
|
pub type ReadBox<'a> = Pin<Box<dyn AsyncRead + 'a>>;
|
||||||
pub struct AdapterMeta {
|
pub struct AdapterMeta {
|
||||||
/// unique short name of this adapter (a-z0-9 only)
|
/// unique short name of this adapter (a-z0-9 only)
|
||||||
pub name: String,
|
pub name: String,
|
||||||
@ -115,7 +116,7 @@ pub fn get_all_adapters(custom_adapters: Option<Vec<CustomAdapterConfig>>) -> Ad
|
|||||||
|
|
||||||
let internal_adapters: Vec<Rc<dyn FileAdapter>> = vec![
|
let internal_adapters: Vec<Rc<dyn FileAdapter>> = vec![
|
||||||
//Rc::new(ffmpeg::FFmpegAdapter::new()),
|
//Rc::new(ffmpeg::FFmpegAdapter::new()),
|
||||||
Rc::new(zip::ZipAdapter::new()),
|
// Rc::new(zip::ZipAdapter::new()),
|
||||||
//Rc::new(decompress::DecompressAdapter::new()),
|
//Rc::new(decompress::DecompressAdapter::new()),
|
||||||
// Rc::new(tar::TarAdapter::new()),
|
// Rc::new(tar::TarAdapter::new()),
|
||||||
//Rc::new(sqlite::SqliteAdapter::new()),
|
//Rc::new(sqlite::SqliteAdapter::new()),
|
||||||
|
@ -166,8 +166,8 @@ impl SpawningFileAdapterTrait for CustomSpawningFileAdapter {
|
|||||||
fn command(
|
fn command(
|
||||||
&self,
|
&self,
|
||||||
filepath_hint: &std::path::Path,
|
filepath_hint: &std::path::Path,
|
||||||
mut command: std::process::Command,
|
mut command: tokio::process::Command,
|
||||||
) -> Result<std::process::Command> {
|
) -> Result<tokio::process::Command> {
|
||||||
command.args(
|
command.args(
|
||||||
self.args
|
self.args
|
||||||
.iter()
|
.iter()
|
||||||
@ -218,10 +218,10 @@ mod test {
|
|||||||
use super::*;
|
use super::*;
|
||||||
use crate::test_utils::*;
|
use crate::test_utils::*;
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use std::fs::File;
|
use tokio::fs::File;
|
||||||
|
|
||||||
#[test]
|
#[tokio::test]
|
||||||
fn poppler() -> Result<()> {
|
async fn poppler() -> Result<()> {
|
||||||
let adapter = builtin_spawning_adapters
|
let adapter = builtin_spawning_adapters
|
||||||
.iter()
|
.iter()
|
||||||
.find(|e| e.name == "poppler")
|
.find(|e| e.name == "poppler")
|
||||||
@ -231,7 +231,7 @@ mod test {
|
|||||||
|
|
||||||
let filepath = test_data_dir().join("short.pdf");
|
let filepath = test_data_dir().join("short.pdf");
|
||||||
|
|
||||||
let (a, d) = simple_adapt_info(&filepath, Box::new(File::open(&filepath)?));
|
let (a, d) = simple_adapt_info(&filepath, Box::pin(File::open(&filepath).await?));
|
||||||
let r = adapter.adapt(a, &d)?;
|
let r = adapter.adapt(a, &d)?;
|
||||||
let o = adapted_to_vec(r)?;
|
let o = adapted_to_vec(r)?;
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
|
@ -5,10 +5,10 @@
|
|||||||
use anyhow::Context;
|
use anyhow::Context;
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use encoding_rs_io::DecodeReaderBytesBuilder;
|
use encoding_rs_io::DecodeReaderBytesBuilder;
|
||||||
|
use tokio::io::AsyncRead;
|
||||||
|
|
||||||
use std::{
|
use std::{
|
||||||
cmp::min,
|
cmp::min,
|
||||||
io::{BufRead, BufReader, Read},
|
|
||||||
};
|
};
|
||||||
|
|
||||||
use crate::adapted_iter::{AdaptedFilesIterBox, SingleAdaptedFileAsIter};
|
use crate::adapted_iter::{AdaptedFilesIterBox, SingleAdaptedFileAsIter};
|
||||||
@ -16,11 +16,11 @@ use crate::adapted_iter::{AdaptedFilesIterBox, SingleAdaptedFileAsIter};
|
|||||||
use super::{AdaptInfo, AdapterMeta, FileAdapter, GetMetadata};
|
use super::{AdaptInfo, AdapterMeta, FileAdapter, GetMetadata};
|
||||||
|
|
||||||
/** pass through, except adding \n at the end */
|
/** pass through, except adding \n at the end */
|
||||||
pub struct EnsureEndsWithNewline<R: Read> {
|
pub struct EnsureEndsWithNewline<R: AsyncRead> {
|
||||||
inner: R,
|
inner: R,
|
||||||
added_newline: bool,
|
added_newline: bool,
|
||||||
}
|
}
|
||||||
impl<R: Read> EnsureEndsWithNewline<R> {
|
impl<R: AsyncRead> EnsureEndsWithNewline<R> {
|
||||||
pub fn new(r: R) -> EnsureEndsWithNewline<R> {
|
pub fn new(r: R) -> EnsureEndsWithNewline<R> {
|
||||||
EnsureEndsWithNewline {
|
EnsureEndsWithNewline {
|
||||||
inner: r,
|
inner: r,
|
||||||
@ -28,8 +28,8 @@ impl<R: Read> EnsureEndsWithNewline<R> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
impl<R: Read> Read for EnsureEndsWithNewline<R> {
|
impl<R: AsyncRead> AsyncRead for EnsureEndsWithNewline<R> {
|
||||||
fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
|
fn poll_read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
|
||||||
match self.inner.read(buf) {
|
match self.inner.read(buf) {
|
||||||
Ok(0) => {
|
Ok(0) => {
|
||||||
if self.added_newline {
|
if self.added_newline {
|
||||||
@ -47,7 +47,7 @@ impl<R: Read> Read for EnsureEndsWithNewline<R> {
|
|||||||
}
|
}
|
||||||
struct ByteReplacer<R>
|
struct ByteReplacer<R>
|
||||||
where
|
where
|
||||||
R: Read,
|
R:AsyncRead,
|
||||||
{
|
{
|
||||||
inner: R,
|
inner: R,
|
||||||
next_read: Vec<u8>,
|
next_read: Vec<u8>,
|
||||||
@ -57,7 +57,7 @@ where
|
|||||||
|
|
||||||
impl<R> ByteReplacer<R>
|
impl<R> ByteReplacer<R>
|
||||||
where
|
where
|
||||||
R: Read,
|
R: AsyncRead,
|
||||||
{
|
{
|
||||||
fn output_next(&mut self, buf: &mut [u8], buf_valid_until: usize, replacement: &[u8]) -> usize {
|
fn output_next(&mut self, buf: &mut [u8], buf_valid_until: usize, replacement: &[u8]) -> usize {
|
||||||
let after_part1 = Vec::from(&buf[1..buf_valid_until]);
|
let after_part1 = Vec::from(&buf[1..buf_valid_until]);
|
||||||
@ -80,11 +80,11 @@ where
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<R> Read for ByteReplacer<R>
|
impl<R> AsyncRead for ByteReplacer<R>
|
||||||
where
|
where
|
||||||
R: Read,
|
R: AsyncRead,
|
||||||
{
|
{
|
||||||
fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
|
fn poll_read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
|
||||||
let read = if self.next_read.len() > 0 {
|
let read = if self.next_read.len() > 0 {
|
||||||
let count = std::cmp::min(self.next_read.len(), buf.len());
|
let count = std::cmp::min(self.next_read.len(), buf.len());
|
||||||
buf[0..count].copy_from_slice(&self.next_read[0..count]);
|
buf[0..count].copy_from_slice(&self.next_read[0..count]);
|
||||||
@ -158,10 +158,10 @@ impl Read for ReadErr {
|
|||||||
}
|
}
|
||||||
}*/
|
}*/
|
||||||
|
|
||||||
pub fn postproc_encoding<'a, R: Read + 'a>(
|
pub fn postproc_encoding<'a, R: AsyncRead + 'a>(
|
||||||
line_prefix: &str,
|
line_prefix: &str,
|
||||||
inp: R,
|
inp: R,
|
||||||
) -> Result<Box<dyn Read + 'a>> {
|
) -> Result<Box<dyn AsyncRead + 'a>> {
|
||||||
// TODO: parse these options from ripgrep's configuration
|
// TODO: parse these options from ripgrep's configuration
|
||||||
let encoding = None; // detect bom but usually assume utf8
|
let encoding = None; // detect bom but usually assume utf8
|
||||||
let bom_sniffing = true;
|
let bom_sniffing = true;
|
||||||
@ -202,7 +202,7 @@ pub fn postproc_encoding<'a, R: Read + 'a>(
|
|||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn postproc_prefix(line_prefix: &str, inp: impl Read) -> impl Read {
|
pub fn postproc_prefix(line_prefix: &str, inp: impl AsyncRead) -> impl AsyncRead {
|
||||||
let line_prefix = line_prefix.to_string(); // clone since we need it later
|
let line_prefix = line_prefix.to_string(); // clone since we need it later
|
||||||
ByteReplacer {
|
ByteReplacer {
|
||||||
inner: inp,
|
inner: inp,
|
||||||
@ -212,7 +212,7 @@ pub fn postproc_prefix(line_prefix: &str, inp: impl Read) -> impl Read {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn postproc_pagebreaks(line_prefix: &str, inp: impl Read) -> impl Read {
|
pub fn postproc_pagebreaks(line_prefix: &str, inp: impl AsyncRead) -> impl AsyncRead {
|
||||||
let line_prefix = line_prefix.to_string(); // clone since
|
let line_prefix = line_prefix.to_string(); // clone since
|
||||||
let mut page_count = 1;
|
let mut page_count = 1;
|
||||||
|
|
||||||
|
@ -1,14 +1,17 @@
|
|||||||
|
|
||||||
use crate::adapted_iter::SingleAdaptedFileAsIter;
|
use crate::adapted_iter::SingleAdaptedFileAsIter;
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use log::*;
|
use log::*;
|
||||||
|
|
||||||
use std::process::Command;
|
|
||||||
use std::process::{Child, Stdio};
|
|
||||||
use std::{io::prelude::*, path::Path};
|
|
||||||
use crate::adapters::FileAdapter;
|
use crate::adapters::FileAdapter;
|
||||||
|
use std::future::Future;
|
||||||
|
use std::path::Path;
|
||||||
|
use std::process::{ExitStatus, Stdio};
|
||||||
|
use std::task::Poll;
|
||||||
|
use tokio::io::AsyncReadExt;
|
||||||
|
use tokio::process::Child;
|
||||||
|
use tokio::process::Command;
|
||||||
|
|
||||||
// TODO: don't separate the trait and the struct
|
// TODO: don't separate the trait and the struct
|
||||||
pub trait SpawningFileAdapterTrait: GetMetadata {
|
pub trait SpawningFileAdapterTrait: GetMetadata {
|
||||||
@ -50,11 +53,29 @@ pub fn map_exe_error(err: std::io::Error, exe_name: &str, help: &str) -> Error {
|
|||||||
|
|
||||||
/** waits for a process to finish, returns an io error if the process failed */
|
/** waits for a process to finish, returns an io error if the process failed */
|
||||||
struct ProcWaitReader {
|
struct ProcWaitReader {
|
||||||
proce: Child,
|
proce: Pin<Box<dyn Future<Output = std::io::Result<ExitStatus>>>>,
|
||||||
}
|
}
|
||||||
impl Read for ProcWaitReader {
|
impl AsyncRead for ProcWaitReader {
|
||||||
fn read(&mut self, _buf: &mut [u8]) -> std::io::Result<usize> {
|
fn poll_read(
|
||||||
let status = self.proce.wait()?;
|
self: Pin<&mut Self>,
|
||||||
|
cx: &mut std::task::Context<'_>,
|
||||||
|
buf: &mut tokio::io::ReadBuf<'_>,
|
||||||
|
) -> std::task::Poll<std::io::Result<()>> {
|
||||||
|
match self.proce.as_mut().poll(cx) {
|
||||||
|
std::task::Poll::Ready(x) => {
|
||||||
|
let x = x?;
|
||||||
|
if x.success() {
|
||||||
|
Poll::Ready(std::io::Result::Ok(()))
|
||||||
|
} else {
|
||||||
|
Poll::Ready(Err(std::io::Error::new(
|
||||||
|
std::io::ErrorKind::Other,
|
||||||
|
format_err!("subprocess failed: {:?}", x),
|
||||||
|
)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Poll::Pending => std::task::Poll::Pending,
|
||||||
|
}
|
||||||
|
/*let status = self.proce.wait();
|
||||||
if status.success() {
|
if status.success() {
|
||||||
std::io::Result::Ok(0)
|
std::io::Result::Ok(0)
|
||||||
} else {
|
} else {
|
||||||
@ -62,13 +83,13 @@ impl Read for ProcWaitReader {
|
|||||||
std::io::ErrorKind::Other,
|
std::io::ErrorKind::Other,
|
||||||
format_err!("subprocess failed: {:?}", status),
|
format_err!("subprocess failed: {:?}", status),
|
||||||
))
|
))
|
||||||
}
|
}*/
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
pub fn pipe_output<'a>(
|
pub fn pipe_output<'a>(
|
||||||
_line_prefix: &str,
|
_line_prefix: &str,
|
||||||
mut cmd: Command,
|
mut cmd: Command,
|
||||||
inp: &mut (dyn Read + 'a),
|
inp: ReadBox<'a>,
|
||||||
exe_name: &str,
|
exe_name: &str,
|
||||||
help: &str,
|
help: &str,
|
||||||
) -> Result<ReadBox<'a>> {
|
) -> Result<ReadBox<'a>> {
|
||||||
@ -81,10 +102,14 @@ pub fn pipe_output<'a>(
|
|||||||
let stdo = cmd.stdout.take().expect("is piped");
|
let stdo = cmd.stdout.take().expect("is piped");
|
||||||
|
|
||||||
// TODO: deadlocks since this is run in the same thread as the thing reading from stdout of the process
|
// TODO: deadlocks since this is run in the same thread as the thing reading from stdout of the process
|
||||||
std::io::copy(inp, &mut stdi)?;
|
tokio::task::spawn_local(async {
|
||||||
drop(stdi);
|
tokio::pin!(inp);
|
||||||
|
tokio::io::copy(&mut inp, &mut stdi).await;
|
||||||
|
});
|
||||||
|
|
||||||
Ok(Box::new(stdo.chain(ProcWaitReader { proce: cmd })))
|
Ok(Box::pin(stdo.chain(ProcWaitReader {
|
||||||
|
proce: Box::pin(cmd.wait()),
|
||||||
|
})))
|
||||||
}
|
}
|
||||||
|
|
||||||
impl FileAdapter for SpawningFileAdapter {
|
impl FileAdapter for SpawningFileAdapter {
|
||||||
@ -109,7 +134,7 @@ impl FileAdapter for SpawningFileAdapter {
|
|||||||
.command(&filepath_hint, cmd)
|
.command(&filepath_hint, cmd)
|
||||||
.with_context(|| format!("Could not set cmd arguments for {}", self.inner.get_exe()))?;
|
.with_context(|| format!("Could not set cmd arguments for {}", self.inner.get_exe()))?;
|
||||||
debug!("executing {:?}", cmd);
|
debug!("executing {:?}", cmd);
|
||||||
let output = pipe_output(&line_prefix, cmd, &mut inp, self.inner.get_exe(), "")?;
|
let output = pipe_output(&line_prefix, cmd, inp, self.inner.get_exe(), "")?;
|
||||||
Ok(Box::new(SingleAdaptedFileAsIter::new(AdaptInfo {
|
Ok(Box::new(SingleAdaptedFileAsIter::new(AdaptInfo {
|
||||||
filepath_hint: PathBuf::from(format!("{}.txt", filepath_hint.to_string_lossy())), // TODO: customizable
|
filepath_hint: PathBuf::from(format!("{}.txt", filepath_hint.to_string_lossy())), // TODO: customizable
|
||||||
inp: output,
|
inp: output,
|
||||||
@ -122,14 +147,16 @@ impl FileAdapter for SpawningFileAdapter {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod test {
|
mod test {
|
||||||
use std::io::Cursor;
|
use std::io::Cursor;
|
||||||
|
|
||||||
use crate::{adapters::custom::CustomAdapterConfig, test_utils::{adapted_to_vec, simple_adapt_info}};
|
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::adapters::FileAdapter;
|
use crate::adapters::FileAdapter;
|
||||||
|
use crate::{
|
||||||
|
adapters::custom::CustomAdapterConfig,
|
||||||
|
test_utils::{adapted_to_vec, simple_adapt_info},
|
||||||
|
};
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn streaming() {
|
fn streaming() {
|
||||||
@ -143,7 +170,7 @@ mod test {
|
|||||||
mimetypes: None,
|
mimetypes: None,
|
||||||
match_only_by_mime: None,
|
match_only_by_mime: None,
|
||||||
binary: "sed".to_string(),
|
binary: "sed".to_string(),
|
||||||
args: vec!["s/e/u/g".to_string()]
|
args: vec!["s/e/u/g".to_string()],
|
||||||
};
|
};
|
||||||
|
|
||||||
let adapter = adapter.to_adapter();
|
let adapter = adapter.to_adapter();
|
||||||
@ -159,10 +186,13 @@ mod test {
|
|||||||
let input = format!("{0}{0}{0}{0}", input);
|
let input = format!("{0}{0}{0}{0}", input);
|
||||||
let input = format!("{0}{0}{0}{0}", input);
|
let input = format!("{0}{0}{0}{0}", input);
|
||||||
let input = format!("{0}{0}{0}{0}", input);
|
let input = format!("{0}{0}{0}{0}", input);
|
||||||
let (a, d) = simple_adapt_info(&Path::new("foo.txt"), Box::new(Cursor::new(input.as_bytes())));
|
let (a, d) = simple_adapt_info(
|
||||||
|
&Path::new("foo.txt"),
|
||||||
|
Box::new(Cursor::new(input.as_bytes())),
|
||||||
|
);
|
||||||
let output = adapter.adapt(a, &d).unwrap();
|
let output = adapter.adapt(a, &d).unwrap();
|
||||||
|
|
||||||
let oup = adapted_to_vec(output).unwrap();
|
let oup = adapted_to_vec(output).unwrap();
|
||||||
println!("output: {}", String::from_utf8_lossy(&oup));
|
println!("output: {}", String::from_utf8_lossy(&oup));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -85,7 +85,7 @@ impl<'a> AdaptedFilesIter for ZipAdaptIter<'a> {
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod test {
|
mod test {
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::{recurse::RecursingConcattyReader, test_utils::*};
|
use crate::test_utils::*;
|
||||||
|
|
||||||
fn create_zip(fname: &str, content: &str, add_inner: bool) -> Result<Vec<u8>> {
|
fn create_zip(fname: &str, content: &str, add_inner: bool) -> Result<Vec<u8>> {
|
||||||
use ::zip::write::FileOptions;
|
use ::zip::write::FileOptions;
|
||||||
|
@ -1,20 +1,24 @@
|
|||||||
|
use std::{pin::Pin, task::Poll};
|
||||||
|
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use log::*;
|
use log::*;
|
||||||
use std::io::{Read, Write};
|
use tokio::{io::{AsyncRead, AsyncWriteExt, AsyncWrite}, pin};
|
||||||
|
use async_compression::tokio::write::ZstdEncoder;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* wrap a writer so that it is passthrough,
|
* wrap a writer so that it is passthrough,
|
||||||
* but also the written data is compressed and written into a buffer,
|
* but also the written data is compressed and written into a buffer,
|
||||||
* unless more than max_cache_size bytes is written, then the cache is dropped and it is pure passthrough.
|
* unless more than max_cache_size bytes is written, then the cache is dropped and it is pure passthrough.
|
||||||
*/
|
*/
|
||||||
pub struct CachingReader<R: Read> {
|
pub struct CachingReader<R: AsyncRead> {
|
||||||
max_cache_size: usize,
|
max_cache_size: usize,
|
||||||
zstd_writer: Option<zstd::stream::write::Encoder<'static, Vec<u8>>>,
|
// set to none if the size goes over the limit
|
||||||
inp: R,
|
zstd_writer: Option<ZstdEncoder<Vec<u8>>>,
|
||||||
|
inp: Pin<Box<R>>,
|
||||||
bytes_written: u64,
|
bytes_written: u64,
|
||||||
on_finish: Box<dyn FnOnce((u64, Option<Vec<u8>>)) -> Result<()> + Send>,
|
on_finish: Box<dyn FnOnce((u64, Option<Vec<u8>>)) -> Result<()> + Send>,
|
||||||
}
|
}
|
||||||
impl<R: Read> CachingReader<R> {
|
impl<R: AsyncRead> CachingReader<R> {
|
||||||
pub fn new(
|
pub fn new(
|
||||||
inp: R,
|
inp: R,
|
||||||
max_cache_size: usize,
|
max_cache_size: usize,
|
||||||
@ -22,56 +26,78 @@ impl<R: Read> CachingReader<R> {
|
|||||||
on_finish: Box<dyn FnOnce((u64, Option<Vec<u8>>)) -> Result<()> + Send>,
|
on_finish: Box<dyn FnOnce((u64, Option<Vec<u8>>)) -> Result<()> + Send>,
|
||||||
) -> Result<CachingReader<R>> {
|
) -> Result<CachingReader<R>> {
|
||||||
Ok(CachingReader {
|
Ok(CachingReader {
|
||||||
inp,
|
inp: Box::pin(inp),
|
||||||
max_cache_size,
|
max_cache_size,
|
||||||
zstd_writer: Some(zstd::stream::write::Encoder::new(
|
zstd_writer: Some(ZstdEncoder::with_quality(
|
||||||
Vec::new(),
|
Vec::new(),
|
||||||
compression_level,
|
async_compression::Level::Precise(compression_level as u32),
|
||||||
)?),
|
)),
|
||||||
bytes_written: 0,
|
bytes_written: 0,
|
||||||
on_finish,
|
on_finish,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
pub fn finish(&mut self) -> std::io::Result<(u64, Option<Vec<u8>>)> {
|
pub fn finish(&mut self, cx: &mut std::task::Context<'_>) -> std::io::Result<(u64, Option<Vec<u8>>)> {
|
||||||
if let Some(writer) = self.zstd_writer.take() {
|
if let Some(writer) = self.zstd_writer.take() {
|
||||||
let res = writer.finish()?;
|
pin!(writer);
|
||||||
|
writer.as_mut().poll_shutdown(cx)?;
|
||||||
|
let res = writer.into_inner();
|
||||||
if res.len() <= self.max_cache_size {
|
if res.len() <= self.max_cache_size {
|
||||||
return Ok((self.bytes_written, Some(res)));
|
return Ok((self.bytes_written, Some(res)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok((self.bytes_written, None))
|
Ok((self.bytes_written, None))
|
||||||
}
|
}
|
||||||
fn write_to_compressed(&mut self, buf: &[u8]) -> std::io::Result<()> {
|
async fn write_to_compressed(&mut self, buf: &[u8]) -> std::io::Result<()> {
|
||||||
if let Some(writer) = self.zstd_writer.as_mut() {
|
if let Some(writer) = self.zstd_writer.as_mut() {
|
||||||
let wrote = writer.write(buf)?;
|
let wrote = writer.write_all(buf).await?;
|
||||||
let compressed_len = writer.get_ref().len();
|
let compressed_len = writer.get_ref().len();
|
||||||
trace!("wrote {} to zstd, len now {}", wrote, compressed_len);
|
trace!("wrote {} to zstd, len now {}", buf.len(), compressed_len);
|
||||||
if compressed_len > self.max_cache_size {
|
if compressed_len > self.max_cache_size {
|
||||||
debug!("cache longer than max, dropping");
|
debug!("cache longer than max, dropping");
|
||||||
//writer.finish();
|
//writer.finish();
|
||||||
self.zstd_writer.take().unwrap().finish()?;
|
self.zstd_writer.take();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
impl<R: Read> Read for CachingReader<R> {
|
impl<R> AsyncRead for CachingReader<R> where R: AsyncRead {
|
||||||
fn read(&mut self, mut buf: &mut [u8]) -> std::io::Result<usize> {
|
|
||||||
match self.inp.read(&mut buf) {
|
fn poll_read(
|
||||||
Ok(0) => {
|
self: std::pin::Pin<&mut Self>,
|
||||||
// move out of box, replace with noop lambda
|
cx: &mut std::task::Context<'_>,
|
||||||
let on_finish = std::mem::replace(&mut self.on_finish, Box::new(|_| Ok(())));
|
buf: &mut tokio::io::ReadBuf<'_>,
|
||||||
// EOF, finish!
|
) -> std::task::Poll<std::io::Result<()>> {
|
||||||
(on_finish)(self.finish()?)
|
let old_filled = buf.filled();
|
||||||
.map(|()| 0)
|
match self.inp.as_mut().poll_read(cx, &mut buf) {
|
||||||
.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))
|
/*Ok(0) => {
|
||||||
|
|
||||||
}
|
}
|
||||||
Ok(read_bytes) => {
|
Ok(read_bytes) => {
|
||||||
self.write_to_compressed(&buf[0..read_bytes])?;
|
self.write_to_compressed(&buf[0..read_bytes])?;
|
||||||
self.bytes_written += read_bytes as u64;
|
self.bytes_written += read_bytes as u64;
|
||||||
Ok(read_bytes)
|
Ok(read_bytes)
|
||||||
}
|
}*/
|
||||||
Err(e) => Err(e),
|
Poll::Ready(rdy) => {
|
||||||
|
if let Ok(()) = &rdy {
|
||||||
|
let slice = buf.filled();
|
||||||
|
let read_bytes = slice.len() - old_filled.len();
|
||||||
|
if read_bytes == 0 {
|
||||||
|
// EOF
|
||||||
|
// move out of box, replace with noop lambda
|
||||||
|
let on_finish = std::mem::replace(&mut self.on_finish, Box::new(|_| Ok(())));
|
||||||
|
// EOF, finish!
|
||||||
|
(on_finish)(self.finish(cx)?)
|
||||||
|
.map(|()| 0)
|
||||||
|
.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
|
||||||
|
} else {
|
||||||
|
self.write_to_compressed(&slice[old_filled.len()..]);
|
||||||
|
self.bytes_written += read_bytes as u64;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Poll::Ready(rdy)
|
||||||
|
},
|
||||||
|
Poll::Pending => Poll::Pending,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
304
src/preproc.rs
304
src/preproc.rs
@ -1,4 +1,6 @@
|
|||||||
use crate::adapters::*;
|
use crate::adapters::*;
|
||||||
|
use crate::config::RgaConfig;
|
||||||
|
use crate::recurse::concat_read_streams;
|
||||||
use crate::{matching::*, recurse::RecursingConcattyReader};
|
use crate::{matching::*, recurse::RecursingConcattyReader};
|
||||||
use crate::{
|
use crate::{
|
||||||
preproc_cache::{LmdbCache, PreprocCache},
|
preproc_cache::{LmdbCache, PreprocCache},
|
||||||
@ -7,47 +9,32 @@ use crate::{
|
|||||||
use anyhow::*;
|
use anyhow::*;
|
||||||
use log::*;
|
use log::*;
|
||||||
use path_clean::PathClean;
|
use path_clean::PathClean;
|
||||||
use postproc::PostprocPrefix;
|
// use postproc::PostprocPrefix;
|
||||||
use std::convert::TryInto;
|
use std::convert::TryInto;
|
||||||
|
use std::path::Path;
|
||||||
use std::io::{BufRead, BufReader};
|
use tokio::io::AsyncBufReadExt;
|
||||||
|
use tokio::io::BufReader;
|
||||||
|
use tokio::io::{AsyncBufRead, AsyncRead};
|
||||||
|
|
||||||
use std::{rc::Rc, time::Instant};
|
use std::{rc::Rc, time::Instant};
|
||||||
/**
|
|
||||||
* preprocess a file as defined in `ai`.
|
type ActiveAdapters = Vec<(Rc<dyn FileAdapter>)>;
|
||||||
*
|
|
||||||
* If a cache is passed, read/write to it.
|
async fn choose_adapter(
|
||||||
*
|
config: &RgaConfig,
|
||||||
*/
|
filepath_hint: &Path,
|
||||||
pub fn rga_preproc(ai: AdaptInfo) -> Result<ReadBox> {
|
archive_recursion_depth: i32,
|
||||||
let AdaptInfo {
|
mut inp: &mut (impl AsyncBufRead + Unpin),
|
||||||
filepath_hint,
|
) -> Result<Option<(Rc<dyn FileAdapter>, FileMatcher, ActiveAdapters)>> {
|
||||||
is_real_file,
|
let active_adapters = get_adapters_filtered(config.custom_adapters.clone(), &config.adapters)?;
|
||||||
inp,
|
let adapters = adapter_matcher(&active_adapters, config.accurate)?;
|
||||||
line_prefix,
|
|
||||||
config,
|
|
||||||
archive_recursion_depth,
|
|
||||||
postprocess,
|
|
||||||
} = ai;
|
|
||||||
debug!("path (hint) to preprocess: {:?}", filepath_hint);
|
|
||||||
let filtered_adapters =
|
|
||||||
get_adapters_filtered(config.custom_adapters.clone(), &config.adapters)?;
|
|
||||||
let adapters = adapter_matcher(&filtered_adapters, config.accurate)?;
|
|
||||||
let filename = filepath_hint
|
let filename = filepath_hint
|
||||||
.file_name()
|
.file_name()
|
||||||
.ok_or_else(|| format_err!("Empty filename"))?;
|
.ok_or_else(|| format_err!("Empty filename"))?;
|
||||||
debug!("Archive recursion depth: {}", archive_recursion_depth);
|
debug!("Archive recursion depth: {}", archive_recursion_depth);
|
||||||
if archive_recursion_depth >= config.max_archive_recursion.0 {
|
|
||||||
let s = format!("{}[rga: max archive recursion reached]", line_prefix).into_bytes();
|
|
||||||
return Ok(Box::new(std::io::Cursor::new(s)));
|
|
||||||
}
|
|
||||||
|
|
||||||
// todo: figure out when using a bufreader is a good idea and when it is not
|
|
||||||
// seems to be good for File::open() reads, but not sure about within archives (tar, zip)
|
|
||||||
let mut inp = BufReader::with_capacity(1 << 16, inp);
|
|
||||||
|
|
||||||
let mimetype = if config.accurate {
|
let mimetype = if config.accurate {
|
||||||
let buf = inp.fill_buf()?; // fill but do not consume!
|
let buf = inp.fill_buf().await?; // fill but do not consume!
|
||||||
let mimetype = tree_magic::from_u8(buf);
|
let mimetype = tree_magic::from_u8(buf);
|
||||||
debug!("mimetype: {:?}", mimetype);
|
debug!("mimetype: {:?}", mimetype);
|
||||||
Some(mimetype)
|
Some(mimetype)
|
||||||
@ -58,52 +45,105 @@ pub fn rga_preproc(ai: AdaptInfo) -> Result<ReadBox> {
|
|||||||
mimetype,
|
mimetype,
|
||||||
lossy_filename: filename.to_string_lossy().to_string(),
|
lossy_filename: filename.to_string_lossy().to_string(),
|
||||||
});
|
});
|
||||||
let (adapter, detection_reason) = match adapter {
|
Ok(adapter.map(|e| (e.0, e.1, active_adapters)))
|
||||||
Some((a, d)) => (a, d),
|
}
|
||||||
|
/**
|
||||||
|
* preprocess a file as defined in `ai`.
|
||||||
|
*
|
||||||
|
* If a cache is passed, read/write to it.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
pub async fn rga_preproc(ai: AdaptInfo<'_>) -> Result<ReadBox<'_>> {
|
||||||
|
debug!("path (hint) to preprocess: {:?}", ai.filepath_hint);
|
||||||
|
/*todo: move if archive_recursion_depth >= config.max_archive_recursion.0 {
|
||||||
|
let s = format!("{}[rga: max archive recursion reached]", line_prefix).into_bytes();
|
||||||
|
return Ok(Box::new(std::io::Cursor::new(s)));
|
||||||
|
}*/
|
||||||
|
|
||||||
|
// todo: figure out when using a bufreader is a good idea and when it is not
|
||||||
|
// seems to be good for File::open() reads, but not sure about within archives (tar, zip)
|
||||||
|
let mut inp = BufReader::with_capacity(1 << 16, ai.inp);
|
||||||
|
let adapter = choose_adapter(
|
||||||
|
&ai.config,
|
||||||
|
&ai.filepath_hint,
|
||||||
|
ai.archive_recursion_depth,
|
||||||
|
&mut inp,
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
let (adapter, detection_reason, active_adapters) = match adapter {
|
||||||
|
Some((a, d, e)) => (a, d, e),
|
||||||
None => {
|
None => {
|
||||||
// allow passthrough if the file is in an archive or accurate matching is enabled
|
// allow passthrough if the file is in an archive or accurate matching is enabled
|
||||||
// otherwise it should have been filtered out by rg pre-glob since rg can handle those better than us
|
// otherwise it should have been filtered out by rg pre-glob since rg can handle those better than us
|
||||||
let allow_cat = !is_real_file || config.accurate;
|
let allow_cat = !ai.is_real_file || ai.config.accurate;
|
||||||
if allow_cat {
|
if allow_cat {
|
||||||
if postprocess {
|
if ai.postprocess {
|
||||||
(
|
panic!("not implemented");
|
||||||
|
/* (
|
||||||
Rc::new(PostprocPrefix {}) as Rc<dyn FileAdapter>,
|
Rc::new(PostprocPrefix {}) as Rc<dyn FileAdapter>,
|
||||||
FileMatcher::Fast(FastFileMatcher::FileExtension("default".to_string())), // todo: separate enum value for this
|
FileMatcher::Fast(FastFileMatcher::FileExtension("default".to_string())), // todo: separate enum value for this
|
||||||
)
|
)*/
|
||||||
} else {
|
} else {
|
||||||
return Ok(Box::new(inp));
|
return Ok(Box::pin(inp));
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
return Err(format_err!(
|
return Err(format_err!(
|
||||||
"No adapter found for file {:?}, passthrough disabled.",
|
"No adapter found for file {:?}, passthrough disabled.",
|
||||||
filename
|
ai.filepath_hint
|
||||||
|
.file_name()
|
||||||
|
.ok_or_else(|| format_err!("Empty filename"))?
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
let path_hint_copy = filepath_hint.clone();
|
let path_hint_copy = ai.filepath_hint.clone();
|
||||||
run_adapter(
|
run_adapter_recursively(
|
||||||
AdaptInfo {
|
AdaptInfo {
|
||||||
filepath_hint,
|
inp: Box::pin(inp),
|
||||||
is_real_file,
|
..ai
|
||||||
inp: Box::new(inp),
|
|
||||||
line_prefix,
|
|
||||||
config,
|
|
||||||
archive_recursion_depth,
|
|
||||||
postprocess,
|
|
||||||
},
|
},
|
||||||
adapter,
|
adapter,
|
||||||
detection_reason,
|
detection_reason,
|
||||||
&filtered_adapters,
|
active_adapters,
|
||||||
)
|
)
|
||||||
|
.await
|
||||||
.with_context(|| format!("run_adapter({})", &path_hint_copy.to_string_lossy()))
|
.with_context(|| format!("run_adapter({})", &path_hint_copy.to_string_lossy()))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn run_adapter<'a>(
|
fn compute_cache_key(
|
||||||
|
filepath_hint: &Path,
|
||||||
|
adapter: &dyn FileAdapter,
|
||||||
|
active_adapters: ActiveAdapters,
|
||||||
|
) -> Result<Vec<u8>> {
|
||||||
|
let clean_path = filepath_hint.to_owned().clean();
|
||||||
|
let meta = std::fs::metadata(&filepath_hint)
|
||||||
|
.with_context(|| format!("reading metadata for {}", filepath_hint.to_string_lossy()))?;
|
||||||
|
let modified = meta.modified().expect("weird OS that can't into mtime");
|
||||||
|
|
||||||
|
if adapter.metadata().recurses {
|
||||||
|
let active_adapters_cache_key = active_adapters
|
||||||
|
.iter()
|
||||||
|
.map(|a| (a.metadata().name.clone(), a.metadata().version))
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
let key = (active_adapters_cache_key, clean_path, modified);
|
||||||
|
debug!("Cache key (with recursion): {:?}", key);
|
||||||
|
bincode::serialize(&key).context("could not serialize path")
|
||||||
|
} else {
|
||||||
|
let key = (
|
||||||
|
adapter.metadata().name.clone(),
|
||||||
|
adapter.metadata().version,
|
||||||
|
clean_path,
|
||||||
|
modified,
|
||||||
|
);
|
||||||
|
debug!("Cache key (no recursion): {:?}", key);
|
||||||
|
bincode::serialize(&key).context("could not serialize path")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
async fn run_adapter_recursively<'a>(
|
||||||
ai: AdaptInfo<'a>,
|
ai: AdaptInfo<'a>,
|
||||||
adapter: Rc<dyn FileAdapter>,
|
adapter: Rc<dyn FileAdapter>,
|
||||||
detection_reason: FileMatcher,
|
detection_reason: FileMatcher,
|
||||||
filtered_adapters: &Vec<Rc<dyn FileAdapter>>,
|
active_adapters: ActiveAdapters,
|
||||||
) -> Result<ReadBox<'a>> {
|
) -> Result<ReadBox<'a>> {
|
||||||
let AdaptInfo {
|
let AdaptInfo {
|
||||||
filepath_hint,
|
filepath_hint,
|
||||||
@ -134,116 +174,56 @@ fn run_adapter<'a>(
|
|||||||
None
|
None
|
||||||
};
|
};
|
||||||
|
|
||||||
if let Some(mut cache) = cache {
|
let mut cache = cache.context("No cache?")?;
|
||||||
let cache_key: Vec<u8> = {
|
let cache_key: Vec<u8> = compute_cache_key(&filepath_hint, adapter.as_ref(), active_adapters)?;
|
||||||
let clean_path = filepath_hint.to_owned().clean();
|
// let dbg_ctx = format!("adapter {}", &adapter.metadata().name);
|
||||||
let meta = std::fs::metadata(&filepath_hint).with_context(|| {
|
let cached = cache.get(&db_name, &cache_key)?;
|
||||||
format!("reading metadata for {}", filepath_hint.to_string_lossy())
|
match cached {
|
||||||
})?;
|
Some(cached) => Ok(Box::pin(
|
||||||
let modified = meta.modified().expect("weird OS that can't into mtime");
|
async_compression::tokio::bufread::ZstdDecoder::new(std::io::Cursor::new(cached)),
|
||||||
|
)),
|
||||||
if adapter.metadata().recurses {
|
None => {
|
||||||
let key = (
|
debug!("cache MISS, running adapter");
|
||||||
filtered_adapters
|
debug!("adapting with caching...");
|
||||||
.iter()
|
let inp = adapter
|
||||||
.map(|a| (a.metadata().name.clone(), a.metadata().version))
|
.adapt(
|
||||||
.collect::<Vec<_>>(),
|
AdaptInfo {
|
||||||
clean_path,
|
line_prefix,
|
||||||
modified,
|
filepath_hint: filepath_hint.clone(),
|
||||||
);
|
is_real_file,
|
||||||
debug!("Cache key (with recursion): {:?}", key);
|
inp,
|
||||||
bincode::serialize(&key).expect("could not serialize path")
|
archive_recursion_depth,
|
||||||
} else {
|
config,
|
||||||
let key = (
|
postprocess,
|
||||||
adapter.metadata().name.clone(),
|
},
|
||||||
adapter.metadata().version,
|
&detection_reason,
|
||||||
clean_path,
|
|
||||||
modified,
|
|
||||||
);
|
|
||||||
debug!("Cache key (no recursion): {:?}", key);
|
|
||||||
bincode::serialize(&key).expect("could not serialize path")
|
|
||||||
}
|
|
||||||
};
|
|
||||||
// let dbg_ctx = format!("adapter {}", &adapter.metadata().name);
|
|
||||||
let cached = cache.get(&db_name, &cache_key)?;
|
|
||||||
match cached {
|
|
||||||
Some(cached) => Ok(Box::new(
|
|
||||||
zstd::stream::read::Decoder::new(std::io::Cursor::new(cached))
|
|
||||||
.context("could not create zstd decoder")?,
|
|
||||||
)),
|
|
||||||
None => {
|
|
||||||
debug!("cache MISS, running adapter");
|
|
||||||
debug!("adapting with caching...");
|
|
||||||
let inp = adapter
|
|
||||||
.adapt(
|
|
||||||
AdaptInfo {
|
|
||||||
line_prefix,
|
|
||||||
filepath_hint: filepath_hint.clone(),
|
|
||||||
is_real_file,
|
|
||||||
inp: Box::new(inp),
|
|
||||||
archive_recursion_depth,
|
|
||||||
config,
|
|
||||||
postprocess,
|
|
||||||
},
|
|
||||||
&detection_reason,
|
|
||||||
)
|
|
||||||
.with_context(|| {
|
|
||||||
format!(
|
|
||||||
"adapting {} via {} failed",
|
|
||||||
filepath_hint.to_string_lossy(),
|
|
||||||
meta.name
|
|
||||||
)
|
|
||||||
})?;
|
|
||||||
let inp = RecursingConcattyReader::concat(inp)?;
|
|
||||||
let inp = CachingReader::new(
|
|
||||||
inp,
|
|
||||||
cache_max_blob_len.0.try_into().unwrap(),
|
|
||||||
cache_compression_level.0.try_into().unwrap(),
|
|
||||||
Box::new(move |(uncompressed_size, compressed)| {
|
|
||||||
debug!(
|
|
||||||
"uncompressed output: {}",
|
|
||||||
print_bytes(uncompressed_size as f64)
|
|
||||||
);
|
|
||||||
if let Some(cached) = compressed {
|
|
||||||
debug!("compressed output: {}", print_bytes(cached.len() as f64));
|
|
||||||
cache.set(&db_name, &cache_key, &cached)?
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}),
|
|
||||||
)?;
|
|
||||||
|
|
||||||
Ok(Box::new(inp))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// no cache arc - probably within archive
|
|
||||||
debug!("adapting without caching...");
|
|
||||||
let start = Instant::now();
|
|
||||||
let oread = adapter
|
|
||||||
.adapt(
|
|
||||||
AdaptInfo {
|
|
||||||
line_prefix,
|
|
||||||
filepath_hint: filepath_hint.clone(),
|
|
||||||
is_real_file,
|
|
||||||
inp,
|
|
||||||
archive_recursion_depth,
|
|
||||||
config,
|
|
||||||
postprocess,
|
|
||||||
},
|
|
||||||
&detection_reason,
|
|
||||||
)
|
|
||||||
.with_context(|| {
|
|
||||||
format!(
|
|
||||||
"adapting {} via {} without caching failed",
|
|
||||||
filepath_hint.to_string_lossy(),
|
|
||||||
meta.name
|
|
||||||
)
|
)
|
||||||
})?;
|
.with_context(|| {
|
||||||
debug!(
|
format!(
|
||||||
"running adapter {} took {}",
|
"adapting {} via {} failed",
|
||||||
adapter.metadata().name,
|
filepath_hint.to_string_lossy(),
|
||||||
print_dur(start)
|
meta.name
|
||||||
);
|
)
|
||||||
Ok(RecursingConcattyReader::concat(oread)?)
|
})?;
|
||||||
|
let inp = concat_read_streams(inp);
|
||||||
|
let inp = CachingReader::new(
|
||||||
|
inp,
|
||||||
|
cache_max_blob_len.0.try_into().unwrap(),
|
||||||
|
cache_compression_level.0.try_into().unwrap(),
|
||||||
|
Box::new(move |(uncompressed_size, compressed)| {
|
||||||
|
debug!(
|
||||||
|
"uncompressed output: {}",
|
||||||
|
print_bytes(uncompressed_size as f64)
|
||||||
|
);
|
||||||
|
if let Some(cached) = compressed {
|
||||||
|
debug!("compressed output: {}", print_bytes(cached.len() as f64));
|
||||||
|
cache.set(&db_name, &cache_key, &cached)?
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}),
|
||||||
|
)?;
|
||||||
|
|
||||||
|
Ok(Box::pin(inp))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,14 +1,33 @@
|
|||||||
|
use tokio::io::AsyncRead;
|
||||||
|
use tokio_util::io::{ReaderStream, StreamReader};
|
||||||
|
|
||||||
use crate::preproc::rga_preproc;
|
use crate::preproc::rga_preproc;
|
||||||
use crate::{adapted_iter::AdaptedFilesIterBox, adapters::*};
|
use crate::{adapted_iter::AdaptedFilesIterBox, adapters::*};
|
||||||
|
use async_stream::stream;
|
||||||
use std::io::Read;
|
use tokio_stream::Stream;
|
||||||
|
use bytes::Bytes;
|
||||||
|
use tokio_stream::StreamExt;
|
||||||
|
|
||||||
pub struct RecursingConcattyReader<'a> {
|
pub struct RecursingConcattyReader<'a> {
|
||||||
inp: AdaptedFilesIterBox<'a>,
|
inp: AdaptedFilesIterBox<'a>,
|
||||||
cur: Option<ReadBox<'a>>,
|
cur: Option<ReadBox<'a>>,
|
||||||
}
|
}
|
||||||
|
pub fn concat_read_streams(
|
||||||
|
mut input: AdaptedFilesIterBox<'_>,
|
||||||
|
) -> ReadBox<'_> {
|
||||||
|
let s = stream! {
|
||||||
|
while let Some(output) = input.next() {
|
||||||
|
let mut stream = ReaderStream::new(output.inp);
|
||||||
|
while let Some(bytes) = stream.next().await {
|
||||||
|
yield bytes;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
Box::pin(StreamReader::new(s))
|
||||||
|
}
|
||||||
|
/*
|
||||||
impl<'a> RecursingConcattyReader<'a> {
|
impl<'a> RecursingConcattyReader<'a> {
|
||||||
pub fn concat(inp: AdaptedFilesIterBox<'a>) -> anyhow::Result<Box<dyn Read + 'a>> {
|
pub fn concat(inp: AdaptedFilesIterBox<'a>) -> anyhow::Result<ReadBox<'a>> {
|
||||||
let mut r = RecursingConcattyReader { inp, cur: None };
|
let mut r = RecursingConcattyReader { inp, cur: None };
|
||||||
r.ascend()?;
|
r.ascend()?;
|
||||||
Ok(Box::new(r))
|
Ok(Box::new(r))
|
||||||
@ -28,7 +47,7 @@ impl<'a> RecursingConcattyReader<'a> {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
impl<'a> Read for RecursingConcattyReader<'a> {
|
impl<'a> AsyncRead for RecursingConcattyReader<'a> {
|
||||||
fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
|
fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
|
||||||
match &mut self.cur {
|
match &mut self.cur {
|
||||||
None => Ok(0), // last file ended
|
None => Ok(0), // last file ended
|
||||||
@ -45,3 +64,4 @@ impl<'a> Read for RecursingConcattyReader<'a> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
*/
|
Loading…
Reference in New Issue
Block a user