diff --git a/Cargo.lock b/Cargo.lock index 0df8f0d..78bd8f5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -139,25 +139,17 @@ dependencies = [ "syn", ] -[[package]] -name = "async_io_utilities" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b20cffc5590f4bf33f05f97a3ea587feba9c50d20325b401daa096b92ff7da0" -dependencies = [ - "tokio 1.25.0", -] - [[package]] name = "async_zip" -version = "0.0.9" +version = "0.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a36d43bdefc7215b2b3a97edd03b1553b7969ad76551025eedd3b913c645f6e" +checksum = "b2105142db9c6203b9dadc83b0553394589a6cb31b1449a3b46b42f47c3434d0" dependencies = [ "async-compression", - "async_io_utilities", "chrono", "crc32fast", + "log", + "pin-project", "thiserror", "tokio 1.25.0", ] @@ -288,11 +280,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "16b0a3d9ed01224b22057780a37bb8c5dbfe1be8ba48678e7bf57ec4b385411f" dependencies = [ "iana-time-zone", - "js-sys", "num-integer", "num-traits", - "time", - "wasm-bindgen", "winapi", ] @@ -573,12 +562,12 @@ dependencies = [ [[package]] name = "env_logger" -version = "0.9.3" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a12e6657c4c97ebab115a42dcee77225f7f482cdd841cf7088c657a42e9e00e7" +checksum = "85cdab6a89accf66733ad5a1693a4dcced6aeff64602b634530dd73c1f3ee9f0" dependencies = [ - "atty", "humantime", + "is-terminal", "log", "regex", "termcolor", @@ -744,7 +733,7 @@ checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31" dependencies = [ "cfg-if", "libc", - "wasi 0.11.0+wasi-snapshot-preview1", + "wasi", ] [[package]] @@ -1051,7 +1040,7 @@ checksum = "5b9d9a46eff5b4ff64b45a9e316a6d1e0bc719ef429cbec4dc630684212bfdf9" dependencies = [ "libc", "log", - "wasi 0.11.0+wasi-snapshot-preview1", + "wasi", "windows-sys 0.45.0", ] @@ -1220,6 +1209,26 @@ dependencies = [ "indexmap", ] +[[package]] +name = "pin-project" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad29a609b6bcd67fee905812e544992d216af9d755757c05ed2d0e15a74c6ecc" +dependencies = [ + "pin-project-internal", +] + +[[package]] +name = "pin-project-internal" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "069bdb1e05adc7a8990dce9cc75370895fbe4e3d58b9b73bf1aee56359344a55" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "pin-project-lite" version = "0.1.12" @@ -1707,17 +1716,6 @@ dependencies = [ "syn", ] -[[package]] -name = "time" -version = "0.1.45" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b797afad3f312d1c66a56d11d0316f916356d11bd158fbc6ca6389ff6bf805a" -dependencies = [ - "libc", - "wasi 0.10.0+wasi-snapshot-preview1", - "winapi", -] - [[package]] name = "tinyvec" version = "1.6.0" @@ -1949,12 +1947,6 @@ version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" -[[package]] -name = "wasi" -version = "0.10.0+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" - [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" diff --git a/Cargo.toml b/Cargo.toml index 3aca928..94f9fce 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,7 +20,7 @@ anyhow = "1.0.32" async-compression = {version = "0.3.15", features = ["all", "all-algorithms", "tokio"]} async-stream = "0.3.3" async-trait = "0.1.64" -async_zip = "0.0.9" +async_zip = {version = "0.0.12", features = ["full"]} bincode = "1.3.1" bytes = "1.2.1" clap = {version = "4.0.18", features = ["wrap_help"]} @@ -32,7 +32,7 @@ dyn-clonable = "0.9.0" dyn-clone = "1.0.2" encoding_rs = "0.8.24" encoding_rs_io = "0.1.7" -env_logger = "0.9.0" +env_logger = "0.10.0" glob = "0.3.0" json_comments = "0.2.1" lazy_static = "1.4.0" @@ -42,7 +42,7 @@ paste = "1.0.0" path-clean = "0.1.0" pretty-bytes = "0.2.2" regex = "1.3.9" -rkv = "0.17" +rkv = "0.17" # 0.18 removes lmdb backend rusqlite = {version = "0.28.0", features = ["vtab", "bundled"]} schemars = {version = "0.8.0-alpha-4", features = ["preserve_order"]} serde = {version = "1.0.115", features = ["derive"]} diff --git a/src/adapters/zip.rs b/src/adapters/zip.rs index 43a1603..56da762 100644 --- a/src/adapters/zip.rs +++ b/src/adapters/zip.rs @@ -59,9 +59,9 @@ impl FileAdapter for ZipAdapter { let zip = ZipFileReader::new(&filepath_hint).await?; let s = stream! { - for i in 0..zip.entries().len() { - let reader = zip.entry_reader(i).await?; - let file = reader.entry(); + for i in 0..zip.file().entries().len() { + let file = zip.get_entry(i)?; + let reader = zip.entry(i).await?; if file.filename().ends_with('/') { continue; } @@ -103,10 +103,11 @@ impl FileAdapter for ZipAdapter { let mut zip = ZipFileReader::new(inp); let s = stream! { - while !zip.finished() { - if let Some(reader) = zip.entry_reader().await? { - let file = reader.entry(); + while let Some(mut entry) = zip.next_entry().await? { + let file = entry.entry(); if file.filename().ends_with('/') { + zip = entry.skip().await?; + continue; } debug!( @@ -119,6 +120,7 @@ impl FileAdapter for ZipAdapter { ); let new_line_prefix = format!("{}{}: ", line_prefix, file.filename()); let fname = PathBuf::from(file.filename()); + let reader = entry.reader(); tokio::pin!(reader); // SAFETY: this should be solvable without unsafe but idk how :( // the issue is that ZipEntryReader borrows from ZipFileReader, but we need to yield it here into the stream @@ -138,7 +140,8 @@ impl FileAdapter for ZipAdapter { postprocess, config: config.clone(), }); - } + zip = entry.done().await.context("going to next file in zip but entry was not read fully")?; + } };