diff --git a/Cargo.lock b/Cargo.lock index e0d89f7..ad61973 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -464,6 +464,12 @@ dependencies = [ "syn", ] +[[package]] +name = "diff" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" + [[package]] name = "digest" version = "0.10.6" @@ -1143,6 +1149,15 @@ version = "6.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b7820b9daea5457c9f21c69448905d723fbd21136ccf521748f23fd49e723ee" +[[package]] +name = "output_vt100" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "628223faebab4e3e40667ee0b2336d34a5b960ff60ea743ddfdbcf7770bcfb66" +dependencies = [ + "winapi", +] + [[package]] name = "owning_ref" version = "0.4.1" @@ -1254,6 +1269,18 @@ dependencies = [ "getopts", ] +[[package]] +name = "pretty_assertions" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a25e9bcb20aa780fd0bb16b72403a9064d6b3f22f026946029acb941a50af755" +dependencies = [ + "ctor", + "diff", + "output_vt100", + "yansi", +] + [[package]] name = "proc-macro-error" version = "1.0.4" @@ -1379,6 +1406,7 @@ dependencies = [ "paste", "path-clean", "pretty-bytes", + "pretty_assertions", "regex", "rkv", "rusqlite", @@ -2155,6 +2183,12 @@ dependencies = [ "lzma-sys", ] +[[package]] +name = "yansi" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec" + [[package]] name = "zip" version = "0.6.3" diff --git a/Cargo.toml b/Cargo.toml index 0dab9f5..484d773 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,65 +1,66 @@ [package] -name = "ripgrep_all" -description = "rga: ripgrep, but also search in PDFs, E-Books, Office documents, zip, tar.gz, etc." -license = "AGPL-3.0-or-later" -readme = "README.md" -version = "0.9.7-alpha.0" -repository = "https://github.com/phiresky/ripgrep-all" -homepage = "https://github.com/phiresky/ripgrep-all" authors = ["phiresky "] +description = "rga: ripgrep, but also search in PDFs, E-Books, Office documents, zip, tar.gz, etc." edition = "2018" exclude = [ - "exampledir/*" + "exampledir/*", ] +homepage = "https://github.com/phiresky/ripgrep-all" +license = "AGPL-3.0-or-later" +name = "ripgrep_all" +readme = "README.md" +repository = "https://github.com/phiresky/ripgrep-all" +version = "0.9.7-alpha.0" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -tree_magic = { package = "tree_magic_mini", version = "3.0.0" } -regex = "1.3.9" -rkv = "0.17" -path-clean = "0.1.0" +anyhow = "1.0.32" +async-compression = {version = "0.3.15", features = ["tokio", "zstd"]} +async-stream = "0.3.3" bincode = "1.3.1" -serde = { version = "1.0.115", features = ["derive"] } -zstd = "0.11.2" -lazy_static = "1.4.0" -serde_json = "1.0.57" -crossbeam = "0.8.1" -clap = { version = "4.0.18", features = ["wrap_help"] } -log = "0.4.11" -env_logger = "0.9.0" -xz2 = "0.1.6" -flate2 = "1.0.14" +bytes = "1.2.1" bzip2 = "0.4.1" -tar = "0.4.30" chrono = "0.4.15" +clap = {version = "4.0.18", features = ["wrap_help"]} +crossbeam = "0.8.1" +crossbeam-channel = "0.5.1" +derive_more = "0.99.9" +directories-next = "2.0.0" +dyn-clonable = "0.9.0" +dyn-clone = "1.0.2" encoding_rs = "0.8.24" encoding_rs_io = "0.1.7" -rusqlite = { version = "0.28.0", features = ["vtab", "bundled"] } +env_logger = "0.9.0" +flate2 = "1.0.14" +glob = "0.3.0" +lazy_static = "1.4.0" +log = "0.4.11" +memchr = "2.3.3" +owning_ref = "0.4.1" +paste = "1.0.0" +path-clean = "0.1.0" +pretty-bytes = "0.2.2" +regex = "1.3.9" +rkv = "0.17" +rusqlite = {version = "0.28.0", features = ["vtab", "bundled"]} +schemars = {version = "0.8.0-alpha-4", features = ["preserve_order"]} +serde = {version = "1.0.115", features = ["derive"]} +serde_json = "1.0.57" size_format = "1.0.2" structopt = "0.3.17" -paste = "1.0.0" +tar = "0.4.30" tempfile = "3.1.0" -glob = "0.3.0" -anyhow = "1.0.32" -schemars = { version = "0.8.0-alpha-4", features = ["preserve_order"] } -directories-next = "2.0.0" -derive_more = "0.99.9" -pretty-bytes = "0.2.2" -memchr = "2.3.3" -crossbeam-channel = "0.5.1" -dyn-clone = "1.0.2" -dyn-clonable = "0.9.0" +tokio = {version = "1.21.2", features = ["full"]} +tokio-stream = {version = "0.1.11", features = ["io-util", "tokio-util"]} +tokio-util = {version = "0.7.4", features = ["io", "full"]} +tree_magic = {package = "tree_magic_mini", version = "3.0.0"} +xz2 = "0.1.6" zip = "0.6.3" -owning_ref = "0.4.1" -tokio = { version = "1.21.2", features = ["full"] } -async-compression = { version = "0.3.15", features = ["tokio", "zstd"] } -tokio-stream = { version = "0.1.11", features = ["io-util", "tokio-util"] } -async-stream = "0.3.3" -bytes = "1.2.1" -tokio-util = { version = "0.7.4", features = ["io", "full"] } +zstd = "0.11.2" [dev-dependencies] ctor = "0.1.20" +pretty_assertions = "1.3.0" tokio-test = "0.4.2" diff --git a/src/adapters/custom.rs b/src/adapters/custom.rs index b6ad811..70f2e77 100644 --- a/src/adapters/custom.rs +++ b/src/adapters/custom.rs @@ -310,6 +310,7 @@ mod test { use crate::preproc::loop_adapt; use crate::test_utils::*; use anyhow::Result; + use pretty_assertions::{assert_eq, assert_ne}; use tokio::fs::File; #[tokio::test] @@ -329,12 +330,13 @@ mod test { let o = adapted_to_vec(r).await?; assert_eq!( String::from_utf8(o)?, - "PREFIX:hello world -PREFIX:this is just a test. -PREFIX: -PREFIX:1 -PREFIX: -PREFIX:\u{c} + "PREFIX:Page 1:hello world +PREFIX:Page 1:this is just a test. +PREFIX:Page 1: +PREFIX:Page 1:1 +PREFIX:Page 1: +PREFIX:Page 1: +PREFIX:Page 2: " ); Ok(()) diff --git a/src/adapters/postproc.rs b/src/adapters/postproc.rs index ad5ae51..77f1067 100644 --- a/src/adapters/postproc.rs +++ b/src/adapters/postproc.rs @@ -175,14 +175,11 @@ impl FileAdapter for PostprocPageBreaks { a: super::AdaptInfo, _detection_reason: &crate::matching::FileMatcher, ) -> Result { - let read = add_newline(postproc_pagebreaks( - &a.line_prefix, - postproc_encoding(&a.line_prefix, a.inp)?, - )); + let read = postproc_pagebreaks("", postproc_encoding(&a.line_prefix, a.inp)?); // keep adapt info (filename etc) except replace inp let ai = AdaptInfo { inp: Box::pin(read), - postprocess: false, + postprocess: true, archive_recursion_depth: a.archive_recursion_depth + 1, filepath_hint: a .filepath_hint diff --git a/src/preproc.rs b/src/preproc.rs index e05a3fa..22667cc 100644 --- a/src/preproc.rs +++ b/src/preproc.rs @@ -227,8 +227,6 @@ pub fn loop_adapt( adapter.metadata().name ) })?; - debug!("got fph starting loop: {}", fph.to_string_lossy()); - let s = stream! { for await file in inp { match buf_choose_adapter(file).await.expect("todo: handle") { diff --git a/src/test_utils.rs b/src/test_utils.rs index 0efce30..cf5419a 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -9,6 +9,7 @@ use anyhow::Result; use std::path::{Path, PathBuf}; use tokio::io::AsyncReadExt; +pub use pretty_assertions::{assert_eq, assert_ne}; pub fn test_data_dir() -> PathBuf { let mut d = PathBuf::from(env!("CARGO_MANIFEST_DIR")); d.push("exampledir/test/");