fix poppler test

This commit is contained in:
phiresky 2022-12-25 18:44:52 +01:00
parent ddeceb0ce9
commit 24e866a153
6 changed files with 88 additions and 55 deletions

34
Cargo.lock generated
View File

@@ -464,6 +464,12 @@ dependencies = [
"syn", "syn",
] ]
[[package]]
name = "diff"
version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8"
[[package]] [[package]]
name = "digest" name = "digest"
version = "0.10.6" version = "0.10.6"
@@ -1143,6 +1149,15 @@ version = "6.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b7820b9daea5457c9f21c69448905d723fbd21136ccf521748f23fd49e723ee" checksum = "9b7820b9daea5457c9f21c69448905d723fbd21136ccf521748f23fd49e723ee"
[[package]]
name = "output_vt100"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "628223faebab4e3e40667ee0b2336d34a5b960ff60ea743ddfdbcf7770bcfb66"
dependencies = [
"winapi",
]
[[package]] [[package]]
name = "owning_ref" name = "owning_ref"
version = "0.4.1" version = "0.4.1"
@@ -1254,6 +1269,18 @@ dependencies = [
"getopts", "getopts",
] ]
[[package]]
name = "pretty_assertions"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a25e9bcb20aa780fd0bb16b72403a9064d6b3f22f026946029acb941a50af755"
dependencies = [
"ctor",
"diff",
"output_vt100",
"yansi",
]
[[package]] [[package]]
name = "proc-macro-error" name = "proc-macro-error"
version = "1.0.4" version = "1.0.4"
@@ -1379,6 +1406,7 @@ dependencies = [
"paste", "paste",
"path-clean", "path-clean",
"pretty-bytes", "pretty-bytes",
"pretty_assertions",
"regex", "regex",
"rkv", "rkv",
"rusqlite", "rusqlite",
@@ -2155,6 +2183,12 @@ dependencies = [
"lzma-sys", "lzma-sys",
] ]
[[package]]
name = "yansi"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec"
[[package]] [[package]]
name = "zip" name = "zip"
version = "0.6.3" version = "0.6.3"

View File

@@ -1,65 +1,66 @@
[package] [package]
name = "ripgrep_all"
description = "rga: ripgrep, but also search in PDFs, E-Books, Office documents, zip, tar.gz, etc."
license = "AGPL-3.0-or-later"
readme = "README.md"
version = "0.9.7-alpha.0"
repository = "https://github.com/phiresky/ripgrep-all"
homepage = "https://github.com/phiresky/ripgrep-all"
authors = ["phiresky <phireskyde+git@gmail.com>"] authors = ["phiresky <phireskyde+git@gmail.com>"]
description = "rga: ripgrep, but also search in PDFs, E-Books, Office documents, zip, tar.gz, etc."
edition = "2018" edition = "2018"
exclude = [ exclude = [
"exampledir/*" "exampledir/*",
] ]
homepage = "https://github.com/phiresky/ripgrep-all"
license = "AGPL-3.0-or-later"
name = "ripgrep_all"
readme = "README.md"
repository = "https://github.com/phiresky/ripgrep-all"
version = "0.9.7-alpha.0"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies] [dependencies]
tree_magic = { package = "tree_magic_mini", version = "3.0.0" } anyhow = "1.0.32"
regex = "1.3.9" async-compression = {version = "0.3.15", features = ["tokio", "zstd"]}
rkv = "0.17" async-stream = "0.3.3"
path-clean = "0.1.0"
bincode = "1.3.1" bincode = "1.3.1"
serde = { version = "1.0.115", features = ["derive"] } bytes = "1.2.1"
zstd = "0.11.2"
lazy_static = "1.4.0"
serde_json = "1.0.57"
crossbeam = "0.8.1"
clap = { version = "4.0.18", features = ["wrap_help"] }
log = "0.4.11"
env_logger = "0.9.0"
xz2 = "0.1.6"
flate2 = "1.0.14"
bzip2 = "0.4.1" bzip2 = "0.4.1"
tar = "0.4.30"
chrono = "0.4.15" chrono = "0.4.15"
clap = {version = "4.0.18", features = ["wrap_help"]}
crossbeam = "0.8.1"
crossbeam-channel = "0.5.1"
derive_more = "0.99.9"
directories-next = "2.0.0"
dyn-clonable = "0.9.0"
dyn-clone = "1.0.2"
encoding_rs = "0.8.24" encoding_rs = "0.8.24"
encoding_rs_io = "0.1.7" encoding_rs_io = "0.1.7"
env_logger = "0.9.0"
flate2 = "1.0.14"
glob = "0.3.0"
lazy_static = "1.4.0"
log = "0.4.11"
memchr = "2.3.3"
owning_ref = "0.4.1"
paste = "1.0.0"
path-clean = "0.1.0"
pretty-bytes = "0.2.2"
regex = "1.3.9"
rkv = "0.17"
rusqlite = {version = "0.28.0", features = ["vtab", "bundled"]} rusqlite = {version = "0.28.0", features = ["vtab", "bundled"]}
schemars = {version = "0.8.0-alpha-4", features = ["preserve_order"]}
serde = {version = "1.0.115", features = ["derive"]}
serde_json = "1.0.57"
size_format = "1.0.2" size_format = "1.0.2"
structopt = "0.3.17" structopt = "0.3.17"
paste = "1.0.0" tar = "0.4.30"
tempfile = "3.1.0" tempfile = "3.1.0"
glob = "0.3.0"
anyhow = "1.0.32"
schemars = { version = "0.8.0-alpha-4", features = ["preserve_order"] }
directories-next = "2.0.0"
derive_more = "0.99.9"
pretty-bytes = "0.2.2"
memchr = "2.3.3"
crossbeam-channel = "0.5.1"
dyn-clone = "1.0.2"
dyn-clonable = "0.9.0"
zip = "0.6.3"
owning_ref = "0.4.1"
tokio = {version = "1.21.2", features = ["full"]} tokio = {version = "1.21.2", features = ["full"]}
async-compression = { version = "0.3.15", features = ["tokio", "zstd"] }
tokio-stream = {version = "0.1.11", features = ["io-util", "tokio-util"]} tokio-stream = {version = "0.1.11", features = ["io-util", "tokio-util"]}
async-stream = "0.3.3"
bytes = "1.2.1"
tokio-util = {version = "0.7.4", features = ["io", "full"]} tokio-util = {version = "0.7.4", features = ["io", "full"]}
tree_magic = {package = "tree_magic_mini", version = "3.0.0"}
xz2 = "0.1.6"
zip = "0.6.3"
zstd = "0.11.2"
[dev-dependencies] [dev-dependencies]
ctor = "0.1.20" ctor = "0.1.20"
pretty_assertions = "1.3.0"
tokio-test = "0.4.2" tokio-test = "0.4.2"

View File

@@ -310,6 +310,7 @@ mod test {
use crate::preproc::loop_adapt; use crate::preproc::loop_adapt;
use crate::test_utils::*; use crate::test_utils::*;
use anyhow::Result; use anyhow::Result;
use pretty_assertions::{assert_eq, assert_ne};
use tokio::fs::File; use tokio::fs::File;
#[tokio::test] #[tokio::test]
@@ -329,12 +330,13 @@ mod test {
let o = adapted_to_vec(r).await?; let o = adapted_to_vec(r).await?;
assert_eq!( assert_eq!(
String::from_utf8(o)?, String::from_utf8(o)?,
"PREFIX:hello world "PREFIX:Page 1:hello world
PREFIX:this is just a test. PREFIX:Page 1:this is just a test.
PREFIX: PREFIX:Page 1:
PREFIX:1 PREFIX:Page 1:1
PREFIX: PREFIX:Page 1:
PREFIX:\u{c} PREFIX:Page 1:
PREFIX:Page 2:
" "
); );
Ok(()) Ok(())

View File

@@ -175,14 +175,11 @@ impl FileAdapter for PostprocPageBreaks {
a: super::AdaptInfo, a: super::AdaptInfo,
_detection_reason: &crate::matching::FileMatcher, _detection_reason: &crate::matching::FileMatcher,
) -> Result<AdaptedFilesIterBox> { ) -> Result<AdaptedFilesIterBox> {
let read = add_newline(postproc_pagebreaks( let read = postproc_pagebreaks("", postproc_encoding(&a.line_prefix, a.inp)?);
&a.line_prefix,
postproc_encoding(&a.line_prefix, a.inp)?,
));
// keep adapt info (filename etc) except replace inp // keep adapt info (filename etc) except replace inp
let ai = AdaptInfo { let ai = AdaptInfo {
inp: Box::pin(read), inp: Box::pin(read),
postprocess: false, postprocess: true,
archive_recursion_depth: a.archive_recursion_depth + 1, archive_recursion_depth: a.archive_recursion_depth + 1,
filepath_hint: a filepath_hint: a
.filepath_hint .filepath_hint

View File

@@ -227,8 +227,6 @@ pub fn loop_adapt(
adapter.metadata().name adapter.metadata().name
) )
})?; })?;
debug!("got fph starting loop: {}", fph.to_string_lossy());
let s = stream! { let s = stream! {
for await file in inp { for await file in inp {
match buf_choose_adapter(file).await.expect("todo: handle") { match buf_choose_adapter(file).await.expect("todo: handle") {

View File

@@ -9,6 +9,7 @@ use anyhow::Result;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use tokio::io::AsyncReadExt; use tokio::io::AsyncReadExt;
pub use pretty_assertions::{assert_eq, assert_ne};
pub fn test_data_dir() -> PathBuf { pub fn test_data_dir() -> PathBuf {
let mut d = PathBuf::from(env!("CARGO_MANIFEST_DIR")); let mut d = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
d.push("exampledir/test/"); d.push("exampledir/test/");