fix poppler test

This commit is contained in:
phiresky 2022-12-25 18:44:52 +01:00
parent ddeceb0ce9
commit 24e866a153
6 changed files with 88 additions and 55 deletions

34
Cargo.lock generated
View File

@ -464,6 +464,12 @@ dependencies = [
"syn",
]
[[package]]
name = "diff"
version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8"
[[package]]
name = "digest"
version = "0.10.6"
@ -1143,6 +1149,15 @@ version = "6.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b7820b9daea5457c9f21c69448905d723fbd21136ccf521748f23fd49e723ee"
[[package]]
name = "output_vt100"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "628223faebab4e3e40667ee0b2336d34a5b960ff60ea743ddfdbcf7770bcfb66"
dependencies = [
"winapi",
]
[[package]]
name = "owning_ref"
version = "0.4.1"
@ -1254,6 +1269,18 @@ dependencies = [
"getopts",
]
[[package]]
name = "pretty_assertions"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a25e9bcb20aa780fd0bb16b72403a9064d6b3f22f026946029acb941a50af755"
dependencies = [
"ctor",
"diff",
"output_vt100",
"yansi",
]
[[package]]
name = "proc-macro-error"
version = "1.0.4"
@ -1379,6 +1406,7 @@ dependencies = [
"paste",
"path-clean",
"pretty-bytes",
"pretty_assertions",
"regex",
"rkv",
"rusqlite",
@ -2155,6 +2183,12 @@ dependencies = [
"lzma-sys",
]
[[package]]
name = "yansi"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec"
[[package]]
name = "zip"
version = "0.6.3"

View File

@ -1,65 +1,66 @@
[package]
name = "ripgrep_all"
description = "rga: ripgrep, but also search in PDFs, E-Books, Office documents, zip, tar.gz, etc."
license = "AGPL-3.0-or-later"
readme = "README.md"
version = "0.9.7-alpha.0"
repository = "https://github.com/phiresky/ripgrep-all"
homepage = "https://github.com/phiresky/ripgrep-all"
authors = ["phiresky <phireskyde+git@gmail.com>"]
description = "rga: ripgrep, but also search in PDFs, E-Books, Office documents, zip, tar.gz, etc."
edition = "2018"
exclude = [
"exampledir/*"
"exampledir/*",
]
homepage = "https://github.com/phiresky/ripgrep-all"
license = "AGPL-3.0-or-later"
name = "ripgrep_all"
readme = "README.md"
repository = "https://github.com/phiresky/ripgrep-all"
version = "0.9.7-alpha.0"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
tree_magic = { package = "tree_magic_mini", version = "3.0.0" }
regex = "1.3.9"
rkv = "0.17"
path-clean = "0.1.0"
anyhow = "1.0.32"
async-compression = {version = "0.3.15", features = ["tokio", "zstd"]}
async-stream = "0.3.3"
bincode = "1.3.1"
serde = { version = "1.0.115", features = ["derive"] }
zstd = "0.11.2"
lazy_static = "1.4.0"
serde_json = "1.0.57"
crossbeam = "0.8.1"
clap = { version = "4.0.18", features = ["wrap_help"] }
log = "0.4.11"
env_logger = "0.9.0"
xz2 = "0.1.6"
flate2 = "1.0.14"
bytes = "1.2.1"
bzip2 = "0.4.1"
tar = "0.4.30"
chrono = "0.4.15"
clap = {version = "4.0.18", features = ["wrap_help"]}
crossbeam = "0.8.1"
crossbeam-channel = "0.5.1"
derive_more = "0.99.9"
directories-next = "2.0.0"
dyn-clonable = "0.9.0"
dyn-clone = "1.0.2"
encoding_rs = "0.8.24"
encoding_rs_io = "0.1.7"
env_logger = "0.9.0"
flate2 = "1.0.14"
glob = "0.3.0"
lazy_static = "1.4.0"
log = "0.4.11"
memchr = "2.3.3"
owning_ref = "0.4.1"
paste = "1.0.0"
path-clean = "0.1.0"
pretty-bytes = "0.2.2"
regex = "1.3.9"
rkv = "0.17"
rusqlite = {version = "0.28.0", features = ["vtab", "bundled"]}
schemars = {version = "0.8.0-alpha-4", features = ["preserve_order"]}
serde = {version = "1.0.115", features = ["derive"]}
serde_json = "1.0.57"
size_format = "1.0.2"
structopt = "0.3.17"
paste = "1.0.0"
tar = "0.4.30"
tempfile = "3.1.0"
glob = "0.3.0"
anyhow = "1.0.32"
schemars = { version = "0.8.0-alpha-4", features = ["preserve_order"] }
directories-next = "2.0.0"
derive_more = "0.99.9"
pretty-bytes = "0.2.2"
memchr = "2.3.3"
crossbeam-channel = "0.5.1"
dyn-clone = "1.0.2"
dyn-clonable = "0.9.0"
zip = "0.6.3"
owning_ref = "0.4.1"
tokio = {version = "1.21.2", features = ["full"]}
async-compression = { version = "0.3.15", features = ["tokio", "zstd"] }
tokio-stream = {version = "0.1.11", features = ["io-util", "tokio-util"]}
async-stream = "0.3.3"
bytes = "1.2.1"
tokio-util = {version = "0.7.4", features = ["io", "full"]}
tree_magic = {package = "tree_magic_mini", version = "3.0.0"}
xz2 = "0.1.6"
zip = "0.6.3"
zstd = "0.11.2"
[dev-dependencies]
ctor = "0.1.20"
pretty_assertions = "1.3.0"
tokio-test = "0.4.2"

View File

@ -310,6 +310,7 @@ mod test {
use crate::preproc::loop_adapt;
use crate::test_utils::*;
use anyhow::Result;
use pretty_assertions::{assert_eq, assert_ne};
use tokio::fs::File;
#[tokio::test]
@ -329,12 +330,13 @@ mod test {
let o = adapted_to_vec(r).await?;
assert_eq!(
String::from_utf8(o)?,
"PREFIX:hello world
PREFIX:this is just a test.
PREFIX:
PREFIX:1
PREFIX:
PREFIX:\u{c}
"PREFIX:Page 1:hello world
PREFIX:Page 1:this is just a test.
PREFIX:Page 1:
PREFIX:Page 1:1
PREFIX:Page 1:
PREFIX:Page 1:
PREFIX:Page 2:
"
);
Ok(())

View File

@ -175,14 +175,11 @@ impl FileAdapter for PostprocPageBreaks {
a: super::AdaptInfo,
_detection_reason: &crate::matching::FileMatcher,
) -> Result<AdaptedFilesIterBox> {
let read = add_newline(postproc_pagebreaks(
&a.line_prefix,
postproc_encoding(&a.line_prefix, a.inp)?,
));
let read = postproc_pagebreaks("", postproc_encoding(&a.line_prefix, a.inp)?);
// keep adapt info (filename etc) except replace inp
let ai = AdaptInfo {
inp: Box::pin(read),
postprocess: false,
postprocess: true,
archive_recursion_depth: a.archive_recursion_depth + 1,
filepath_hint: a
.filepath_hint

View File

@ -227,8 +227,6 @@ pub fn loop_adapt(
adapter.metadata().name
)
})?;
debug!("got fph starting loop: {}", fph.to_string_lossy());
let s = stream! {
for await file in inp {
match buf_choose_adapter(file).await.expect("todo: handle") {

View File

@ -9,6 +9,7 @@ use anyhow::Result;
use std::path::{Path, PathBuf};
use tokio::io::AsyncReadExt;
pub use pretty_assertions::{assert_eq, assert_ne};
pub fn test_data_dir() -> PathBuf {
let mut d = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
d.push("exampledir/test/");