mirror of
https://github.com/FliegendeWurst/ripgrep-all.git
synced 2024-11-24 12:24:56 +00:00
partial migration to read->read
This commit is contained in:
parent
d0d74adfe9
commit
2f580b135a
24
Cargo.lock
generated
24
Cargo.lock
generated
@ -288,6 +288,27 @@ dependencies = [
|
|||||||
"winapi",
|
"winapi",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "dyn-clonable"
|
||||||
|
version = "0.9.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "4e9232f0e607a262ceb9bd5141a3dfb3e4db6994b31989bbfd845878cba59fd4"
|
||||||
|
dependencies = [
|
||||||
|
"dyn-clonable-impl",
|
||||||
|
"dyn-clone",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "dyn-clonable-impl"
|
||||||
|
version = "0.9.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "558e40ea573c374cf53507fd240b7ee2f5477df7cfebdb97323ec61c719399c5"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "dyn-clone"
|
name = "dyn-clone"
|
||||||
version = "1.0.1"
|
version = "1.0.1"
|
||||||
@ -950,8 +971,11 @@ dependencies = [
|
|||||||
"chrono",
|
"chrono",
|
||||||
"clap",
|
"clap",
|
||||||
"crossbeam",
|
"crossbeam",
|
||||||
|
"crossbeam-channel",
|
||||||
"derive_more",
|
"derive_more",
|
||||||
"directories-next",
|
"directories-next",
|
||||||
|
"dyn-clonable",
|
||||||
|
"dyn-clone",
|
||||||
"encoding_rs",
|
"encoding_rs",
|
||||||
"encoding_rs_io",
|
"encoding_rs_io",
|
||||||
"env_logger",
|
"env_logger",
|
||||||
|
@ -49,3 +49,6 @@ directories-next = "1.0.1"
|
|||||||
derive_more = "0.99.7"
|
derive_more = "0.99.7"
|
||||||
pretty-bytes = "0.2.2"
|
pretty-bytes = "0.2.2"
|
||||||
memchr = "2.3.3"
|
memchr = "2.3.3"
|
||||||
|
crossbeam-channel = "0.4.2"
|
||||||
|
dyn-clone = "1.0.1"
|
||||||
|
dyn-clonable = "0.9.0"
|
||||||
|
BIN
exampledir/test.djvu
Normal file
BIN
exampledir/test.djvu
Normal file
Binary file not shown.
BIN
exampledir/test/hello.gz
Normal file
BIN
exampledir/test/hello.gz
Normal file
Binary file not shown.
BIN
exampledir/test/hello.sqlite3
Normal file
BIN
exampledir/test/hello.sqlite3
Normal file
Binary file not shown.
BIN
exampledir/test/short.pdf
Normal file
BIN
exampledir/test/short.pdf
Normal file
Binary file not shown.
BIN
exampledir/test/short.pdf.gz
Normal file
BIN
exampledir/test/short.pdf.gz
Normal file
Binary file not shown.
@ -1,28 +1,31 @@
|
|||||||
pub mod custom;
|
pub mod custom;
|
||||||
pub mod decompress;
|
pub mod decompress;
|
||||||
pub mod ffmpeg;
|
//pub mod ffmpeg;
|
||||||
pub mod fns;
|
pub mod fns;
|
||||||
pub mod pdfpages;
|
//pub mod pdfpages;
|
||||||
pub mod poppler;
|
pub mod poppler;
|
||||||
pub mod spawning;
|
pub mod spawning;
|
||||||
pub mod sqlite;
|
pub mod sqlite;
|
||||||
pub mod tar;
|
//pub mod tar;
|
||||||
pub mod tesseract;
|
//pub mod tesseract;
|
||||||
pub mod zip;
|
pub mod writing;
|
||||||
|
// pub mod zip;
|
||||||
use crate::matching::*;
|
use crate::matching::*;
|
||||||
use crate::preproc::PreprocConfig;
|
use crate::preproc::PreprocConfig;
|
||||||
use anyhow::*;
|
use anyhow::*;
|
||||||
use custom::builtin_spawning_adapters;
|
use custom::builtin_spawning_adapters;
|
||||||
use custom::CustomAdapterConfig;
|
use custom::CustomAdapterConfig;
|
||||||
use log::*;
|
use log::*;
|
||||||
use regex::Regex;
|
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::io::prelude::*;
|
use std::io::prelude::*;
|
||||||
use std::iter::Iterator;
|
use std::iter::Iterator;
|
||||||
use std::path::Path;
|
use std::path::{Path, PathBuf};
|
||||||
use std::rc::Rc;
|
use std::rc::Rc;
|
||||||
|
|
||||||
|
pub type ReadBox = Box<dyn Read + Send>;
|
||||||
|
|
||||||
pub struct AdapterMeta {
|
pub struct AdapterMeta {
|
||||||
/// unique short name of this adapter (a-z0-9 only)
|
/// unique short name of this adapter (a-z0-9 only)
|
||||||
pub name: String,
|
pub name: String,
|
||||||
@ -63,22 +66,20 @@ pub trait FileAdapter: GetMetadata {
|
|||||||
/// adapt a file.
|
/// adapt a file.
|
||||||
///
|
///
|
||||||
/// detection_reason is the Matcher that was used to identify this file. Unless --rga-accurate was given, it is always a FastMatcher
|
/// detection_reason is the Matcher that was used to identify this file. Unless --rga-accurate was given, it is always a FastMatcher
|
||||||
fn adapt(&self, a: AdaptInfo, detection_reason: &SlowMatcher) -> Result<()>;
|
fn adapt(&self, a: AdaptInfo, detection_reason: &SlowMatcher) -> Result<ReadBox>;
|
||||||
}
|
}
|
||||||
pub struct AdaptInfo<'a> {
|
pub struct AdaptInfo {
|
||||||
/// file path. May not be an actual file on the file system (e.g. in an archive). Used for matching file extensions.
|
/// file path. May not be an actual file on the file system (e.g. in an archive). Used for matching file extensions.
|
||||||
pub filepath_hint: &'a Path,
|
pub filepath_hint: PathBuf,
|
||||||
/// true if filepath_hint is an actual file on the file system
|
/// true if filepath_hint is an actual file on the file system
|
||||||
pub is_real_file: bool,
|
pub is_real_file: bool,
|
||||||
/// depth at which this file is in archives. 0 for real filesystem
|
/// depth at which this file is in archives. 0 for real filesystem
|
||||||
pub archive_recursion_depth: i32,
|
pub archive_recursion_depth: i32,
|
||||||
/// stream to read the file from. can be from a file or from some decoder
|
/// stream to read the file from. can be from a file or from some decoder
|
||||||
pub inp: &'a mut dyn Read,
|
pub inp: ReadBox,
|
||||||
/// stream to write to. will be written to from a different thread
|
|
||||||
pub oup: &'a mut (dyn Write + Send),
|
|
||||||
/// prefix every output line with this string to better indicate the file's location if it is in some archive
|
/// prefix every output line with this string to better indicate the file's location if it is in some archive
|
||||||
pub line_prefix: &'a str,
|
pub line_prefix: String,
|
||||||
pub config: PreprocConfig<'a>,
|
pub config: PreprocConfig,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// (enabledAdapters, disabledAdapters)
|
/// (enabledAdapters, disabledAdapters)
|
||||||
@ -94,13 +95,13 @@ pub fn get_all_adapters(custom_adapters: Option<Vec<CustomAdapterConfig>>) -> Ad
|
|||||||
}
|
}
|
||||||
|
|
||||||
let internal_adapters: Vec<Rc<dyn FileAdapter>> = vec![
|
let internal_adapters: Vec<Rc<dyn FileAdapter>> = vec![
|
||||||
Rc::new(ffmpeg::FFmpegAdapter::new()),
|
//Rc::new(ffmpeg::FFmpegAdapter::new()),
|
||||||
Rc::new(zip::ZipAdapter::new()),
|
//Rc::new(zip::ZipAdapter::new()),
|
||||||
Rc::new(decompress::DecompressAdapter::new()),
|
Rc::new(decompress::DecompressAdapter::new()),
|
||||||
Rc::new(tar::TarAdapter::new()),
|
// Rc::new(tar::TarAdapter::new()),
|
||||||
Rc::new(sqlite::SqliteAdapter::new()),
|
Rc::new(sqlite::SqliteAdapter::new()),
|
||||||
Rc::new(pdfpages::PdfPagesAdapter::new()),
|
// Rc::new(pdfpages::PdfPagesAdapter::new()),
|
||||||
Rc::new(tesseract::TesseractAdapter::new()),
|
//Rc::new(tesseract::TesseractAdapter::new()),
|
||||||
];
|
];
|
||||||
adapters.extend(
|
adapters.extend(
|
||||||
builtin_spawning_adapters
|
builtin_spawning_adapters
|
||||||
|
@ -1,4 +1,7 @@
|
|||||||
use super::{spawning::SpawningFileAdapter, AdapterMeta, GetMetadata};
|
use super::{
|
||||||
|
spawning::{SpawningFileAdapter, SpawningFileAdapterTrait},
|
||||||
|
AdapterMeta, GetMetadata,
|
||||||
|
};
|
||||||
use crate::matching::{FastMatcher, SlowMatcher};
|
use crate::matching::{FastMatcher, SlowMatcher};
|
||||||
use lazy_static::lazy_static;
|
use lazy_static::lazy_static;
|
||||||
use schemars::JsonSchema;
|
use schemars::JsonSchema;
|
||||||
@ -112,7 +115,7 @@ impl GetMetadata for CustomSpawningFileAdapter {
|
|||||||
&self.meta
|
&self.meta
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
impl SpawningFileAdapter for CustomSpawningFileAdapter {
|
impl SpawningFileAdapterTrait for CustomSpawningFileAdapter {
|
||||||
fn get_exe(&self) -> &str {
|
fn get_exe(&self) -> &str {
|
||||||
&self.binary
|
&self.binary
|
||||||
}
|
}
|
||||||
@ -126,12 +129,12 @@ impl SpawningFileAdapter for CustomSpawningFileAdapter {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
impl CustomAdapterConfig {
|
impl CustomAdapterConfig {
|
||||||
pub fn to_adapter(self) -> CustomSpawningFileAdapter {
|
pub fn to_adapter(&self) -> SpawningFileAdapter {
|
||||||
CustomSpawningFileAdapter {
|
let ad = CustomSpawningFileAdapter {
|
||||||
binary: self.binary.clone(),
|
binary: self.binary.clone(),
|
||||||
args: self.args.clone(),
|
args: self.args.clone(),
|
||||||
meta: AdapterMeta {
|
meta: AdapterMeta {
|
||||||
name: self.name,
|
name: self.name.clone(),
|
||||||
version: self.version,
|
version: self.version,
|
||||||
description: format!(
|
description: format!(
|
||||||
"{}\nRuns: {} {}",
|
"{}\nRuns: {} {}",
|
||||||
@ -145,7 +148,7 @@ impl CustomAdapterConfig {
|
|||||||
.iter()
|
.iter()
|
||||||
.map(|s| FastMatcher::FileExtension(s.to_string()))
|
.map(|s| FastMatcher::FileExtension(s.to_string()))
|
||||||
.collect(),
|
.collect(),
|
||||||
slow_matchers: self.mimetypes.map(|mimetypes| {
|
slow_matchers: self.mimetypes.as_ref().map(|mimetypes| {
|
||||||
mimetypes
|
mimetypes
|
||||||
.iter()
|
.iter()
|
||||||
.map(|s| SlowMatcher::MimeType(s.to_string()))
|
.map(|s| SlowMatcher::MimeType(s.to_string()))
|
||||||
@ -153,6 +156,43 @@ impl CustomAdapterConfig {
|
|||||||
}),
|
}),
|
||||||
disabled_by_default: self.disabled_by_default.unwrap_or(false),
|
disabled_by_default: self.disabled_by_default.unwrap_or(false),
|
||||||
},
|
},
|
||||||
}
|
};
|
||||||
|
SpawningFileAdapter::new(Box::new(ad))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod test {
|
||||||
|
use super::super::FileAdapter;
|
||||||
|
use super::*;
|
||||||
|
use crate::test_utils::*;
|
||||||
|
use anyhow::Result;
|
||||||
|
use std::fs::File;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn poppler() -> Result<()> {
|
||||||
|
let adapter = builtin_spawning_adapters
|
||||||
|
.iter()
|
||||||
|
.find(|e| e.name == "poppler")
|
||||||
|
.expect("no poppler adapter");
|
||||||
|
|
||||||
|
let adapter = adapter.to_adapter();
|
||||||
|
|
||||||
|
let filepath = test_data_dir().join("short.pdf");
|
||||||
|
|
||||||
|
let (a, d) = simple_adapt_info(&filepath, Box::new(File::open(&filepath)?));
|
||||||
|
let mut r = adapter.adapt(a, &d)?;
|
||||||
|
let mut o = Vec::new();
|
||||||
|
r.read_to_end(&mut o)?;
|
||||||
|
assert_eq!(
|
||||||
|
String::from_utf8(o)?,
|
||||||
|
"hello world
|
||||||
|
this is just a test.
|
||||||
|
|
||||||
|
1
|
||||||
|
|
||||||
|
\u{c}"
|
||||||
|
);
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
use super::*;
|
use super::*;
|
||||||
use crate::preproc::rga_preproc;
|
use crate::preproc::rga_preproc;
|
||||||
use anyhow::*;
|
use anyhow::Result;
|
||||||
use lazy_static::lazy_static;
|
use lazy_static::lazy_static;
|
||||||
|
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
@ -47,16 +47,13 @@ impl GetMetadata for DecompressAdapter {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn decompress_any<'a, R>(reason: &SlowMatcher, inp: &'a mut R) -> Result<Box<dyn Read + 'a>>
|
fn decompress_any(reason: &SlowMatcher, inp: ReadBox) -> Result<ReadBox> {
|
||||||
where
|
|
||||||
R: Read,
|
|
||||||
{
|
|
||||||
use FastMatcher::*;
|
use FastMatcher::*;
|
||||||
use SlowMatcher::*;
|
use SlowMatcher::*;
|
||||||
let gz = |inp: &'a mut R| Box::new(flate2::read::MultiGzDecoder::new(inp));
|
let gz = |inp: ReadBox| Box::new(flate2::read::MultiGzDecoder::new(inp));
|
||||||
let bz2 = |inp: &'a mut R| Box::new(bzip2::read::BzDecoder::new(inp));
|
let bz2 = |inp: ReadBox| Box::new(bzip2::read::BzDecoder::new(inp));
|
||||||
let xz = |inp: &'a mut R| Box::new(xz2::read::XzDecoder::new_multi_decoder(inp));
|
let xz = |inp: ReadBox| Box::new(xz2::read::XzDecoder::new_multi_decoder(inp));
|
||||||
let zst = |inp: &'a mut R| zstd::stream::read::Decoder::new(inp); // returns result
|
let zst = |inp: ReadBox| zstd::stream::read::Decoder::new(inp); // returns result
|
||||||
|
|
||||||
Ok(match reason {
|
Ok(match reason {
|
||||||
Fast(FileExtension(ext)) => match ext.as_ref() {
|
Fast(FileExtension(ext)) => match ext.as_ref() {
|
||||||
@ -92,35 +89,33 @@ fn get_inner_filename(filename: &Path) -> PathBuf {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl FileAdapter for DecompressAdapter {
|
impl FileAdapter for DecompressAdapter {
|
||||||
fn adapt(&self, ai: AdaptInfo, detection_reason: &SlowMatcher) -> Result<()> {
|
fn adapt(&self, ai: AdaptInfo, detection_reason: &SlowMatcher) -> Result<ReadBox> {
|
||||||
let AdaptInfo {
|
let AdaptInfo {
|
||||||
filepath_hint,
|
filepath_hint,
|
||||||
mut inp,
|
inp,
|
||||||
oup,
|
|
||||||
line_prefix,
|
line_prefix,
|
||||||
archive_recursion_depth,
|
archive_recursion_depth,
|
||||||
config,
|
config,
|
||||||
..
|
..
|
||||||
} = ai;
|
} = ai;
|
||||||
|
|
||||||
let mut decompress = decompress_any(detection_reason, &mut inp)?;
|
|
||||||
let ai2: AdaptInfo = AdaptInfo {
|
let ai2: AdaptInfo = AdaptInfo {
|
||||||
filepath_hint: &get_inner_filename(filepath_hint),
|
filepath_hint: get_inner_filename(&filepath_hint),
|
||||||
is_real_file: false,
|
is_real_file: false,
|
||||||
archive_recursion_depth: archive_recursion_depth + 1,
|
archive_recursion_depth: archive_recursion_depth + 1,
|
||||||
inp: &mut decompress,
|
inp: decompress_any(detection_reason, inp)?,
|
||||||
oup,
|
|
||||||
line_prefix,
|
line_prefix,
|
||||||
config: config.clone(),
|
config: config.clone(),
|
||||||
};
|
};
|
||||||
rga_preproc(ai2)?;
|
rga_preproc(ai2)
|
||||||
Ok(())
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
use crate::test_utils::*;
|
||||||
|
use std::fs::File;
|
||||||
#[test]
|
#[test]
|
||||||
fn test_inner_filename() {
|
fn test_inner_filename() {
|
||||||
for (a, b) in &[
|
for (a, b) in &[
|
||||||
@ -135,4 +130,40 @@ mod tests {
|
|||||||
assert_eq!(get_inner_filename(&PathBuf::from(a)), PathBuf::from(*b));
|
assert_eq!(get_inner_filename(&PathBuf::from(a)), PathBuf::from(*b));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn gz() -> Result<()> {
|
||||||
|
let adapter = DecompressAdapter;
|
||||||
|
|
||||||
|
let filepath = test_data_dir().join("hello.gz");
|
||||||
|
|
||||||
|
let (a, d) = simple_adapt_info(&filepath, Box::new(File::open(&filepath)?));
|
||||||
|
let mut r = adapter.adapt(a, &d)?;
|
||||||
|
let mut o = Vec::new();
|
||||||
|
r.read_to_end(&mut o)?;
|
||||||
|
assert_eq!(String::from_utf8(o)?, "hello\n");
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn pdf_gz() -> Result<()> {
|
||||||
|
let adapter = DecompressAdapter;
|
||||||
|
|
||||||
|
let filepath = test_data_dir().join("short.pdf.gz");
|
||||||
|
|
||||||
|
let (a, d) = simple_adapt_info(&filepath, Box::new(File::open(&filepath)?));
|
||||||
|
let mut r = adapter.adapt(a, &d)?;
|
||||||
|
let mut o = Vec::new();
|
||||||
|
r.read_to_end(&mut o)?;
|
||||||
|
assert_eq!(
|
||||||
|
String::from_utf8(o)?,
|
||||||
|
"hello world
|
||||||
|
this is just a test.
|
||||||
|
|
||||||
|
1
|
||||||
|
|
||||||
|
\u{c}"
|
||||||
|
);
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -92,7 +92,7 @@ where
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn postprocB(line_prefix: &str, inp: impl Read) -> Result<impl Read> {
|
pub fn postprocB(_line_prefix: &str, inp: impl Read) -> Result<impl Read> {
|
||||||
let mut page_count = 1;
|
let mut page_count = 1;
|
||||||
|
|
||||||
Ok(ByteReplacer {
|
Ok(ByteReplacer {
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
use super::*;
|
|
||||||
use lazy_static::lazy_static;
|
|
||||||
use spawning::SpawningFileAdapter;
|
|
||||||
use std::io::BufReader;
|
|
||||||
use std::process::Command;
|
|
||||||
/*
|
/*
|
||||||
static EXTENSIONS: &[&str] = &["pdf"];
|
static EXTENSIONS: &[&str] = &["pdf"];
|
||||||
|
|
||||||
|
@ -5,7 +5,7 @@ use log::*;
|
|||||||
use std::io::prelude::*;
|
use std::io::prelude::*;
|
||||||
use std::io::BufReader;
|
use std::io::BufReader;
|
||||||
use std::process::Command;
|
use std::process::Command;
|
||||||
use std::process::Stdio;
|
use std::process::{Child, Stdio};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Copy a Read to a Write, while prefixing every line with a prefix.
|
* Copy a Read to a Write, while prefixing every line with a prefix.
|
||||||
@ -53,15 +53,37 @@ pub fn postproc_line_prefix(
|
|||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
pub trait SpawningFileAdapter: GetMetadata {
|
pub trait SpawningFileAdapterTrait: GetMetadata {
|
||||||
fn get_exe(&self) -> &str;
|
fn get_exe(&self) -> &str;
|
||||||
fn command(&self, filepath_hint: &Path, command: Command) -> Command;
|
fn command(&self, filepath_hint: &Path, command: Command) -> Command;
|
||||||
|
|
||||||
fn postproc(line_prefix: &str, inp: &mut dyn Read, oup: &mut dyn Write) -> Result<()> {
|
/*fn postproc(&self, line_prefix: &str, inp: &mut dyn Read, oup: &mut dyn Write) -> Result<()> {
|
||||||
postproc_line_prefix(line_prefix, inp, oup)
|
postproc_line_prefix(line_prefix, inp, oup)
|
||||||
|
}*/
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct SpawningFileAdapter {
|
||||||
|
inner: Box<dyn SpawningFileAdapterTrait>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SpawningFileAdapter {
|
||||||
|
pub fn new(inner: Box<dyn SpawningFileAdapterTrait>) -> SpawningFileAdapter {
|
||||||
|
SpawningFileAdapter { inner }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl GetMetadata for SpawningFileAdapter {
|
||||||
|
fn metadata(&self) -> &AdapterMeta {
|
||||||
|
self.inner.metadata()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*impl<T: SpawningFileAdapterTrait> From<T> for SpawningFileAdapter {
|
||||||
|
fn from(e: dyn T) -> Self {
|
||||||
|
SpawningFileAdapter { inner: Box::new(e) }
|
||||||
|
}
|
||||||
|
}*/
|
||||||
|
|
||||||
/// replace a Command.spawn() error "File not found" with a more readable error
|
/// replace a Command.spawn() error "File not found" with a more readable error
|
||||||
/// to indicate some program is not installed
|
/// to indicate some program is not installed
|
||||||
pub fn map_exe_error(err: std::io::Error, exe_name: &str, help: &str) -> Error {
|
pub fn map_exe_error(err: std::io::Error, exe_name: &str, help: &str) -> Error {
|
||||||
@ -71,63 +93,61 @@ pub fn map_exe_error(err: std::io::Error, exe_name: &str, help: &str) -> Error {
|
|||||||
_ => Error::from(err),
|
_ => Error::from(err),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct ProcWaitReader {
|
||||||
|
proce: Child,
|
||||||
|
}
|
||||||
|
impl Read for ProcWaitReader {
|
||||||
|
fn read(&mut self, _buf: &mut [u8]) -> std::io::Result<usize> {
|
||||||
|
let status = self.proce.wait()?;
|
||||||
|
if status.success() {
|
||||||
|
Ok(0)
|
||||||
|
} else {
|
||||||
|
Err(std::io::Error::new(
|
||||||
|
std::io::ErrorKind::Other,
|
||||||
|
format_err!("subprocess failed: {:?}", status),
|
||||||
|
))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
pub fn pipe_output(
|
pub fn pipe_output(
|
||||||
line_prefix: &str,
|
_line_prefix: &str,
|
||||||
mut cmd: Command,
|
mut cmd: Command,
|
||||||
inp: &mut (dyn Read),
|
inp: &mut (dyn Read),
|
||||||
oup: &mut (dyn Write + Send),
|
|
||||||
exe_name: &str,
|
exe_name: &str,
|
||||||
help: &str,
|
help: &str,
|
||||||
cp: fn(line_prefix: &str, &mut dyn Read, &mut dyn Write) -> Result<()>,
|
) -> Result<ReadBox> {
|
||||||
) -> Result<()> {
|
|
||||||
let mut cmd = cmd
|
let mut cmd = cmd
|
||||||
.stdin(Stdio::piped())
|
.stdin(Stdio::piped())
|
||||||
.stdout(Stdio::piped())
|
.stdout(Stdio::piped())
|
||||||
.spawn()
|
.spawn()
|
||||||
.map_err(|e| map_exe_error(e, exe_name, help))?;
|
.map_err(|e| map_exe_error(e, exe_name, help))?;
|
||||||
let mut stdi = cmd.stdin.take().expect("is piped");
|
let mut stdi = cmd.stdin.take().expect("is piped");
|
||||||
let mut stdo = cmd.stdout.take().expect("is piped");
|
let stdo = cmd.stdout.take().expect("is piped");
|
||||||
|
|
||||||
// TODO: how to handle this copying better?
|
// TODO: how to handle this copying better?
|
||||||
// do we really need threads for this?
|
// do we really need threads for this?
|
||||||
crossbeam::scope(|s| -> Result<()> {
|
crossbeam::scope(|_s| -> Result<()> {
|
||||||
s.spawn(|_| cp(line_prefix, &mut stdo, oup).unwrap()); // errors?
|
|
||||||
std::io::copy(inp, &mut stdi)?;
|
std::io::copy(inp, &mut stdi)?;
|
||||||
drop(stdi); // NEEDED! otherwise deadlock
|
drop(stdi); // NEEDED! otherwise deadlock
|
||||||
Ok(())
|
Ok(())
|
||||||
})
|
})
|
||||||
.unwrap()?;
|
.unwrap()?;
|
||||||
let status = cmd.wait()?;
|
Ok(Box::new(stdo.chain(ProcWaitReader { proce: cmd })))
|
||||||
if status.success() {
|
|
||||||
Ok(())
|
|
||||||
} else {
|
|
||||||
Err(format_err!("subprocess failed: {:?}", status))
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<T> FileAdapter for T
|
impl FileAdapter for SpawningFileAdapter {
|
||||||
where
|
fn adapt(&self, ai: AdaptInfo, _detection_reason: &SlowMatcher) -> Result<ReadBox> {
|
||||||
T: SpawningFileAdapter,
|
|
||||||
{
|
|
||||||
fn adapt(&self, ai: AdaptInfo, _detection_reason: &SlowMatcher) -> Result<()> {
|
|
||||||
let AdaptInfo {
|
let AdaptInfo {
|
||||||
filepath_hint,
|
filepath_hint,
|
||||||
mut inp,
|
mut inp,
|
||||||
oup,
|
|
||||||
line_prefix,
|
line_prefix,
|
||||||
..
|
..
|
||||||
} = ai;
|
} = ai;
|
||||||
let cmd = Command::new(self.get_exe());
|
|
||||||
let cmd = self.command(filepath_hint, cmd);
|
let cmd = Command::new(self.inner.get_exe());
|
||||||
|
let cmd = self.inner.command(&filepath_hint, cmd);
|
||||||
debug!("executing {:?}", cmd);
|
debug!("executing {:?}", cmd);
|
||||||
pipe_output(
|
pipe_output(&line_prefix, cmd, &mut inp, self.inner.get_exe(), "")
|
||||||
line_prefix,
|
|
||||||
cmd,
|
|
||||||
&mut inp,
|
|
||||||
oup,
|
|
||||||
self.get_exe(),
|
|
||||||
"",
|
|
||||||
Self::postproc,
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -5,6 +5,7 @@ use log::*;
|
|||||||
use rusqlite::types::ValueRef;
|
use rusqlite::types::ValueRef;
|
||||||
use rusqlite::*;
|
use rusqlite::*;
|
||||||
use std::convert::TryInto;
|
use std::convert::TryInto;
|
||||||
|
use writing::{WritingFileAdapter, WritingFileAdapterTrait};
|
||||||
|
|
||||||
static EXTENSIONS: &[&str] = &["db", "db3", "sqlite", "sqlite3"];
|
static EXTENSIONS: &[&str] = &["db", "db3", "sqlite", "sqlite3"];
|
||||||
|
|
||||||
@ -27,12 +28,12 @@ lazy_static! {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Default)]
|
#[derive(Default, Clone)]
|
||||||
pub struct SqliteAdapter;
|
pub struct SqliteAdapter;
|
||||||
|
|
||||||
impl SqliteAdapter {
|
impl SqliteAdapter {
|
||||||
pub fn new() -> SqliteAdapter {
|
pub fn new() -> WritingFileAdapter {
|
||||||
SqliteAdapter
|
WritingFileAdapter::new(Box::new(SqliteAdapter {}))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
impl GetMetadata for SqliteAdapter {
|
impl GetMetadata for SqliteAdapter {
|
||||||
@ -58,12 +59,16 @@ fn format_blob(b: ValueRef) -> String {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl FileAdapter for SqliteAdapter {
|
impl WritingFileAdapterTrait for SqliteAdapter {
|
||||||
fn adapt(&self, ai: AdaptInfo, _detection_reason: &SlowMatcher) -> Result<()> {
|
fn adapt_write(
|
||||||
|
&self,
|
||||||
|
ai: AdaptInfo,
|
||||||
|
_detection_reason: &SlowMatcher,
|
||||||
|
oup: &mut dyn Write,
|
||||||
|
) -> Result<()> {
|
||||||
let AdaptInfo {
|
let AdaptInfo {
|
||||||
is_real_file,
|
is_real_file,
|
||||||
filepath_hint,
|
filepath_hint,
|
||||||
oup,
|
|
||||||
line_prefix,
|
line_prefix,
|
||||||
..
|
..
|
||||||
} = ai;
|
} = ai;
|
||||||
@ -116,3 +121,29 @@ impl FileAdapter for SqliteAdapter {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod test {
|
||||||
|
use super::*;
|
||||||
|
use crate::{test_utils::*};
|
||||||
|
use std::{fs::File};
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn simple() -> Result<()> {
|
||||||
|
let adapter: Box<dyn FileAdapter> = Box::new(SqliteAdapter::new());
|
||||||
|
let fname = test_data_dir().join("hello.sqlite3");
|
||||||
|
let rd = File::open(&fname)?;
|
||||||
|
let (a, d) = simple_adapt_info(&fname, Box::new(rd));
|
||||||
|
let mut res = adapter.adapt(a, &d)?;
|
||||||
|
|
||||||
|
let mut buf = Vec::new();
|
||||||
|
res.read_to_end(&mut buf)?;
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
String::from_utf8(buf)?,
|
||||||
|
"PREFIX:tbl: greeting='hello', from='sqlite database!'\nPREFIX:tbl2: x=123, y=456.789\n",
|
||||||
|
);
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
use super::*;
|
use super::*;
|
||||||
use lazy_static::lazy_static;
|
use lazy_static::lazy_static;
|
||||||
use spawning::SpawningFileAdapter;
|
use spawning::{SpawningFileAdapter, SpawningFileAdapterTrait};
|
||||||
use std::process::Command;
|
use std::process::Command;
|
||||||
|
|
||||||
static EXTENSIONS: &[&str] = &["jpg", "png"];
|
static EXTENSIONS: &[&str] = &["jpg", "png"];
|
||||||
@ -33,7 +33,7 @@ impl GetMetadata for TesseractAdapter {
|
|||||||
&METADATA
|
&METADATA
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
impl SpawningFileAdapter for TesseractAdapter {
|
impl SpawningFileAdapterTrait for TesseractAdapter {
|
||||||
fn get_exe(&self) -> &str {
|
fn get_exe(&self) -> &str {
|
||||||
"tesseract"
|
"tesseract"
|
||||||
}
|
}
|
||||||
|
51
src/adapters/writing.rs
Normal file
51
src/adapters/writing.rs
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
use super::{FileAdapter, GetMetadata, ReadBox};
|
||||||
|
use anyhow::Result;
|
||||||
|
use std::io::Write;
|
||||||
|
|
||||||
|
#[dyn_clonable::clonable]
|
||||||
|
pub trait WritingFileAdapterTrait: GetMetadata + Send + Clone {
|
||||||
|
fn adapt_write(
|
||||||
|
&self,
|
||||||
|
a: super::AdaptInfo,
|
||||||
|
detection_reason: &crate::matching::SlowMatcher,
|
||||||
|
oup: &mut dyn Write,
|
||||||
|
) -> Result<()>;
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct WritingFileAdapter {
|
||||||
|
inner: Box<dyn WritingFileAdapterTrait>,
|
||||||
|
}
|
||||||
|
impl WritingFileAdapter {
|
||||||
|
pub fn new(inner: Box<dyn WritingFileAdapterTrait>) -> WritingFileAdapter {
|
||||||
|
WritingFileAdapter { inner }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl GetMetadata for WritingFileAdapter {
|
||||||
|
fn metadata(&self) -> &super::AdapterMeta {
|
||||||
|
self.inner.metadata()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl FileAdapter for WritingFileAdapter {
|
||||||
|
fn adapt(
|
||||||
|
&self,
|
||||||
|
a: super::AdaptInfo,
|
||||||
|
detection_reason: &crate::matching::SlowMatcher,
|
||||||
|
) -> anyhow::Result<ReadBox> {
|
||||||
|
let (r, w) = crate::pipe::pipe();
|
||||||
|
let cc = self.inner.clone();
|
||||||
|
let detc = detection_reason.clone();
|
||||||
|
std::thread::spawn(move || {
|
||||||
|
let mut oup = w;
|
||||||
|
let ai = a;
|
||||||
|
let res = cc.adapt_write(ai, &detc, &mut oup);
|
||||||
|
if let Err(e) = res {
|
||||||
|
oup.write_err(std::io::Error::new(std::io::ErrorKind::Other, e))
|
||||||
|
.expect("could not write err");
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
Ok(Box::new(r))
|
||||||
|
}
|
||||||
|
}
|
@ -85,7 +85,7 @@ impl FromStr for CacheMaxBlobLen {
|
|||||||
///
|
///
|
||||||
/// 1. describing the command line arguments using structopt+clap and for man page / readme generation
|
/// 1. describing the command line arguments using structopt+clap and for man page / readme generation
|
||||||
/// 2. describing the config file format (output as JSON schema via schemars)
|
/// 2. describing the config file format (output as JSON schema via schemars)
|
||||||
#[derive(StructOpt, Debug, Deserialize, Serialize, JsonSchema, Default)]
|
#[derive(StructOpt, Debug, Deserialize, Serialize, JsonSchema, Default, Clone)]
|
||||||
#[structopt(
|
#[structopt(
|
||||||
name = "ripgrep-all",
|
name = "ripgrep-all",
|
||||||
rename_all = "kebab-case",
|
rename_all = "kebab-case",
|
||||||
|
@ -16,7 +16,7 @@ fn main() -> anyhow::Result<()> {
|
|||||||
std::env::current_dir()?.join(&filepath)
|
std::env::current_dir()?.join(&filepath)
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut i = File::open(&path)?;
|
let i = File::open(&path)?;
|
||||||
let mut o = std::io::stdout();
|
let mut o = std::io::stdout();
|
||||||
let cache = if args.no_cache {
|
let cache = if args.no_cache {
|
||||||
None
|
None
|
||||||
@ -24,14 +24,14 @@ fn main() -> anyhow::Result<()> {
|
|||||||
Some(rga::preproc_cache::open().context("could not open cache")?)
|
Some(rga::preproc_cache::open().context("could not open cache")?)
|
||||||
};
|
};
|
||||||
let ai = AdaptInfo {
|
let ai = AdaptInfo {
|
||||||
inp: &mut i,
|
inp: Box::new(i),
|
||||||
filepath_hint: &path,
|
filepath_hint: path,
|
||||||
is_real_file: true,
|
is_real_file: true,
|
||||||
oup: &mut o,
|
line_prefix: "".to_string(),
|
||||||
line_prefix: "",
|
|
||||||
archive_recursion_depth: 0,
|
archive_recursion_depth: 0,
|
||||||
config: PreprocConfig { cache, args: &args },
|
config: PreprocConfig { cache, args },
|
||||||
};
|
};
|
||||||
rga_preproc(ai)?;
|
let mut oup = rga_preproc(ai)?;
|
||||||
|
std::io::copy(&mut oup, &mut o).context("copying adapter output to stdout")?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
@ -1,11 +1,16 @@
|
|||||||
#![warn(clippy::all)]
|
#![warn(clippy::all)]
|
||||||
|
#![feature(negative_impls)]
|
||||||
|
#![feature(specialization)]
|
||||||
|
|
||||||
pub mod adapters;
|
pub mod adapters;
|
||||||
pub mod args;
|
pub mod args;
|
||||||
mod caching_writer;
|
mod caching_writer;
|
||||||
pub mod matching;
|
pub mod matching;
|
||||||
|
pub mod pipe;
|
||||||
pub mod preproc;
|
pub mod preproc;
|
||||||
pub mod preproc_cache;
|
pub mod preproc_cache;
|
||||||
|
#[cfg(test)]
|
||||||
|
pub mod test_utils;
|
||||||
use anyhow::Context;
|
use anyhow::Context;
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
pub use caching_writer::CachingWriter;
|
pub use caching_writer::CachingWriter;
|
||||||
|
@ -33,6 +33,12 @@ pub enum SlowMatcher {
|
|||||||
MimeType(String),
|
MimeType(String),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl From<FastMatcher> for SlowMatcher {
|
||||||
|
fn from(t: FastMatcher) -> Self {
|
||||||
|
SlowMatcher::Fast(t)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub struct FileMeta {
|
pub struct FileMeta {
|
||||||
// filename is not actually a utf8 string, but since we can't do regex on OsStr and can't get a &[u8] from OsStr either,
|
// filename is not actually a utf8 string, but since we can't do regex on OsStr and can't get a &[u8] from OsStr either,
|
||||||
// and since we probably only want to do only matching on ascii stuff anyways, this is the filename as a string with non-valid bytes removed
|
// and since we probably only want to do only matching on ascii stuff anyways, this is the filename as a string with non-valid bytes removed
|
||||||
|
196
src/pipe.rs
Normal file
196
src/pipe.rs
Normal file
@ -0,0 +1,196 @@
|
|||||||
|
// https://github.com/arcnmx/pipe-rs/blob/master/src/lib.rs
|
||||||
|
// extended to support sending io errors
|
||||||
|
|
||||||
|
#![deny(missing_docs)]
|
||||||
|
#![doc(html_root_url = "https://docs.rs/pipe/0.3.0")]
|
||||||
|
#![cfg_attr(feature = "unstable-doc-cfg", feature(doc_cfg))]
|
||||||
|
|
||||||
|
//! Synchronous in-memory pipe
|
||||||
|
//!
|
||||||
|
//! ## Example
|
||||||
|
//!
|
||||||
|
//! ```
|
||||||
|
//! use std::thread::spawn;
|
||||||
|
//! use std::io::{Read, Write};
|
||||||
|
//!
|
||||||
|
//! let (mut read, mut write) = ripgrep_all::pipe::pipe();
|
||||||
|
//!
|
||||||
|
//! let message = "Hello, world!";
|
||||||
|
//! spawn(move || write.write_all(message.as_bytes()).unwrap());
|
||||||
|
//!
|
||||||
|
//! let mut s = String::new();
|
||||||
|
//! read.read_to_string(&mut s).unwrap();
|
||||||
|
//!
|
||||||
|
//! assert_eq!(&s, message);
|
||||||
|
//! ```
|
||||||
|
|
||||||
|
use crossbeam_channel::{Receiver, Sender};
|
||||||
|
use std::cmp::min;
|
||||||
|
use std::io::{self, BufRead, Read, Result, Write};
|
||||||
|
|
||||||
|
/// The `Read` end of a pipe (see `pipe()`)
|
||||||
|
pub struct PipeReader {
|
||||||
|
receiver: Receiver<Result<Vec<u8>>>,
|
||||||
|
buffer: Vec<u8>,
|
||||||
|
position: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The `Write` end of a pipe (see `pipe()`)
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct PipeWriter {
|
||||||
|
sender: Sender<Result<Vec<u8>>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Creates a synchronous memory pipe
|
||||||
|
pub fn pipe() -> (PipeReader, PipeWriter) {
|
||||||
|
let (sender, receiver) = crossbeam_channel::bounded(0);
|
||||||
|
|
||||||
|
(
|
||||||
|
PipeReader {
|
||||||
|
receiver,
|
||||||
|
buffer: Vec::new(),
|
||||||
|
position: 0,
|
||||||
|
},
|
||||||
|
PipeWriter { sender },
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PipeWriter {
|
||||||
|
/// Extracts the inner `SyncSender` from the writer
|
||||||
|
pub fn into_inner(self) -> Sender<Result<Vec<u8>>> {
|
||||||
|
self.sender
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Write any error into the pipe, will be handled as an IO error
|
||||||
|
pub fn write_err(&self, e: std::io::Error) -> Result<()> {
|
||||||
|
self.sender
|
||||||
|
.send(Err(e))
|
||||||
|
.map_err(|_| io::Error::new(io::ErrorKind::BrokenPipe, "pipe reader has been dropped"))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PipeReader {
|
||||||
|
/// Extracts the inner `Receiver` from the writer, and any pending buffered data
|
||||||
|
pub fn into_inner(mut self) -> (Receiver<Result<Vec<u8>>>, Vec<u8>) {
|
||||||
|
self.buffer.drain(..self.position);
|
||||||
|
(self.receiver, self.buffer)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl BufRead for PipeReader {
|
||||||
|
fn fill_buf(&mut self) -> io::Result<&[u8]> {
|
||||||
|
while self.position >= self.buffer.len() {
|
||||||
|
match self.receiver.recv() {
|
||||||
|
// The only existing error is EOF
|
||||||
|
Err(_) => break,
|
||||||
|
Ok(Err(e)) => Err(e)?,
|
||||||
|
Ok(Ok(data)) => {
|
||||||
|
self.buffer = data;
|
||||||
|
self.position = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(&self.buffer[self.position..])
|
||||||
|
}
|
||||||
|
|
||||||
|
fn consume(&mut self, amt: usize) {
|
||||||
|
debug_assert!(self.buffer.len() - self.position >= amt);
|
||||||
|
self.position += amt
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Read for PipeReader {
|
||||||
|
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
|
||||||
|
if buf.is_empty() {
|
||||||
|
return Ok(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
let internal = self.fill_buf()?;
|
||||||
|
|
||||||
|
let len = min(buf.len(), internal.len());
|
||||||
|
if len > 0 {
|
||||||
|
buf[..len].copy_from_slice(&internal[..len]);
|
||||||
|
self.consume(len);
|
||||||
|
}
|
||||||
|
Ok(len)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Write for PipeWriter {
|
||||||
|
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
|
||||||
|
let data = buf.to_vec();
|
||||||
|
|
||||||
|
self.sender
|
||||||
|
.send(Ok(data))
|
||||||
|
.map(|_| buf.len())
|
||||||
|
.map_err(|_| io::Error::new(io::ErrorKind::BrokenPipe, "pipe reader has been dropped"))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn flush(&mut self) -> io::Result<()> {
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::{Read, Write};
    use std::thread::spawn;

    /// Bytes written in two separate chunks arrive as one contiguous stream,
    /// and dropping the writer ends the stream.
    #[test]
    fn pipe_reader() {
        let input = b"hello there";
        let (mut reader, mut writer) = pipe();

        let producer = spawn(move || {
            writer.write_all(&input[..5]).unwrap();
            writer.write_all(&input[5..]).unwrap();
            drop(writer);
        });

        let mut output = Vec::with_capacity(input.len());
        reader.read_to_end(&mut output).unwrap();
        assert_eq!(input, &output[..]);

        producer.join().unwrap();
    }

    /// Writing fails once the reader has been dropped.
    #[test]
    fn pipe_writer_fail() {
        let payload = b"hi";
        let (reader, mut writer) = pipe();

        let dropper = spawn(move || drop(reader));

        assert!(writer.write_all(payload).is_err());

        dropper.join().unwrap();
    }

    /// Reading with a buffer smaller than the written chunks still yields
    /// every byte.
    #[test]
    fn small_reads() {
        const BLOCK: usize = 20;
        let block_cnt = 20;
        let (mut reader, mut writer) = pipe();

        let producer = spawn(move || {
            let block = [0u8; BLOCK];
            for _ in 0..block_cnt {
                writer.write_all(&block).unwrap();
            }
        });

        let mut chunk = [0u8; BLOCK / 2];
        let mut total = 0;
        loop {
            match reader.read(&mut chunk) {
                // 0 means EOF; a read error also ends the loop
                Ok(0) | Err(_) => break,
                Ok(size) => total += size,
            }
        }
        assert_eq!(block_cnt * BLOCK, total);

        producer.join().unwrap();
    }
}
|
105
src/preproc.rs
105
src/preproc.rs
@ -4,20 +4,20 @@ use crate::matching::*;
|
|||||||
use crate::{print_bytes, print_dur, CachingWriter};
|
use crate::{print_bytes, print_dur, CachingWriter};
|
||||||
use anyhow::*;
|
use anyhow::*;
|
||||||
use log::*;
|
use log::*;
|
||||||
use path_clean::PathClean;
|
|
||||||
use std::convert::TryInto;
|
|
||||||
use std::io::BufRead;
|
|
||||||
use std::io::BufReader;
|
use std::io::BufReader;
|
||||||
use std::io::BufWriter;
|
|
||||||
use std::{
|
use std::{
|
||||||
sync::{Arc, RwLock},
|
sync::{Arc, RwLock},
|
||||||
time::Instant,
|
time::Instant,
|
||||||
};
|
};
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct PreprocConfig<'a> {
|
pub struct PreprocConfig {
|
||||||
pub cache: Option<Arc<RwLock<dyn crate::preproc_cache::PreprocCache>>>,
|
pub cache: Option<Arc<RwLock<dyn crate::preproc_cache::PreprocCache>>>,
|
||||||
pub args: &'a RgaConfig,
|
pub args: RgaConfig,
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* preprocess a file as defined in `ai`.
|
* preprocess a file as defined in `ai`.
|
||||||
@ -25,19 +25,18 @@ pub struct PreprocConfig<'a> {
|
|||||||
* If a cache is passed, read/write to it.
|
* If a cache is passed, read/write to it.
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
pub fn rga_preproc(ai: AdaptInfo) -> Result<()> {
|
pub fn rga_preproc(ai: AdaptInfo) -> Result<ReadBox> {
|
||||||
let AdaptInfo {
|
let AdaptInfo {
|
||||||
filepath_hint,
|
filepath_hint,
|
||||||
is_real_file,
|
is_real_file,
|
||||||
inp,
|
inp,
|
||||||
oup,
|
|
||||||
line_prefix,
|
line_prefix,
|
||||||
config,
|
config,
|
||||||
archive_recursion_depth,
|
archive_recursion_depth,
|
||||||
..
|
..
|
||||||
} = ai;
|
} = ai;
|
||||||
debug!("path (hint) to preprocess: {:?}", filepath_hint);
|
debug!("path (hint) to preprocess: {:?}", filepath_hint);
|
||||||
let PreprocConfig { mut cache, args } = config;
|
let PreprocConfig { cache: _, args } = config;
|
||||||
let filtered_adapters = get_adapters_filtered(args.custom_adapters.clone(), &args.adapters)?;
|
let filtered_adapters = get_adapters_filtered(args.custom_adapters.clone(), &args.adapters)?;
|
||||||
let adapters = adapter_matcher(&filtered_adapters, args.accurate)?;
|
let adapters = adapter_matcher(&filtered_adapters, args.accurate)?;
|
||||||
let filename = filepath_hint
|
let filename = filepath_hint
|
||||||
@ -45,22 +44,22 @@ pub fn rga_preproc(ai: AdaptInfo) -> Result<()> {
|
|||||||
.ok_or_else(|| format_err!("Empty filename"))?;
|
.ok_or_else(|| format_err!("Empty filename"))?;
|
||||||
debug!("Archive recursion depth: {}", archive_recursion_depth);
|
debug!("Archive recursion depth: {}", archive_recursion_depth);
|
||||||
if archive_recursion_depth >= args.max_archive_recursion.0 {
|
if archive_recursion_depth >= args.max_archive_recursion.0 {
|
||||||
writeln!(oup, "{}[rga: max archive recursion reached]", line_prefix)?;
|
let s = format!("{}[rga: max archive recursion reached]", line_prefix).into_bytes();
|
||||||
return Ok(());
|
return Ok(Box::new(std::io::Cursor::new(s)));
|
||||||
}
|
}
|
||||||
|
|
||||||
// todo: figure out when using a bufreader is a good idea and when it is not
|
// todo: figure out when using a bufreader is a good idea and when it is not
|
||||||
// seems to be good for File::open() reads, but not sure about within archives (tar, zip)
|
// seems to be good for File::open() reads, but not sure about within archives (tar, zip)
|
||||||
let inp = &mut BufReader::with_capacity(1 << 13, inp);
|
let inp = BufReader::with_capacity(1 << 16, inp);
|
||||||
|
|
||||||
let mimetype = if args.accurate {
|
let mimetype = None; /*if args.accurate {
|
||||||
let buf = inp.fill_buf()?; // fill but do not consume!
|
let buf = inp.fill_buf()?; // fill but do not consume!
|
||||||
let mimetype = tree_magic::from_u8(buf);
|
let mimetype = tree_magic::from_u8(buf);
|
||||||
debug!("mimetype: {:?}", mimetype);
|
debug!("mimetype: {:?}", mimetype);
|
||||||
Some(mimetype)
|
Some(mimetype)
|
||||||
} else {
|
} else {
|
||||||
None
|
None
|
||||||
};
|
};*/
|
||||||
let adapter = adapters(FileMeta {
|
let adapter = adapters(FileMeta {
|
||||||
mimetype,
|
mimetype,
|
||||||
lossy_filename: filename.to_string_lossy().to_string(),
|
lossy_filename: filename.to_string_lossy().to_string(),
|
||||||
@ -77,8 +76,8 @@ pub fn rga_preproc(ai: AdaptInfo) -> Result<()> {
|
|||||||
filepath_hint.to_string_lossy(),
|
filepath_hint.to_string_lossy(),
|
||||||
&meta.name
|
&meta.name
|
||||||
);
|
);
|
||||||
let db_name = format!("{}.v{}", meta.name, meta.version);
|
let _db_name = format!("{}.v{}", meta.name, meta.version);
|
||||||
if let Some(cache) = cache.as_mut() {
|
/*if let Some(cache) = cache.as_mut() {
|
||||||
let cache_key: Vec<u8> = {
|
let cache_key: Vec<u8> = {
|
||||||
let clean_path = filepath_hint.to_owned().clean();
|
let clean_path = filepath_hint.to_owned().clean();
|
||||||
let meta = std::fs::metadata(&filepath_hint)?;
|
let meta = std::fs::metadata(&filepath_hint)?;
|
||||||
@ -160,45 +159,43 @@ pub fn rga_preproc(ai: AdaptInfo) -> Result<()> {
|
|||||||
}),
|
}),
|
||||||
)?;
|
)?;
|
||||||
Ok(())
|
Ok(())
|
||||||
} else {
|
} else { */
|
||||||
// no cache arc - probably within archive
|
// no cache arc - probably within archive
|
||||||
debug!("adapting without caching...");
|
debug!("adapting without caching...");
|
||||||
let start = Instant::now();
|
let start = Instant::now();
|
||||||
adapter
|
let oread = adapter
|
||||||
.adapt(
|
.adapt(
|
||||||
AdaptInfo {
|
AdaptInfo {
|
||||||
line_prefix,
|
line_prefix,
|
||||||
filepath_hint,
|
filepath_hint: filepath_hint.clone(),
|
||||||
is_real_file,
|
is_real_file,
|
||||||
inp,
|
inp: Box::new(inp),
|
||||||
oup,
|
archive_recursion_depth,
|
||||||
archive_recursion_depth,
|
config: PreprocConfig { cache: None, args },
|
||||||
config: PreprocConfig { cache: None, args },
|
},
|
||||||
},
|
&detection_reason,
|
||||||
&detection_reason,
|
)
|
||||||
|
.with_context(|| {
|
||||||
|
format!(
|
||||||
|
"adapting {} via {} without caching failed",
|
||||||
|
filepath_hint.to_string_lossy(),
|
||||||
|
meta.name
|
||||||
)
|
)
|
||||||
.with_context(|| {
|
})?;
|
||||||
format!(
|
debug!(
|
||||||
"adapting {} via {} without caching failed",
|
"running adapter {} took {}",
|
||||||
filepath_hint.to_string_lossy(),
|
adapter.metadata().name,
|
||||||
meta.name
|
print_dur(start)
|
||||||
)
|
);
|
||||||
})?;
|
Ok(oread)
|
||||||
debug!(
|
/* }*/
|
||||||
"running adapter {} took {}",
|
|
||||||
adapter.metadata().name,
|
|
||||||
print_dur(start)
|
|
||||||
);
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
None => {
|
None => {
|
||||||
// allow passthrough if the file is in an archive or accurate matching is enabled
|
// allow passthrough if the file is in an archive or accurate matching is enabled
|
||||||
// otherwise it should have been filtered out by rg pre-glob since rg can handle those better than us
|
// otherwise it should have been filtered out by rg pre-glob since rg can handle those better than us
|
||||||
let allow_cat = !is_real_file || args.accurate;
|
let allow_cat = !is_real_file || args.accurate;
|
||||||
if allow_cat {
|
if allow_cat {
|
||||||
spawning::postproc_line_prefix(line_prefix, inp, oup)?;
|
Ok(Box::new(inp))
|
||||||
Ok(())
|
|
||||||
} else {
|
} else {
|
||||||
Err(format_err!(
|
Err(format_err!(
|
||||||
"No adapter found for file {:?}, passthrough disabled.",
|
"No adapter found for file {:?}, passthrough disabled.",
|
||||||
|
@ -10,7 +10,7 @@ use std::{
|
|||||||
pub fn open() -> Result<Arc<RwLock<dyn PreprocCache>>> {
|
pub fn open() -> Result<Arc<RwLock<dyn PreprocCache>>> {
|
||||||
Ok(Arc::new(RwLock::new(LmdbCache::open()?)))
|
Ok(Arc::new(RwLock::new(LmdbCache::open()?)))
|
||||||
}
|
}
|
||||||
pub trait PreprocCache {
|
pub trait PreprocCache: Send + Sync {
|
||||||
// possible without second lambda?
|
// possible without second lambda?
|
||||||
fn get_or_run<'a>(
|
fn get_or_run<'a>(
|
||||||
&mut self,
|
&mut self,
|
||||||
|
33
src/test_utils.rs
Normal file
33
src/test_utils.rs
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
use crate::{
|
||||||
|
adapters::{AdaptInfo, ReadBox},
|
||||||
|
args::RgaConfig,
|
||||||
|
matching::{FastMatcher, SlowMatcher},
|
||||||
|
preproc::PreprocConfig,
|
||||||
|
};
|
||||||
|
use std::{
|
||||||
|
path::{Path, PathBuf},
|
||||||
|
};
|
||||||
|
|
||||||
|
pub fn test_data_dir() -> PathBuf {
|
||||||
|
let mut d = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
|
||||||
|
d.push("exampledir/test/");
|
||||||
|
d
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn simple_adapt_info(filepath: &Path, inp: ReadBox) -> (AdaptInfo, SlowMatcher) {
|
||||||
|
(
|
||||||
|
AdaptInfo {
|
||||||
|
filepath_hint: filepath.to_owned(),
|
||||||
|
is_real_file: true,
|
||||||
|
archive_recursion_depth: 0,
|
||||||
|
inp,
|
||||||
|
line_prefix: "PREFIX:".to_string(),
|
||||||
|
config: PreprocConfig {
|
||||||
|
cache: None,
|
||||||
|
args: RgaConfig::default(),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
FastMatcher::FileExtension(filepath.extension().unwrap().to_string_lossy().into_owned())
|
||||||
|
.into(),
|
||||||
|
)
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user