mirror of
https://github.com/FliegendeWurst/ripgrep-all.git
synced 2024-11-09 14:30:37 +00:00
binary file detection
This commit is contained in:
parent
12383b0c21
commit
d5606094f5
20
Cargo.lock
generated
20
Cargo.lock
generated
@ -276,6 +276,22 @@ name = "either"
|
||||
version = "1.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "encoding_rs"
|
||||
version = "0.8.17"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"cfg-if 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "encoding_rs_io"
|
||||
version = "0.1.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"encoding_rs 0.8.17 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "env_logger"
|
||||
version = "0.6.1"
|
||||
@ -779,6 +795,8 @@ dependencies = [
|
||||
"chrono 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"crossbeam 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"encoding_rs 0.8.17 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"encoding_rs_io 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"env_logger 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"failure 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"flate2 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
@ -1162,6 +1180,8 @@ dependencies = [
|
||||
"checksum crossbeam-utils 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "2760899e32a1d58d5abb31129f8fae5de75220bc2176e77ff7c627ae45c918d9"
|
||||
"checksum crossbeam-utils 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)" = "f8306fcef4a7b563b76b7dd949ca48f52bc1141aa067d2ea09565f3e2652aa5c"
|
||||
"checksum either 1.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "5527cfe0d098f36e3f8839852688e63c8fff1c90b2b405aef730615f9a7bcf7b"
|
||||
"checksum encoding_rs 0.8.17 (registry+https://github.com/rust-lang/crates.io-index)" = "4155785c79f2f6701f185eb2e6b4caf0555ec03477cb4c70db67b465311620ed"
|
||||
"checksum encoding_rs_io 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "9619ee7a2bf4e777e020b95c1439abaf008f8ea8041b78a0552c4f1bcf4df32c"
|
||||
"checksum env_logger 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b61fa891024a945da30a9581546e8cfaf5602c7b3f4c137a2805cf388f92075a"
|
||||
"checksum failure 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "795bd83d3abeb9220f257e597aa0080a508b27533824adf336529648f6abf7e2"
|
||||
"checksum failure_derive 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "ea1063915fd7ef4309e222a5a07cf9c319fb9c7836b1f89b85458672dbb127e1"
|
||||
|
@ -38,3 +38,5 @@ flate2 = "1.0.7"
|
||||
bzip2 = "0.3.3"
|
||||
tar = "0.4.26"
|
||||
chrono = "0.4.6"
|
||||
encoding_rs = "0.8.17"
|
||||
encoding_rs_io = "0.1.6"
|
||||
|
@ -5,26 +5,32 @@ use std::io::BufReader;
|
||||
use std::process::Command;
|
||||
use std::process::Stdio;
|
||||
|
||||
/**
|
||||
* Copy a Read to a Write, while prefixing every line with a prefix.
|
||||
*
|
||||
* Try to detect binary files and ignore them. Does not ensure any encoding in the output.
|
||||
*/
|
||||
pub fn postproc_line_prefix(
|
||||
line_prefix: &str,
|
||||
inp: &mut dyn Read,
|
||||
oup: &mut dyn Write,
|
||||
) -> Fallible<()> {
|
||||
//std::io::copy(inp, oup)?;
|
||||
|
||||
for line in BufReader::new(inp).lines() {
|
||||
match line {
|
||||
Ok(line) => {
|
||||
oup.write_all(format!("{}{}\n", line_prefix, line).as_bytes())?;
|
||||
}
|
||||
Err(e) => {
|
||||
if e.kind() == std::io::ErrorKind::InvalidData {
|
||||
oup.write_all(format!("{}[binary]\n", line_prefix).as_bytes())?;
|
||||
} else {
|
||||
Err(e)?;
|
||||
}
|
||||
}
|
||||
let mut reader = BufReader::with_capacity(1 << 12, inp);
|
||||
let fourk = reader.fill_buf()?;
|
||||
if fourk.contains(&0u8) {
|
||||
oup.write_all(format!("{}[binary data]\n", line_prefix).as_bytes())?;
|
||||
return Ok(());
|
||||
}
|
||||
// intentionally do not call reader.consume
|
||||
for line in reader.split(b'\n') {
|
||||
let line = line?;
|
||||
if line.contains(&0u8) {
|
||||
oup.write_all(format!("{}[binary data]\n", line_prefix).as_bytes())?;
|
||||
return Ok(());
|
||||
}
|
||||
oup.write_all(line_prefix.as_bytes())?;
|
||||
oup.write_all(&line)?;
|
||||
oup.write_all(b"\n")?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
@ -44,43 +50,6 @@ pub fn map_exe_error(err: std::io::Error, exe_name: &str, help: &str) -> Error {
|
||||
_ => Error::from(err),
|
||||
}
|
||||
}
|
||||
|
||||
/*fn pipe(a: &mut dyn Read, b: &mut dyn Write, c: &mut dyn Read, d: &mut dyn Write) {
|
||||
let mut buf = vec![0u8; 2 << 13];
|
||||
loop {
|
||||
match a.read(&buf) {
|
||||
|
||||
}
|
||||
}
|
||||
}*/
|
||||
|
||||
/*pub fn copy<R: ?Sized, W: ?Sized>(
|
||||
name: &str,
|
||||
reader: &mut R,
|
||||
writer: &mut W,
|
||||
) -> std::io::Result<u64>
|
||||
where
|
||||
R: Read,
|
||||
W: Write,
|
||||
{
|
||||
eprintln!("START COPY {}", name);
|
||||
let mut zz = vec![0; 1 << 13];
|
||||
let mut buf: &mut [u8] = zz.as_mut();
|
||||
let mut written = 0;
|
||||
loop {
|
||||
let r = reader.read(buf);
|
||||
eprintln!("{}read: {:?}", name, r);
|
||||
let len = match r {
|
||||
Ok(0) => return Ok(written),
|
||||
Ok(len) => len,
|
||||
Err(ref e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
|
||||
Err(e) => return Err(e),
|
||||
};
|
||||
writer.write_all(&buf[..len])?;
|
||||
written += len as u64;
|
||||
}
|
||||
}*/
|
||||
|
||||
pub fn pipe_output(
|
||||
line_prefix: &str,
|
||||
mut cmd: Command,
|
||||
@ -98,6 +67,7 @@ pub fn pipe_output(
|
||||
let mut stdi = cmd.stdin.take().expect("is piped");
|
||||
let mut stdo = cmd.stdout.take().expect("is piped");
|
||||
|
||||
// TODO: how to handle this copying better?
|
||||
crossbeam::scope(|s| -> Fallible<()> {
|
||||
s.spawn(|_| cp(line_prefix, &mut stdo, oup).unwrap()); // errors?
|
||||
std::io::copy(inp, &mut stdi)?;
|
||||
|
@ -142,7 +142,8 @@ pub fn rga_preproc<'a>(
|
||||
}
|
||||
}
|
||||
None => {
|
||||
// allow passthrough if the file is in an archive, otherwise it should have been filtered out by rg
|
||||
// allow passthrough if the file is in an archive,
|
||||
// otherwise it should have been filtered out by rg pre-glob since rg can handle those better than us
|
||||
let allow_cat = !is_real_file;
|
||||
if allow_cat {
|
||||
spawning::postproc_line_prefix(line_prefix, inp, oup)?;
|
||||
|
Loading…
Reference in New Issue
Block a user