binary file detection

This commit is contained in:
phiresky 2019-06-07 00:57:53 +02:00
parent 12383b0c21
commit d5606094f5
4 changed files with 45 additions and 52 deletions

20
Cargo.lock generated
View File

@ -276,6 +276,22 @@ name = "either"
version = "1.5.2" version = "1.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "encoding_rs"
version = "0.8.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"cfg-if 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "encoding_rs_io"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"encoding_rs 0.8.17 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]] [[package]]
name = "env_logger" name = "env_logger"
version = "0.6.1" version = "0.6.1"
@ -779,6 +795,8 @@ dependencies = [
"chrono 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", "chrono 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
"clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)", "clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)",
"crossbeam 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)", "crossbeam 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)",
"encoding_rs 0.8.17 (registry+https://github.com/rust-lang/crates.io-index)",
"encoding_rs_io 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
"env_logger 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)", "env_logger 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)",
"failure 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", "failure 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
"flate2 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)", "flate2 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)",
@ -1162,6 +1180,8 @@ dependencies = [
"checksum crossbeam-utils 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "2760899e32a1d58d5abb31129f8fae5de75220bc2176e77ff7c627ae45c918d9" "checksum crossbeam-utils 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "2760899e32a1d58d5abb31129f8fae5de75220bc2176e77ff7c627ae45c918d9"
"checksum crossbeam-utils 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)" = "f8306fcef4a7b563b76b7dd949ca48f52bc1141aa067d2ea09565f3e2652aa5c" "checksum crossbeam-utils 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)" = "f8306fcef4a7b563b76b7dd949ca48f52bc1141aa067d2ea09565f3e2652aa5c"
"checksum either 1.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "5527cfe0d098f36e3f8839852688e63c8fff1c90b2b405aef730615f9a7bcf7b" "checksum either 1.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "5527cfe0d098f36e3f8839852688e63c8fff1c90b2b405aef730615f9a7bcf7b"
"checksum encoding_rs 0.8.17 (registry+https://github.com/rust-lang/crates.io-index)" = "4155785c79f2f6701f185eb2e6b4caf0555ec03477cb4c70db67b465311620ed"
"checksum encoding_rs_io 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "9619ee7a2bf4e777e020b95c1439abaf008f8ea8041b78a0552c4f1bcf4df32c"
"checksum env_logger 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b61fa891024a945da30a9581546e8cfaf5602c7b3f4c137a2805cf388f92075a" "checksum env_logger 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b61fa891024a945da30a9581546e8cfaf5602c7b3f4c137a2805cf388f92075a"
"checksum failure 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "795bd83d3abeb9220f257e597aa0080a508b27533824adf336529648f6abf7e2" "checksum failure 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "795bd83d3abeb9220f257e597aa0080a508b27533824adf336529648f6abf7e2"
"checksum failure_derive 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "ea1063915fd7ef4309e222a5a07cf9c319fb9c7836b1f89b85458672dbb127e1" "checksum failure_derive 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "ea1063915fd7ef4309e222a5a07cf9c319fb9c7836b1f89b85458672dbb127e1"

View File

@ -38,3 +38,5 @@ flate2 = "1.0.7"
bzip2 = "0.3.3" bzip2 = "0.3.3"
tar = "0.4.26" tar = "0.4.26"
chrono = "0.4.6" chrono = "0.4.6"
encoding_rs = "0.8.17"
encoding_rs_io = "0.1.6"

View File

@ -5,26 +5,32 @@ use std::io::BufReader;
use std::process::Command; use std::process::Command;
use std::process::Stdio; use std::process::Stdio;
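/**
* Copy a Read to a Write, prefixing every line with `line_prefix`.
*
* Tries to detect binary data by looking for NUL bytes: as soon as one is found, a single
* `[binary data]` line is written instead of the remaining input and copying stops.
* Does not ensure any encoding in the output.
*/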
pub fn postproc_line_prefix( pub fn postproc_line_prefix(
line_prefix: &str, line_prefix: &str,
inp: &mut dyn Read, inp: &mut dyn Read,
oup: &mut dyn Write, oup: &mut dyn Write,
) -> Fallible<()> { ) -> Fallible<()> {
//std::io::copy(inp, oup)?; let mut reader = BufReader::with_capacity(1 << 12, inp);
let fourk = reader.fill_buf()?;
for line in BufReader::new(inp).lines() { if fourk.contains(&0u8) {
match line { oup.write_all(format!("{}[binary data]\n", line_prefix).as_bytes())?;
Ok(line) => { return Ok(());
oup.write_all(format!("{}{}\n", line_prefix, line).as_bytes())?;
}
Err(e) => {
if e.kind() == std::io::ErrorKind::InvalidData {
oup.write_all(format!("{}[binary]\n", line_prefix).as_bytes())?;
} else {
Err(e)?;
}
} }
// intentionally do not call reader.consume, so the bytes inspected above stay buffered and are still read by the loop below
for line in reader.split(b'\n') {
let line = line?;
if line.contains(&0u8) {
oup.write_all(format!("{}[binary data]\n", line_prefix).as_bytes())?;
return Ok(());
} }
oup.write_all(line_prefix.as_bytes())?;
oup.write_all(&line)?;
oup.write_all(b"\n")?;
} }
Ok(()) Ok(())
} }
@ -44,43 +50,6 @@ pub fn map_exe_error(err: std::io::Error, exe_name: &str, help: &str) -> Error {
_ => Error::from(err), _ => Error::from(err),
} }
} }
/*fn pipe(a: &mut dyn Read, b: &mut dyn Write, c: &mut dyn Read, d: &mut dyn Write) {
let mut buf = vec![0u8; 2 << 13];
loop {
match a.read(&buf) {
}
}
}*/
/*pub fn copy<R: ?Sized, W: ?Sized>(
name: &str,
reader: &mut R,
writer: &mut W,
) -> std::io::Result<u64>
where
R: Read,
W: Write,
{
eprintln!("START COPY {}", name);
let mut zz = vec![0; 1 << 13];
let mut buf: &mut [u8] = zz.as_mut();
let mut written = 0;
loop {
let r = reader.read(buf);
eprintln!("{}read: {:?}", name, r);
let len = match r {
Ok(0) => return Ok(written),
Ok(len) => len,
Err(ref e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
Err(e) => return Err(e),
};
writer.write_all(&buf[..len])?;
written += len as u64;
}
}*/
pub fn pipe_output( pub fn pipe_output(
line_prefix: &str, line_prefix: &str,
mut cmd: Command, mut cmd: Command,
@ -98,6 +67,7 @@ pub fn pipe_output(
let mut stdi = cmd.stdin.take().expect("is piped"); let mut stdi = cmd.stdin.take().expect("is piped");
let mut stdo = cmd.stdout.take().expect("is piped"); let mut stdo = cmd.stdout.take().expect("is piped");
// TODO: how to handle this copying better?
crossbeam::scope(|s| -> Fallible<()> { crossbeam::scope(|s| -> Fallible<()> {
s.spawn(|_| cp(line_prefix, &mut stdo, oup).unwrap()); // errors? s.spawn(|_| cp(line_prefix, &mut stdo, oup).unwrap()); // errors?
std::io::copy(inp, &mut stdi)?; std::io::copy(inp, &mut stdi)?;

View File

@ -142,7 +142,8 @@ pub fn rga_preproc<'a>(
} }
} }
None => { None => {
// allow passthrough if the file is in an archive, otherwise it should have been filtered out by rg // allow passthrough if the file is in an archive,
// otherwise it should have been filtered out by rg pre-glob since rg can handle those better than us
let allow_cat = !is_real_file; let allow_cat = !is_real_file;
if allow_cat { if allow_cat {
spawning::postproc_line_prefix(line_prefix, inp, oup)?; spawning::postproc_line_prefix(line_prefix, inp, oup)?;