mirror of
https://github.com/FliegendeWurst/ripgrep-all.git
synced 2024-11-24 12:24:56 +00:00
remove lifetimes, fix
This commit is contained in:
parent
906043060b
commit
af8cf228b3
@ -1,24 +1,24 @@
|
||||
use crate::adapters::AdaptInfo;
|
||||
|
||||
// TODO: could this use the standard Iterator trait instead? It is essentially an Iterator<Item = AdaptInfo>.
|
||||
pub trait AdaptedFilesIter {
|
||||
pub trait AdaptedFilesIter: Send {
|
||||
// next takes a 'a-lived reference and returns an AdaptInfo that lives as long as the reference
|
||||
fn next<'a>(&'a mut self) -> Option<AdaptInfo<'a>>;
|
||||
fn next<'a>(&'a mut self) -> Option<AdaptInfo>;
|
||||
}
|
||||
|
||||
/// A single AdaptInfo
|
||||
pub struct SingleAdaptedFileAsIter<'a> {
|
||||
ai: Option<AdaptInfo<'a>>,
|
||||
pub struct SingleAdaptedFileAsIter {
|
||||
ai: Option<AdaptInfo>,
|
||||
}
|
||||
impl SingleAdaptedFileAsIter<'_> {
|
||||
pub fn new<'a>(ai: AdaptInfo<'a>) -> SingleAdaptedFileAsIter<'a> {
|
||||
impl SingleAdaptedFileAsIter {
|
||||
pub fn new<'a>(ai: AdaptInfo) -> SingleAdaptedFileAsIter {
|
||||
SingleAdaptedFileAsIter { ai: Some(ai) }
|
||||
}
|
||||
}
|
||||
impl AdaptedFilesIter for SingleAdaptedFileAsIter<'_> {
|
||||
fn next<'a>(&'a mut self) -> Option<AdaptInfo<'a>> {
|
||||
impl AdaptedFilesIter for SingleAdaptedFileAsIter {
|
||||
fn next<'a>(&'a mut self) -> Option<AdaptInfo> {
|
||||
self.ai.take()
|
||||
}
|
||||
}
|
||||
|
||||
pub type AdaptedFilesIterBox<'a> = Box<dyn AdaptedFilesIter + 'a>;
|
||||
pub type AdaptedFilesIterBox = Box<dyn AdaptedFilesIter>;
|
||||
|
@ -23,7 +23,7 @@ use std::path::PathBuf;
|
||||
use std::pin::Pin;
|
||||
use std::rc::Rc;
|
||||
|
||||
pub type ReadBox<'a> = Pin<Box<dyn AsyncRead + 'a>>;
|
||||
pub type ReadBox = Pin<Box<dyn AsyncRead + Send>>;
|
||||
pub struct AdapterMeta {
|
||||
/// unique short name of this adapter (a-z0-9 only)
|
||||
pub name: String,
|
||||
@ -82,12 +82,12 @@ pub trait FileAdapter: GetMetadata {
|
||||
/// detection_reason is the Matcher that was used to identify this file. Unless --rga-accurate was given, it is always a FastMatcher
|
||||
fn adapt<'a>(
|
||||
&self,
|
||||
a: AdaptInfo<'a>,
|
||||
a: AdaptInfo,
|
||||
detection_reason: &FileMatcher,
|
||||
) -> Result<AdaptedFilesIterBox<'a>>;
|
||||
) -> Result<AdaptedFilesIterBox>;
|
||||
}
|
||||
|
||||
pub struct AdaptInfo<'a> {
|
||||
pub struct AdaptInfo {
|
||||
/// file path. May not be an actual file on the file system (e.g. in an archive). Used for matching file extensions.
|
||||
pub filepath_hint: PathBuf,
|
||||
/// true if filepath_hint is an actual file on the file system
|
||||
@ -95,11 +95,11 @@ pub struct AdaptInfo<'a> {
|
||||
/// depth at which this file is in archives. 0 for real filesystem
|
||||
pub archive_recursion_depth: i32,
|
||||
/// stream to read the file from. can be from a file or from some decoder
|
||||
pub inp: ReadBox<'a>,
|
||||
pub inp: ReadBox,
|
||||
/// prefix every output line with this string to better indicate the file's location if it is in some archive
|
||||
pub line_prefix: String,
|
||||
pub postprocess: bool,
|
||||
pub config: RgaConfig,
|
||||
pub config: RgaConfig
|
||||
}
|
||||
|
||||
/// (enabledAdapters, disabledAdapters)
|
||||
|
@ -2,15 +2,17 @@ use crate::adapted_iter::SingleAdaptedFileAsIter;
|
||||
|
||||
use super::*;
|
||||
use anyhow::Result;
|
||||
use async_stream::{stream, AsyncStream};
|
||||
use bytes::{Buf, Bytes};
|
||||
use log::*;
|
||||
use tokio_util::io::StreamReader;
|
||||
|
||||
use crate::adapters::FileAdapter;
|
||||
use std::future::Future;
|
||||
use std::path::Path;
|
||||
use std::process::{ExitStatus, Stdio};
|
||||
use std::task::Poll;
|
||||
use tokio::io::AsyncReadExt;
|
||||
use tokio::process::Command;
|
||||
use tokio::process::{Child, Command};
|
||||
|
||||
// TODO: don't separate the trait and the struct
|
||||
pub trait SpawningFileAdapterTrait: GetMetadata {
|
||||
@ -52,46 +54,38 @@ pub fn map_exe_error(err: std::io::Error, exe_name: &str, help: &str) -> Error {
|
||||
|
||||
/** waits for a process to finish, returns an io error if the process failed */
|
||||
struct ProcWaitReader {
|
||||
proce: Pin<Box<dyn Future<Output = std::io::Result<ExitStatus>>>>,
|
||||
process: Option<Child>,
|
||||
future: Option<Pin<Box<dyn Future<Output = std::io::Result<ExitStatus>>>>>,
|
||||
}
|
||||
impl AsyncRead for ProcWaitReader {
|
||||
fn poll_read(
|
||||
mut self: Pin<&mut Self>,
|
||||
cx: &mut std::task::Context<'_>,
|
||||
_buf: &mut tokio::io::ReadBuf<'_>,
|
||||
) -> std::task::Poll<std::io::Result<()>> {
|
||||
match self.proce.as_mut().poll(cx) {
|
||||
std::task::Poll::Ready(x) => {
|
||||
let x = x?;
|
||||
if x.success() {
|
||||
Poll::Ready(std::io::Result::Ok(()))
|
||||
} else {
|
||||
Poll::Ready(Err(std::io::Error::new(
|
||||
std::io::ErrorKind::Other,
|
||||
format_err!("subprocess failed: {:?}", x),
|
||||
)))
|
||||
}
|
||||
}
|
||||
Poll::Pending => std::task::Poll::Pending,
|
||||
impl ProcWaitReader {
|
||||
fn new(cmd: Child) -> ProcWaitReader {
|
||||
ProcWaitReader {
|
||||
process: Some(cmd),
|
||||
future: None,
|
||||
}
|
||||
/*let status = self.proce.wait();
|
||||
if status.success() {
|
||||
std::io::Result::Ok(0)
|
||||
} else {
|
||||
Err(std::io::Error::new(
|
||||
std::io::ErrorKind::Other,
|
||||
format_err!("subprocess failed: {:?}", status),
|
||||
))
|
||||
}*/
|
||||
}
|
||||
}
|
||||
fn proc_wait(mut child: Child) -> impl AsyncRead {
|
||||
let s = stream! {
|
||||
let res = child.wait().await?;
|
||||
if res.success() {
|
||||
yield std::io::Result::Ok(Bytes::new());
|
||||
} else {
|
||||
yield std::io::Result::Err(std::io::Error::new(
|
||||
std::io::ErrorKind::Other,
|
||||
format_err!("subprocess failed: {:?}", res),
|
||||
));
|
||||
}
|
||||
};
|
||||
StreamReader::new(s)
|
||||
}
|
||||
pub fn pipe_output<'a>(
|
||||
_line_prefix: &str,
|
||||
mut cmd: Command,
|
||||
inp: ReadBox<'a>,
|
||||
inp: ReadBox,
|
||||
exe_name: &str,
|
||||
help: &str,
|
||||
) -> Result<ReadBox<'a>> {
|
||||
) -> Result<ReadBox> {
|
||||
let mut cmd = cmd
|
||||
.stdin(Stdio::piped())
|
||||
.stdout(Stdio::piped())
|
||||
@ -100,21 +94,19 @@ pub fn pipe_output<'a>(
|
||||
let mut stdi = cmd.stdin.take().expect("is piped");
|
||||
let stdo = cmd.stdout.take().expect("is piped");
|
||||
|
||||
tokio::task::spawn_local(async move {
|
||||
tokio::io::copy(&mut inp, &mut stdi).await;
|
||||
tokio::spawn(async move {
|
||||
let mut z = inp;
|
||||
tokio::io::copy(&mut z, &mut stdi).await;
|
||||
});
|
||||
|
||||
Ok(Box::pin(stdo.chain(ProcWaitReader {
|
||||
proce: Box::pin(cmd.wait()),
|
||||
})))
|
||||
Ok(Box::pin(stdo.chain(proc_wait(cmd))))
|
||||
}
|
||||
|
||||
impl FileAdapter for SpawningFileAdapter {
|
||||
fn adapt<'a>(
|
||||
&self,
|
||||
ai: AdaptInfo<'a>,
|
||||
ai: AdaptInfo,
|
||||
_detection_reason: &FileMatcher,
|
||||
) -> Result<AdaptedFilesIterBox<'a>> {
|
||||
) -> Result<AdaptedFilesIterBox> {
|
||||
let AdaptInfo {
|
||||
filepath_hint,
|
||||
inp,
|
||||
|
@ -5,9 +5,11 @@ use ripgrep_all as rga;
|
||||
|
||||
use anyhow::Context;
|
||||
use log::debug;
|
||||
use std::{fs::File, time::Instant};
|
||||
use std::{time::Instant};
|
||||
use tokio::fs::File;
|
||||
|
||||
fn main() -> anyhow::Result<()> {
|
||||
#[tokio::main]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
env_logger::init();
|
||||
let mut arg_arr: Vec<std::ffi::OsString> = std::env::args_os().collect();
|
||||
let last = arg_arr.pop().expect("No filename specified");
|
||||
@ -18,10 +20,10 @@ fn main() -> anyhow::Result<()> {
|
||||
std::env::current_dir()?.join(&filepath)
|
||||
};
|
||||
|
||||
let i = File::open(&path).context("Specified input file not found")?;
|
||||
let mut o = std::io::stdout();
|
||||
let i = File::open(&path).await.context("Specified input file not found")?;
|
||||
let mut o = tokio::io::stdout();
|
||||
let ai = AdaptInfo {
|
||||
inp: Box::new(i),
|
||||
inp: Box::pin(i),
|
||||
filepath_hint: path,
|
||||
is_real_file: true,
|
||||
line_prefix: "".to_string(),
|
||||
@ -31,9 +33,9 @@ fn main() -> anyhow::Result<()> {
|
||||
};
|
||||
|
||||
let start = Instant::now();
|
||||
let mut oup = rga_preproc(ai).context("during preprocessing")?;
|
||||
let mut oup = rga_preproc(ai).await.context("during preprocessing")?;
|
||||
debug!("finding and starting adapter took {}", print_dur(start));
|
||||
let res = std::io::copy(&mut oup, &mut o);
|
||||
let res = tokio::io::copy(&mut oup, &mut o).await;
|
||||
if let Err(e) = res {
|
||||
if e.kind() == std::io::ErrorKind::BrokenPipe {
|
||||
// happens if e.g. ripgrep detects binary data in the pipe so it cancels reading
|
||||
|
@ -1,112 +1,72 @@
|
||||
use std::{pin::Pin, task::Poll};
|
||||
use std::pin::Pin;
|
||||
|
||||
use anyhow::Result;
|
||||
use async_compression::tokio::write::ZstdEncoder;
|
||||
use async_stream::stream;
|
||||
|
||||
use log::*;
|
||||
use tokio::{
|
||||
io::{AsyncRead, AsyncWrite, AsyncWriteExt},
|
||||
pin,
|
||||
};
|
||||
use tokio::io::{AsyncRead, AsyncWriteExt};
|
||||
use tokio_stream::StreamExt;
|
||||
use tokio_util::io::{ReaderStream, StreamReader};
|
||||
|
||||
use crate::adapters::ReadBox;
|
||||
|
||||
/**
|
||||
* wrap a writer so that it is passthrough,
|
||||
* wrap an AsyncRead so that it is passthrough,
|
||||
* but also the written data is compressed and written into a buffer,
|
||||
* unless the compressed data grows beyond max_cache_size bytes; in that case the cache is dropped and it is pure passthrough.
|
||||
*/
|
||||
pub struct CachingReader<R: AsyncRead> {
|
||||
pub fn async_read_and_write_to_cache<'a>(
|
||||
inp: impl AsyncRead + Send +'a,
|
||||
max_cache_size: usize,
|
||||
// set to none if the size goes over the limit
|
||||
zstd_writer: Option<ZstdEncoder<Vec<u8>>>,
|
||||
inp: Pin<Box<R>>,
|
||||
bytes_written: u64,
|
||||
compression_level: i32,
|
||||
on_finish: Box<dyn FnOnce((u64, Option<Vec<u8>>)) -> Result<()> + Send>,
|
||||
}
|
||||
impl<R: AsyncRead> CachingReader<R> {
|
||||
pub fn new(
|
||||
inp: R,
|
||||
max_cache_size: usize,
|
||||
compression_level: i32,
|
||||
on_finish: Box<dyn FnOnce((u64, Option<Vec<u8>>)) -> Result<()> + Send>,
|
||||
) -> Result<CachingReader<R>> {
|
||||
Ok(CachingReader {
|
||||
inp: Box::pin(inp),
|
||||
max_cache_size,
|
||||
zstd_writer: Some(ZstdEncoder::with_quality(
|
||||
Vec::new(),
|
||||
async_compression::Level::Precise(compression_level as u32),
|
||||
)),
|
||||
bytes_written: 0,
|
||||
on_finish,
|
||||
})
|
||||
}
|
||||
pub fn finish(
|
||||
&mut self,
|
||||
cx: &mut std::task::Context<'_>,
|
||||
) -> std::io::Result<(u64, Option<Vec<u8>>)> {
|
||||
if let Some(writer) = self.zstd_writer.take() {
|
||||
pin!(writer);
|
||||
writer.as_mut().poll_shutdown(cx)?;
|
||||
let res = writer.get_pin_mut().clone(); // TODO: without copying possible?
|
||||
if res.len() <= self.max_cache_size {
|
||||
return Ok((self.bytes_written, Some(res)));
|
||||
}
|
||||
}
|
||||
Ok((self.bytes_written, None))
|
||||
}
|
||||
async fn write_to_compressed(&mut self, buf: &[u8]) -> std::io::Result<()> {
|
||||
if let Some(writer) = self.zstd_writer.as_mut() {
|
||||
writer.write_all(buf).await?;
|
||||
let compressed_len = writer.get_ref().len();
|
||||
trace!("wrote {} to zstd, len now {}", buf.len(), compressed_len);
|
||||
if compressed_len > self.max_cache_size {
|
||||
debug!("cache longer than max, dropping");
|
||||
//writer.finish();
|
||||
self.zstd_writer.take();
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
impl<R> AsyncRead for CachingReader<R>
|
||||
where
|
||||
R: AsyncRead,
|
||||
{
|
||||
fn poll_read(
|
||||
mut self: std::pin::Pin<&mut Self>,
|
||||
cx: &mut std::task::Context<'_>,
|
||||
mut buf: &mut tokio::io::ReadBuf<'_>,
|
||||
) -> std::task::Poll<std::io::Result<()>> {
|
||||
let old_filled_len = buf.filled().len();
|
||||
match self.inp.as_mut().poll_read(cx, &mut buf) {
|
||||
/*Ok(0) => {
|
||||
) -> Result<Pin<Box<dyn AsyncRead + Send +'a>>> {
|
||||
let inp = Box::pin(inp);
|
||||
let mut zstd_writer = Some(ZstdEncoder::with_quality(
|
||||
Vec::new(),
|
||||
async_compression::Level::Precise(compression_level as u32),
|
||||
));
|
||||
let mut bytes_written = 0;
|
||||
|
||||
}
|
||||
Ok(read_bytes) => {
|
||||
self.write_to_compressed(&buf[0..read_bytes])?;
|
||||
self.bytes_written += read_bytes as u64;
|
||||
Ok(read_bytes)
|
||||
}*/
|
||||
Poll::Ready(rdy) => {
|
||||
if let Ok(()) = &rdy {
|
||||
let slice = buf.filled();
|
||||
let read_bytes = slice.len() - old_filled_len;
|
||||
if read_bytes == 0 {
|
||||
// EOF
|
||||
// move out of box, replace with noop lambda
|
||||
let on_finish =
|
||||
std::mem::replace(&mut self.on_finish, Box::new(|_| Ok(())));
|
||||
// EOF, finish!
|
||||
(on_finish)(self.finish(cx)?)
|
||||
.map(|()| 0)
|
||||
.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
|
||||
} else {
|
||||
self.write_to_compressed(&slice[old_filled_len..]);
|
||||
self.bytes_written += read_bytes as u64;
|
||||
let s = stream! {
|
||||
let mut stream = ReaderStream::new(inp);
|
||||
while let Some(bytes) = stream.next().await {
|
||||
if let Ok(bytes) = &bytes {
|
||||
if let Some(writer) = zstd_writer.as_mut() {
|
||||
writer.write_all(&bytes).await?;
|
||||
bytes_written += bytes.len() as u64;
|
||||
let compressed_len = writer.get_ref().len();
|
||||
trace!("wrote {} to zstd, len now {}", bytes.len(), compressed_len);
|
||||
if compressed_len > max_cache_size {
|
||||
debug!("cache longer than max, dropping");
|
||||
//writer.finish();
|
||||
zstd_writer.take();
|
||||
}
|
||||
}
|
||||
Poll::Ready(rdy)
|
||||
}
|
||||
Poll::Pending => Poll::Pending,
|
||||
yield bytes;
|
||||
}
|
||||
}
|
||||
// EOF, call on_finish
|
||||
let finish = {
|
||||
if let Some(mut writer) = zstd_writer.take() {
|
||||
writer.shutdown().await?;
|
||||
let res = writer.into_inner();
|
||||
if res.len() <= max_cache_size {
|
||||
(bytes_written, Some(res))
|
||||
} else {
|
||||
(bytes_written, None)
|
||||
}
|
||||
} else {
|
||||
(bytes_written, None)
|
||||
}
|
||||
};
|
||||
|
||||
// EOF, finish!
|
||||
on_finish(finish)
|
||||
.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
|
||||
|
||||
};
|
||||
|
||||
Ok(Box::pin(StreamReader::new(s)))
|
||||
}
|
||||
|
@ -13,7 +13,6 @@ pub mod recurse;
|
||||
pub mod test_utils;
|
||||
use anyhow::Context;
|
||||
use anyhow::Result;
|
||||
pub use caching_writer::CachingReader;
|
||||
use directories_next::ProjectDirs;
|
||||
use std::time::Instant;
|
||||
|
||||
|
@ -1,10 +1,11 @@
|
||||
use crate::adapters::*;
|
||||
use crate::caching_writer::async_read_and_write_to_cache;
|
||||
use crate::config::RgaConfig;
|
||||
use crate::matching::*;
|
||||
use crate::recurse::concat_read_streams;
|
||||
use crate::{
|
||||
preproc_cache::{LmdbCache, PreprocCache},
|
||||
print_bytes, CachingReader,
|
||||
print_bytes,
|
||||
};
|
||||
use anyhow::*;
|
||||
use log::*;
|
||||
@ -53,7 +54,7 @@ async fn choose_adapter(
|
||||
* If a cache is passed, read/write to it.
|
||||
*
|
||||
*/
|
||||
pub async fn rga_preproc(ai: AdaptInfo<'_>) -> Result<ReadBox<'_>> {
|
||||
pub async fn rga_preproc(ai: AdaptInfo) -> Result<ReadBox> {
|
||||
debug!("path (hint) to preprocess: {:?}", ai.filepath_hint);
|
||||
/*todo: move if archive_recursion_depth >= config.max_archive_recursion.0 {
|
||||
let s = format!("{}[rga: max archive recursion reached]", line_prefix).into_bytes();
|
||||
@ -139,12 +140,12 @@ fn compute_cache_key(
|
||||
bincode::serialize(&key).context("could not serialize path")
|
||||
}
|
||||
}
|
||||
async fn run_adapter_recursively<'a>(
|
||||
ai: AdaptInfo<'a>,
|
||||
async fn run_adapter_recursively(
|
||||
ai: AdaptInfo,
|
||||
adapter: Rc<dyn FileAdapter>,
|
||||
detection_reason: FileMatcher,
|
||||
active_adapters: ActiveAdapters,
|
||||
) -> Result<ReadBox<'a>> {
|
||||
) -> Result<ReadBox> {
|
||||
let AdaptInfo {
|
||||
filepath_hint,
|
||||
is_real_file,
|
||||
@ -206,7 +207,7 @@ async fn run_adapter_recursively<'a>(
|
||||
)
|
||||
})?;
|
||||
let inp = concat_read_streams(inp);
|
||||
let inp = CachingReader::new(
|
||||
let inp = async_read_and_write_to_cache(
|
||||
inp,
|
||||
cache_max_blob_len.0.try_into().unwrap(),
|
||||
cache_compression_level.0.try_into().unwrap(),
|
||||
|
@ -4,13 +4,13 @@ use crate::{adapted_iter::AdaptedFilesIterBox, adapters::*};
|
||||
use async_stream::stream;
|
||||
use tokio_stream::StreamExt;
|
||||
|
||||
pub struct RecursingConcattyReader<'a> {
|
||||
inp: AdaptedFilesIterBox<'a>,
|
||||
cur: Option<ReadBox<'a>>,
|
||||
pub struct RecursingConcattyReader {
|
||||
inp: AdaptedFilesIterBox,
|
||||
cur: Option<ReadBox>,
|
||||
}
|
||||
pub fn concat_read_streams(
|
||||
mut input: AdaptedFilesIterBox<'_>,
|
||||
) -> ReadBox<'_> {
|
||||
mut input: AdaptedFilesIterBox,
|
||||
) -> ReadBox {
|
||||
let s = stream! {
|
||||
while let Some(output) = input.next() {
|
||||
let mut stream = ReaderStream::new(output.inp);
|
||||
|
@ -15,7 +15,7 @@ pub fn test_data_dir() -> PathBuf {
|
||||
d
|
||||
}
|
||||
|
||||
pub fn simple_adapt_info<'a>(filepath: &Path, inp: ReadBox<'a>) -> (AdaptInfo<'a>, FileMatcher) {
|
||||
pub fn simple_adapt_info<'a>(filepath: &Path, inp: ReadBox) -> (AdaptInfo, FileMatcher) {
|
||||
(
|
||||
AdaptInfo {
|
||||
filepath_hint: filepath.to_owned(),
|
||||
|
Loading…
Reference in New Issue
Block a user