remove lifetimes, fix

This commit is contained in:
phiresky 2022-10-29 23:56:25 +02:00
parent 906043060b
commit af8cf228b3
9 changed files with 125 additions and 171 deletions

View File

@ -1,24 +1,24 @@
use crate::adapters::AdaptInfo; use crate::adapters::AdaptInfo;
// TODO: using iterator trait possible?? should basically be Iterator<AdaptInfo> // TODO: using iterator trait possible?? should basically be Iterator<AdaptInfo>
pub trait AdaptedFilesIter { pub trait AdaptedFilesIter: Send {
// next takes a 'a-lived reference and returns an AdaptInfo that lives as long as the reference // next takes a 'a-lived reference and returns an AdaptInfo that lives as long as the reference
fn next<'a>(&'a mut self) -> Option<AdaptInfo<'a>>; fn next<'a>(&'a mut self) -> Option<AdaptInfo>;
} }
/// A single AdaptInfo /// A single AdaptInfo
pub struct SingleAdaptedFileAsIter<'a> { pub struct SingleAdaptedFileAsIter {
ai: Option<AdaptInfo<'a>>, ai: Option<AdaptInfo>,
} }
impl SingleAdaptedFileAsIter<'_> { impl SingleAdaptedFileAsIter {
pub fn new<'a>(ai: AdaptInfo<'a>) -> SingleAdaptedFileAsIter<'a> { pub fn new<'a>(ai: AdaptInfo) -> SingleAdaptedFileAsIter {
SingleAdaptedFileAsIter { ai: Some(ai) } SingleAdaptedFileAsIter { ai: Some(ai) }
} }
} }
impl AdaptedFilesIter for SingleAdaptedFileAsIter<'_> { impl AdaptedFilesIter for SingleAdaptedFileAsIter {
fn next<'a>(&'a mut self) -> Option<AdaptInfo<'a>> { fn next<'a>(&'a mut self) -> Option<AdaptInfo> {
self.ai.take() self.ai.take()
} }
} }
pub type AdaptedFilesIterBox<'a> = Box<dyn AdaptedFilesIter + 'a>; pub type AdaptedFilesIterBox = Box<dyn AdaptedFilesIter>;

View File

@ -23,7 +23,7 @@ use std::path::PathBuf;
use std::pin::Pin; use std::pin::Pin;
use std::rc::Rc; use std::rc::Rc;
pub type ReadBox<'a> = Pin<Box<dyn AsyncRead + 'a>>; pub type ReadBox = Pin<Box<dyn AsyncRead + Send>>;
pub struct AdapterMeta { pub struct AdapterMeta {
/// unique short name of this adapter (a-z0-9 only) /// unique short name of this adapter (a-z0-9 only)
pub name: String, pub name: String,
@ -82,12 +82,12 @@ pub trait FileAdapter: GetMetadata {
/// detection_reason is the Matcher that was used to identify this file. Unless --rga-accurate was given, it is always a FastMatcher /// detection_reason is the Matcher that was used to identify this file. Unless --rga-accurate was given, it is always a FastMatcher
fn adapt<'a>( fn adapt<'a>(
&self, &self,
a: AdaptInfo<'a>, a: AdaptInfo,
detection_reason: &FileMatcher, detection_reason: &FileMatcher,
) -> Result<AdaptedFilesIterBox<'a>>; ) -> Result<AdaptedFilesIterBox>;
} }
pub struct AdaptInfo<'a> { pub struct AdaptInfo {
/// file path. May not be an actual file on the file system (e.g. in an archive). Used for matching file extensions. /// file path. May not be an actual file on the file system (e.g. in an archive). Used for matching file extensions.
pub filepath_hint: PathBuf, pub filepath_hint: PathBuf,
/// true if filepath_hint is an actual file on the file system /// true if filepath_hint is an actual file on the file system
@ -95,11 +95,11 @@ pub struct AdaptInfo<'a> {
/// depth at which this file is in archives. 0 for real filesystem /// depth at which this file is in archives. 0 for real filesystem
pub archive_recursion_depth: i32, pub archive_recursion_depth: i32,
/// stream to read the file from. can be from a file or from some decoder /// stream to read the file from. can be from a file or from some decoder
pub inp: ReadBox<'a>, pub inp: ReadBox,
/// prefix every output line with this string to better indicate the file's location if it is in some archive /// prefix every output line with this string to better indicate the file's location if it is in some archive
pub line_prefix: String, pub line_prefix: String,
pub postprocess: bool, pub postprocess: bool,
pub config: RgaConfig, pub config: RgaConfig
} }
/// (enabledAdapters, disabledAdapters) /// (enabledAdapters, disabledAdapters)

View File

@ -2,15 +2,17 @@ use crate::adapted_iter::SingleAdaptedFileAsIter;
use super::*; use super::*;
use anyhow::Result; use anyhow::Result;
use async_stream::{stream, AsyncStream};
use bytes::{Buf, Bytes};
use log::*; use log::*;
use tokio_util::io::StreamReader;
use crate::adapters::FileAdapter; use crate::adapters::FileAdapter;
use std::future::Future; use std::future::Future;
use std::path::Path; use std::path::Path;
use std::process::{ExitStatus, Stdio}; use std::process::{ExitStatus, Stdio};
use std::task::Poll;
use tokio::io::AsyncReadExt; use tokio::io::AsyncReadExt;
use tokio::process::Command; use tokio::process::{Child, Command};
// TODO: don't separate the trait and the struct // TODO: don't separate the trait and the struct
pub trait SpawningFileAdapterTrait: GetMetadata { pub trait SpawningFileAdapterTrait: GetMetadata {
@ -52,46 +54,38 @@ pub fn map_exe_error(err: std::io::Error, exe_name: &str, help: &str) -> Error {
/** waits for a process to finish, returns an io error if the process failed */ /** waits for a process to finish, returns an io error if the process failed */
struct ProcWaitReader { struct ProcWaitReader {
proce: Pin<Box<dyn Future<Output = std::io::Result<ExitStatus>>>>, process: Option<Child>,
future: Option<Pin<Box<dyn Future<Output = std::io::Result<ExitStatus>>>>>,
} }
impl AsyncRead for ProcWaitReader { impl ProcWaitReader {
fn poll_read( fn new(cmd: Child) -> ProcWaitReader {
mut self: Pin<&mut Self>, ProcWaitReader {
cx: &mut std::task::Context<'_>, process: Some(cmd),
_buf: &mut tokio::io::ReadBuf<'_>, future: None,
) -> std::task::Poll<std::io::Result<()>> { }
match self.proce.as_mut().poll(cx) { }
std::task::Poll::Ready(x) => { }
let x = x?; fn proc_wait(mut child: Child) -> impl AsyncRead {
if x.success() { let s = stream! {
Poll::Ready(std::io::Result::Ok(())) let res = child.wait().await?;
if res.success() {
yield std::io::Result::Ok(Bytes::new());
} else { } else {
Poll::Ready(Err(std::io::Error::new( yield std::io::Result::Err(std::io::Error::new(
std::io::ErrorKind::Other, std::io::ErrorKind::Other,
format_err!("subprocess failed: {:?}", x), format_err!("subprocess failed: {:?}", res),
))) ));
}
}
Poll::Pending => std::task::Poll::Pending,
}
/*let status = self.proce.wait();
if status.success() {
std::io::Result::Ok(0)
} else {
Err(std::io::Error::new(
std::io::ErrorKind::Other,
format_err!("subprocess failed: {:?}", status),
))
}*/
} }
};
StreamReader::new(s)
} }
pub fn pipe_output<'a>( pub fn pipe_output<'a>(
_line_prefix: &str, _line_prefix: &str,
mut cmd: Command, mut cmd: Command,
inp: ReadBox<'a>, inp: ReadBox,
exe_name: &str, exe_name: &str,
help: &str, help: &str,
) -> Result<ReadBox<'a>> { ) -> Result<ReadBox> {
let mut cmd = cmd let mut cmd = cmd
.stdin(Stdio::piped()) .stdin(Stdio::piped())
.stdout(Stdio::piped()) .stdout(Stdio::piped())
@ -100,21 +94,19 @@ pub fn pipe_output<'a>(
let mut stdi = cmd.stdin.take().expect("is piped"); let mut stdi = cmd.stdin.take().expect("is piped");
let stdo = cmd.stdout.take().expect("is piped"); let stdo = cmd.stdout.take().expect("is piped");
tokio::task::spawn_local(async move { tokio::spawn(async move {
tokio::io::copy(&mut inp, &mut stdi).await; let mut z = inp;
tokio::io::copy(&mut z, &mut stdi).await;
}); });
Ok(Box::pin(stdo.chain(proc_wait(cmd))))
Ok(Box::pin(stdo.chain(ProcWaitReader {
proce: Box::pin(cmd.wait()),
})))
} }
impl FileAdapter for SpawningFileAdapter { impl FileAdapter for SpawningFileAdapter {
fn adapt<'a>( fn adapt<'a>(
&self, &self,
ai: AdaptInfo<'a>, ai: AdaptInfo,
_detection_reason: &FileMatcher, _detection_reason: &FileMatcher,
) -> Result<AdaptedFilesIterBox<'a>> { ) -> Result<AdaptedFilesIterBox> {
let AdaptInfo { let AdaptInfo {
filepath_hint, filepath_hint,
inp, inp,

View File

@ -5,9 +5,11 @@ use ripgrep_all as rga;
use anyhow::Context; use anyhow::Context;
use log::debug; use log::debug;
use std::{fs::File, time::Instant}; use std::{time::Instant};
use tokio::fs::File;
fn main() -> anyhow::Result<()> { #[tokio::main]
async fn main() -> anyhow::Result<()> {
env_logger::init(); env_logger::init();
let mut arg_arr: Vec<std::ffi::OsString> = std::env::args_os().collect(); let mut arg_arr: Vec<std::ffi::OsString> = std::env::args_os().collect();
let last = arg_arr.pop().expect("No filename specified"); let last = arg_arr.pop().expect("No filename specified");
@ -18,10 +20,10 @@ fn main() -> anyhow::Result<()> {
std::env::current_dir()?.join(&filepath) std::env::current_dir()?.join(&filepath)
}; };
let i = File::open(&path).context("Specified input file not found")?; let i = File::open(&path).await.context("Specified input file not found")?;
let mut o = std::io::stdout(); let mut o = tokio::io::stdout();
let ai = AdaptInfo { let ai = AdaptInfo {
inp: Box::new(i), inp: Box::pin(i),
filepath_hint: path, filepath_hint: path,
is_real_file: true, is_real_file: true,
line_prefix: "".to_string(), line_prefix: "".to_string(),
@ -31,9 +33,9 @@ fn main() -> anyhow::Result<()> {
}; };
let start = Instant::now(); let start = Instant::now();
let mut oup = rga_preproc(ai).context("during preprocessing")?; let mut oup = rga_preproc(ai).await.context("during preprocessing")?;
debug!("finding and starting adapter took {}", print_dur(start)); debug!("finding and starting adapter took {}", print_dur(start));
let res = std::io::copy(&mut oup, &mut o); let res = tokio::io::copy(&mut oup, &mut o).await;
if let Err(e) = res { if let Err(e) = res {
if e.kind() == std::io::ErrorKind::BrokenPipe { if e.kind() == std::io::ErrorKind::BrokenPipe {
// happens if e.g. ripgrep detects binary data in the pipe so it cancels reading // happens if e.g. ripgrep detects binary data in the pipe so it cancels reading

View File

@ -1,112 +1,72 @@
use std::{pin::Pin, task::Poll}; use std::pin::Pin;
use anyhow::Result; use anyhow::Result;
use async_compression::tokio::write::ZstdEncoder; use async_compression::tokio::write::ZstdEncoder;
use async_stream::stream;
use log::*; use log::*;
use tokio::{ use tokio::io::{AsyncRead, AsyncWriteExt};
io::{AsyncRead, AsyncWrite, AsyncWriteExt}, use tokio_stream::StreamExt;
pin, use tokio_util::io::{ReaderStream, StreamReader};
};
use crate::adapters::ReadBox;
/** /**
* wrap a writer so that it is passthrough, * wrap a AsyncRead so that it is passthrough,
* but also the written data is compressed and written into a buffer, * but also the written data is compressed and written into a buffer,
* unless more than max_cache_size bytes is written, then the cache is dropped and it is pure passthrough. * unless more than max_cache_size bytes is written, then the cache is dropped and it is pure passthrough.
*/ */
pub struct CachingReader<R: AsyncRead> { pub fn async_read_and_write_to_cache<'a>(
max_cache_size: usize, inp: impl AsyncRead + Send +'a,
// set to none if the size goes over the limit
zstd_writer: Option<ZstdEncoder<Vec<u8>>>,
inp: Pin<Box<R>>,
bytes_written: u64,
on_finish: Box<dyn FnOnce((u64, Option<Vec<u8>>)) -> Result<()> + Send>,
}
impl<R: AsyncRead> CachingReader<R> {
pub fn new(
inp: R,
max_cache_size: usize, max_cache_size: usize,
compression_level: i32, compression_level: i32,
on_finish: Box<dyn FnOnce((u64, Option<Vec<u8>>)) -> Result<()> + Send>, on_finish: Box<dyn FnOnce((u64, Option<Vec<u8>>)) -> Result<()> + Send>,
) -> Result<CachingReader<R>> { ) -> Result<Pin<Box<dyn AsyncRead + Send +'a>>> {
Ok(CachingReader { let inp = Box::pin(inp);
inp: Box::pin(inp), let mut zstd_writer = Some(ZstdEncoder::with_quality(
max_cache_size,
zstd_writer: Some(ZstdEncoder::with_quality(
Vec::new(), Vec::new(),
async_compression::Level::Precise(compression_level as u32), async_compression::Level::Precise(compression_level as u32),
)), ));
bytes_written: 0, let mut bytes_written = 0;
on_finish,
}) let s = stream! {
} let mut stream = ReaderStream::new(inp);
pub fn finish( while let Some(bytes) = stream.next().await {
&mut self, if let Ok(bytes) = &bytes {
cx: &mut std::task::Context<'_>, if let Some(writer) = zstd_writer.as_mut() {
) -> std::io::Result<(u64, Option<Vec<u8>>)> { writer.write_all(&bytes).await?;
if let Some(writer) = self.zstd_writer.take() { bytes_written += bytes.len() as u64;
pin!(writer);
writer.as_mut().poll_shutdown(cx)?;
let res = writer.get_pin_mut().clone(); // TODO: without copying possible?
if res.len() <= self.max_cache_size {
return Ok((self.bytes_written, Some(res)));
}
}
Ok((self.bytes_written, None))
}
async fn write_to_compressed(&mut self, buf: &[u8]) -> std::io::Result<()> {
if let Some(writer) = self.zstd_writer.as_mut() {
writer.write_all(buf).await?;
let compressed_len = writer.get_ref().len(); let compressed_len = writer.get_ref().len();
trace!("wrote {} to zstd, len now {}", buf.len(), compressed_len); trace!("wrote {} to zstd, len now {}", bytes.len(), compressed_len);
if compressed_len > self.max_cache_size { if compressed_len > max_cache_size {
debug!("cache longer than max, dropping"); debug!("cache longer than max, dropping");
//writer.finish(); //writer.finish();
self.zstd_writer.take(); zstd_writer.take();
} }
} }
Ok(())
} }
yield bytes;
} }
impl<R> AsyncRead for CachingReader<R> // EOF, call on_finish
where let finish = {
R: AsyncRead, if let Some(mut writer) = zstd_writer.take() {
{ writer.shutdown().await?;
fn poll_read( let res = writer.into_inner();
mut self: std::pin::Pin<&mut Self>, if res.len() <= max_cache_size {
cx: &mut std::task::Context<'_>, (bytes_written, Some(res))
mut buf: &mut tokio::io::ReadBuf<'_>,
) -> std::task::Poll<std::io::Result<()>> {
let old_filled_len = buf.filled().len();
match self.inp.as_mut().poll_read(cx, &mut buf) {
/*Ok(0) => {
}
Ok(read_bytes) => {
self.write_to_compressed(&buf[0..read_bytes])?;
self.bytes_written += read_bytes as u64;
Ok(read_bytes)
}*/
Poll::Ready(rdy) => {
if let Ok(()) = &rdy {
let slice = buf.filled();
let read_bytes = slice.len() - old_filled_len;
if read_bytes == 0 {
// EOF
// move out of box, replace with noop lambda
let on_finish =
std::mem::replace(&mut self.on_finish, Box::new(|_| Ok(())));
// EOF, finish!
(on_finish)(self.finish(cx)?)
.map(|()| 0)
.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
} else { } else {
self.write_to_compressed(&slice[old_filled_len..]); (bytes_written, None)
self.bytes_written += read_bytes as u64;
}
}
Poll::Ready(rdy)
}
Poll::Pending => Poll::Pending,
} }
} else {
(bytes_written, None)
} }
};
// EOF, finish!
on_finish(finish)
.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
};
Ok(Box::pin(StreamReader::new(s)))
} }

View File

@ -13,7 +13,6 @@ pub mod recurse;
pub mod test_utils; pub mod test_utils;
use anyhow::Context; use anyhow::Context;
use anyhow::Result; use anyhow::Result;
pub use caching_writer::CachingReader;
use directories_next::ProjectDirs; use directories_next::ProjectDirs;
use std::time::Instant; use std::time::Instant;

View File

@ -1,10 +1,11 @@
use crate::adapters::*; use crate::adapters::*;
use crate::caching_writer::async_read_and_write_to_cache;
use crate::config::RgaConfig; use crate::config::RgaConfig;
use crate::matching::*; use crate::matching::*;
use crate::recurse::concat_read_streams; use crate::recurse::concat_read_streams;
use crate::{ use crate::{
preproc_cache::{LmdbCache, PreprocCache}, preproc_cache::{LmdbCache, PreprocCache},
print_bytes, CachingReader, print_bytes,
}; };
use anyhow::*; use anyhow::*;
use log::*; use log::*;
@ -53,7 +54,7 @@ async fn choose_adapter(
* If a cache is passed, read/write to it. * If a cache is passed, read/write to it.
* *
*/ */
pub async fn rga_preproc(ai: AdaptInfo<'_>) -> Result<ReadBox<'_>> { pub async fn rga_preproc(ai: AdaptInfo) -> Result<ReadBox> {
debug!("path (hint) to preprocess: {:?}", ai.filepath_hint); debug!("path (hint) to preprocess: {:?}", ai.filepath_hint);
/*todo: move if archive_recursion_depth >= config.max_archive_recursion.0 { /*todo: move if archive_recursion_depth >= config.max_archive_recursion.0 {
let s = format!("{}[rga: max archive recursion reached]", line_prefix).into_bytes(); let s = format!("{}[rga: max archive recursion reached]", line_prefix).into_bytes();
@ -139,12 +140,12 @@ fn compute_cache_key(
bincode::serialize(&key).context("could not serialize path") bincode::serialize(&key).context("could not serialize path")
} }
} }
async fn run_adapter_recursively<'a>( async fn run_adapter_recursively(
ai: AdaptInfo<'a>, ai: AdaptInfo,
adapter: Rc<dyn FileAdapter>, adapter: Rc<dyn FileAdapter>,
detection_reason: FileMatcher, detection_reason: FileMatcher,
active_adapters: ActiveAdapters, active_adapters: ActiveAdapters,
) -> Result<ReadBox<'a>> { ) -> Result<ReadBox> {
let AdaptInfo { let AdaptInfo {
filepath_hint, filepath_hint,
is_real_file, is_real_file,
@ -206,7 +207,7 @@ async fn run_adapter_recursively<'a>(
) )
})?; })?;
let inp = concat_read_streams(inp); let inp = concat_read_streams(inp);
let inp = CachingReader::new( let inp = async_read_and_write_to_cache(
inp, inp,
cache_max_blob_len.0.try_into().unwrap(), cache_max_blob_len.0.try_into().unwrap(),
cache_compression_level.0.try_into().unwrap(), cache_compression_level.0.try_into().unwrap(),

View File

@ -4,13 +4,13 @@ use crate::{adapted_iter::AdaptedFilesIterBox, adapters::*};
use async_stream::stream; use async_stream::stream;
use tokio_stream::StreamExt; use tokio_stream::StreamExt;
pub struct RecursingConcattyReader<'a> { pub struct RecursingConcattyReader {
inp: AdaptedFilesIterBox<'a>, inp: AdaptedFilesIterBox,
cur: Option<ReadBox<'a>>, cur: Option<ReadBox>,
} }
pub fn concat_read_streams( pub fn concat_read_streams(
mut input: AdaptedFilesIterBox<'_>, mut input: AdaptedFilesIterBox,
) -> ReadBox<'_> { ) -> ReadBox {
let s = stream! { let s = stream! {
while let Some(output) = input.next() { while let Some(output) = input.next() {
let mut stream = ReaderStream::new(output.inp); let mut stream = ReaderStream::new(output.inp);

View File

@ -15,7 +15,7 @@ pub fn test_data_dir() -> PathBuf {
d d
} }
pub fn simple_adapt_info<'a>(filepath: &Path, inp: ReadBox<'a>) -> (AdaptInfo<'a>, FileMatcher) { pub fn simple_adapt_info<'a>(filepath: &Path, inp: ReadBox) -> (AdaptInfo, FileMatcher) {
( (
AdaptInfo { AdaptInfo {
filepath_hint: filepath.to_owned(), filepath_hint: filepath.to_owned(),