remove lifetimes, fix

This commit is contained in:
phiresky 2022-10-29 23:56:25 +02:00
parent 906043060b
commit af8cf228b3
9 changed files with 125 additions and 171 deletions

View File

@ -1,24 +1,24 @@
use crate::adapters::AdaptInfo;
// TODO: is using the Iterator trait possible? This should basically be Iterator<Item = AdaptInfo>
pub trait AdaptedFilesIter {
pub trait AdaptedFilesIter: Send {
// next takes a 'a-lived reference and returns an AdaptInfo that lives as long as the reference
fn next<'a>(&'a mut self) -> Option<AdaptInfo<'a>>;
fn next<'a>(&'a mut self) -> Option<AdaptInfo>;
}
/// A single AdaptInfo
pub struct SingleAdaptedFileAsIter<'a> {
ai: Option<AdaptInfo<'a>>,
pub struct SingleAdaptedFileAsIter {
ai: Option<AdaptInfo>,
}
impl SingleAdaptedFileAsIter<'_> {
pub fn new<'a>(ai: AdaptInfo<'a>) -> SingleAdaptedFileAsIter<'a> {
impl SingleAdaptedFileAsIter {
pub fn new<'a>(ai: AdaptInfo) -> SingleAdaptedFileAsIter {
SingleAdaptedFileAsIter { ai: Some(ai) }
}
}
impl AdaptedFilesIter for SingleAdaptedFileAsIter<'_> {
fn next<'a>(&'a mut self) -> Option<AdaptInfo<'a>> {
impl AdaptedFilesIter for SingleAdaptedFileAsIter {
fn next<'a>(&'a mut self) -> Option<AdaptInfo> {
self.ai.take()
}
}
pub type AdaptedFilesIterBox<'a> = Box<dyn AdaptedFilesIter + 'a>;
pub type AdaptedFilesIterBox = Box<dyn AdaptedFilesIter>;

View File

@ -23,7 +23,7 @@ use std::path::PathBuf;
use std::pin::Pin;
use std::rc::Rc;
pub type ReadBox<'a> = Pin<Box<dyn AsyncRead + 'a>>;
pub type ReadBox = Pin<Box<dyn AsyncRead + Send>>;
pub struct AdapterMeta {
/// unique short name of this adapter (a-z0-9 only)
pub name: String,
@ -82,12 +82,12 @@ pub trait FileAdapter: GetMetadata {
/// detection_reason is the Matcher that was used to identify this file. Unless --rga-accurate was given, it is always a FastMatcher
fn adapt<'a>(
&self,
a: AdaptInfo<'a>,
a: AdaptInfo,
detection_reason: &FileMatcher,
) -> Result<AdaptedFilesIterBox<'a>>;
) -> Result<AdaptedFilesIterBox>;
}
pub struct AdaptInfo<'a> {
pub struct AdaptInfo {
/// file path. May not be an actual file on the file system (e.g. in an archive). Used for matching file extensions.
pub filepath_hint: PathBuf,
/// true if filepath_hint is an actual file on the file system
@ -95,11 +95,11 @@ pub struct AdaptInfo<'a> {
/// depth at which this file is in archives. 0 for real filesystem
pub archive_recursion_depth: i32,
/// stream to read the file from. can be from a file or from some decoder
pub inp: ReadBox<'a>,
pub inp: ReadBox,
/// prefix every output line with this string to better indicate the file's location if it is in some archive
pub line_prefix: String,
pub postprocess: bool,
pub config: RgaConfig,
pub config: RgaConfig
}
/// (enabledAdapters, disabledAdapters)

View File

@ -2,15 +2,17 @@ use crate::adapted_iter::SingleAdaptedFileAsIter;
use super::*;
use anyhow::Result;
use async_stream::{stream, AsyncStream};
use bytes::{Buf, Bytes};
use log::*;
use tokio_util::io::StreamReader;
use crate::adapters::FileAdapter;
use std::future::Future;
use std::path::Path;
use std::process::{ExitStatus, Stdio};
use std::task::Poll;
use tokio::io::AsyncReadExt;
use tokio::process::Command;
use tokio::process::{Child, Command};
// TODO: don't separate the trait and the struct
pub trait SpawningFileAdapterTrait: GetMetadata {
@ -52,46 +54,38 @@ pub fn map_exe_error(err: std::io::Error, exe_name: &str, help: &str) -> Error {
/** waits for a process to finish, returns an io error if the process failed */
struct ProcWaitReader {
proce: Pin<Box<dyn Future<Output = std::io::Result<ExitStatus>>>>,
process: Option<Child>,
future: Option<Pin<Box<dyn Future<Output = std::io::Result<ExitStatus>>>>>,
}
impl AsyncRead for ProcWaitReader {
fn poll_read(
mut self: Pin<&mut Self>,
cx: &mut std::task::Context<'_>,
_buf: &mut tokio::io::ReadBuf<'_>,
) -> std::task::Poll<std::io::Result<()>> {
match self.proce.as_mut().poll(cx) {
std::task::Poll::Ready(x) => {
let x = x?;
if x.success() {
Poll::Ready(std::io::Result::Ok(()))
// Constructor for ProcWaitReader.
impl ProcWaitReader {
/// Creates a `ProcWaitReader` that takes ownership of the child process `cmd`.
///
/// The child is stored in `process`; `future` starts out as `None`.
/// Nothing is polled or awaited here.
// NOTE(review): in the visible hunks `pipe_output` goes through `proc_wait`
// instead of this type — confirm whether this constructor is still called.
fn new(cmd: Child) -> ProcWaitReader {
ProcWaitReader {
process: Some(cmd),
future: None,
}
}
}
fn proc_wait(mut child: Child) -> impl AsyncRead {
let s = stream! {
let res = child.wait().await?;
if res.success() {
yield std::io::Result::Ok(Bytes::new());
} else {
Poll::Ready(Err(std::io::Error::new(
yield std::io::Result::Err(std::io::Error::new(
std::io::ErrorKind::Other,
format_err!("subprocess failed: {:?}", x),
)))
}
}
Poll::Pending => std::task::Poll::Pending,
}
/*let status = self.proce.wait();
if status.success() {
std::io::Result::Ok(0)
} else {
Err(std::io::Error::new(
std::io::ErrorKind::Other,
format_err!("subprocess failed: {:?}", status),
))
}*/
format_err!("subprocess failed: {:?}", res),
));
}
};
StreamReader::new(s)
}
pub fn pipe_output<'a>(
_line_prefix: &str,
mut cmd: Command,
inp: ReadBox<'a>,
inp: ReadBox,
exe_name: &str,
help: &str,
) -> Result<ReadBox<'a>> {
) -> Result<ReadBox> {
let mut cmd = cmd
.stdin(Stdio::piped())
.stdout(Stdio::piped())
@ -100,21 +94,19 @@ pub fn pipe_output<'a>(
let mut stdi = cmd.stdin.take().expect("is piped");
let stdo = cmd.stdout.take().expect("is piped");
tokio::task::spawn_local(async move {
tokio::io::copy(&mut inp, &mut stdi).await;
tokio::spawn(async move {
let mut z = inp;
tokio::io::copy(&mut z, &mut stdi).await;
});
Ok(Box::pin(stdo.chain(ProcWaitReader {
proce: Box::pin(cmd.wait()),
})))
Ok(Box::pin(stdo.chain(proc_wait(cmd))))
}
impl FileAdapter for SpawningFileAdapter {
fn adapt<'a>(
&self,
ai: AdaptInfo<'a>,
ai: AdaptInfo,
_detection_reason: &FileMatcher,
) -> Result<AdaptedFilesIterBox<'a>> {
) -> Result<AdaptedFilesIterBox> {
let AdaptInfo {
filepath_hint,
inp,

View File

@ -5,9 +5,11 @@ use ripgrep_all as rga;
use anyhow::Context;
use log::debug;
use std::{fs::File, time::Instant};
use std::{time::Instant};
use tokio::fs::File;
fn main() -> anyhow::Result<()> {
#[tokio::main]
async fn main() -> anyhow::Result<()> {
env_logger::init();
let mut arg_arr: Vec<std::ffi::OsString> = std::env::args_os().collect();
let last = arg_arr.pop().expect("No filename specified");
@ -18,10 +20,10 @@ fn main() -> anyhow::Result<()> {
std::env::current_dir()?.join(&filepath)
};
let i = File::open(&path).context("Specified input file not found")?;
let mut o = std::io::stdout();
let i = File::open(&path).await.context("Specified input file not found")?;
let mut o = tokio::io::stdout();
let ai = AdaptInfo {
inp: Box::new(i),
inp: Box::pin(i),
filepath_hint: path,
is_real_file: true,
line_prefix: "".to_string(),
@ -31,9 +33,9 @@ fn main() -> anyhow::Result<()> {
};
let start = Instant::now();
let mut oup = rga_preproc(ai).context("during preprocessing")?;
let mut oup = rga_preproc(ai).await.context("during preprocessing")?;
debug!("finding and starting adapter took {}", print_dur(start));
let res = std::io::copy(&mut oup, &mut o);
let res = tokio::io::copy(&mut oup, &mut o).await;
if let Err(e) = res {
if e.kind() == std::io::ErrorKind::BrokenPipe {
// happens if e.g. ripgrep detects binary data in the pipe so it cancels reading

View File

@ -1,112 +1,72 @@
use std::{pin::Pin, task::Poll};
use std::pin::Pin;
use anyhow::Result;
use async_compression::tokio::write::ZstdEncoder;
use async_stream::stream;
use log::*;
use tokio::{
io::{AsyncRead, AsyncWrite, AsyncWriteExt},
pin,
};
use tokio::io::{AsyncRead, AsyncWriteExt};
use tokio_stream::StreamExt;
use tokio_util::io::{ReaderStream, StreamReader};
use crate::adapters::ReadBox;
/**
* wrap a writer so that it is passthrough,
* wrap an AsyncRead so that it is passthrough,
* but the data passing through is also compressed and written into a buffer,
* unless the compressed data grows beyond max_cache_size bytes, in which case the cache is dropped and it is pure passthrough.
*/
pub struct CachingReader<R: AsyncRead> {
max_cache_size: usize,
// set to none if the size goes over the limit
zstd_writer: Option<ZstdEncoder<Vec<u8>>>,
inp: Pin<Box<R>>,
bytes_written: u64,
on_finish: Box<dyn FnOnce((u64, Option<Vec<u8>>)) -> Result<()> + Send>,
}
impl<R: AsyncRead> CachingReader<R> {
pub fn new(
inp: R,
pub fn async_read_and_write_to_cache<'a>(
inp: impl AsyncRead + Send +'a,
max_cache_size: usize,
compression_level: i32,
on_finish: Box<dyn FnOnce((u64, Option<Vec<u8>>)) -> Result<()> + Send>,
) -> Result<CachingReader<R>> {
Ok(CachingReader {
inp: Box::pin(inp),
max_cache_size,
zstd_writer: Some(ZstdEncoder::with_quality(
) -> Result<Pin<Box<dyn AsyncRead + Send +'a>>> {
let inp = Box::pin(inp);
let mut zstd_writer = Some(ZstdEncoder::with_quality(
Vec::new(),
async_compression::Level::Precise(compression_level as u32),
)),
bytes_written: 0,
on_finish,
})
}
pub fn finish(
&mut self,
cx: &mut std::task::Context<'_>,
) -> std::io::Result<(u64, Option<Vec<u8>>)> {
if let Some(writer) = self.zstd_writer.take() {
pin!(writer);
writer.as_mut().poll_shutdown(cx)?;
let res = writer.get_pin_mut().clone(); // TODO: without copying possible?
if res.len() <= self.max_cache_size {
return Ok((self.bytes_written, Some(res)));
}
}
Ok((self.bytes_written, None))
}
async fn write_to_compressed(&mut self, buf: &[u8]) -> std::io::Result<()> {
if let Some(writer) = self.zstd_writer.as_mut() {
writer.write_all(buf).await?;
));
let mut bytes_written = 0;
let s = stream! {
let mut stream = ReaderStream::new(inp);
while let Some(bytes) = stream.next().await {
if let Ok(bytes) = &bytes {
if let Some(writer) = zstd_writer.as_mut() {
writer.write_all(&bytes).await?;
bytes_written += bytes.len() as u64;
let compressed_len = writer.get_ref().len();
trace!("wrote {} to zstd, len now {}", buf.len(), compressed_len);
if compressed_len > self.max_cache_size {
trace!("wrote {} to zstd, len now {}", bytes.len(), compressed_len);
if compressed_len > max_cache_size {
debug!("cache longer than max, dropping");
//writer.finish();
self.zstd_writer.take();
zstd_writer.take();
}
}
Ok(())
}
}
impl<R> AsyncRead for CachingReader<R>
where
R: AsyncRead,
{
fn poll_read(
mut self: std::pin::Pin<&mut Self>,
cx: &mut std::task::Context<'_>,
mut buf: &mut tokio::io::ReadBuf<'_>,
) -> std::task::Poll<std::io::Result<()>> {
let old_filled_len = buf.filled().len();
match self.inp.as_mut().poll_read(cx, &mut buf) {
/*Ok(0) => {
yield bytes;
}
Ok(read_bytes) => {
self.write_to_compressed(&buf[0..read_bytes])?;
self.bytes_written += read_bytes as u64;
Ok(read_bytes)
}*/
Poll::Ready(rdy) => {
if let Ok(()) = &rdy {
let slice = buf.filled();
let read_bytes = slice.len() - old_filled_len;
if read_bytes == 0 {
// EOF
// move out of box, replace with noop lambda
let on_finish =
std::mem::replace(&mut self.on_finish, Box::new(|_| Ok(())));
// EOF, finish!
(on_finish)(self.finish(cx)?)
.map(|()| 0)
.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
// EOF, call on_finish
let finish = {
if let Some(mut writer) = zstd_writer.take() {
writer.shutdown().await?;
let res = writer.into_inner();
if res.len() <= max_cache_size {
(bytes_written, Some(res))
} else {
self.write_to_compressed(&slice[old_filled_len..]);
self.bytes_written += read_bytes as u64;
}
}
Poll::Ready(rdy)
}
Poll::Pending => Poll::Pending,
(bytes_written, None)
}
} else {
(bytes_written, None)
}
};
// EOF, finish!
on_finish(finish)
.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
};
Ok(Box::pin(StreamReader::new(s)))
}

View File

@ -13,7 +13,6 @@ pub mod recurse;
pub mod test_utils;
use anyhow::Context;
use anyhow::Result;
pub use caching_writer::CachingReader;
use directories_next::ProjectDirs;
use std::time::Instant;

View File

@ -1,10 +1,11 @@
use crate::adapters::*;
use crate::caching_writer::async_read_and_write_to_cache;
use crate::config::RgaConfig;
use crate::matching::*;
use crate::recurse::concat_read_streams;
use crate::{
preproc_cache::{LmdbCache, PreprocCache},
print_bytes, CachingReader,
print_bytes,
};
use anyhow::*;
use log::*;
@ -53,7 +54,7 @@ async fn choose_adapter(
* If a cache is passed, read/write to it.
*
*/
pub async fn rga_preproc(ai: AdaptInfo<'_>) -> Result<ReadBox<'_>> {
pub async fn rga_preproc(ai: AdaptInfo) -> Result<ReadBox> {
debug!("path (hint) to preprocess: {:?}", ai.filepath_hint);
/*todo: move if archive_recursion_depth >= config.max_archive_recursion.0 {
let s = format!("{}[rga: max archive recursion reached]", line_prefix).into_bytes();
@ -139,12 +140,12 @@ fn compute_cache_key(
bincode::serialize(&key).context("could not serialize path")
}
}
async fn run_adapter_recursively<'a>(
ai: AdaptInfo<'a>,
async fn run_adapter_recursively(
ai: AdaptInfo,
adapter: Rc<dyn FileAdapter>,
detection_reason: FileMatcher,
active_adapters: ActiveAdapters,
) -> Result<ReadBox<'a>> {
) -> Result<ReadBox> {
let AdaptInfo {
filepath_hint,
is_real_file,
@ -206,7 +207,7 @@ async fn run_adapter_recursively<'a>(
)
})?;
let inp = concat_read_streams(inp);
let inp = CachingReader::new(
let inp = async_read_and_write_to_cache(
inp,
cache_max_blob_len.0.try_into().unwrap(),
cache_compression_level.0.try_into().unwrap(),

View File

@ -4,13 +4,13 @@ use crate::{adapted_iter::AdaptedFilesIterBox, adapters::*};
use async_stream::stream;
use tokio_stream::StreamExt;
pub struct RecursingConcattyReader<'a> {
inp: AdaptedFilesIterBox<'a>,
cur: Option<ReadBox<'a>>,
pub struct RecursingConcattyReader {
inp: AdaptedFilesIterBox,
cur: Option<ReadBox>,
}
pub fn concat_read_streams(
mut input: AdaptedFilesIterBox<'_>,
) -> ReadBox<'_> {
mut input: AdaptedFilesIterBox,
) -> ReadBox {
let s = stream! {
while let Some(output) = input.next() {
let mut stream = ReaderStream::new(output.inp);

View File

@ -15,7 +15,7 @@ pub fn test_data_dir() -> PathBuf {
d
}
pub fn simple_adapt_info<'a>(filepath: &Path, inp: ReadBox<'a>) -> (AdaptInfo<'a>, FileMatcher) {
pub fn simple_adapt_info<'a>(filepath: &Path, inp: ReadBox) -> (AdaptInfo, FileMatcher) {
(
AdaptInfo {
filepath_hint: filepath.to_owned(),