better custom adapter

This commit is contained in:
phiresky 2023-02-19 00:05:51 +01:00
parent ddcfff9b4d
commit 523baf6db2
3 changed files with 29 additions and 42 deletions

View File

@ -42,14 +42,15 @@ pub struct CustomAdapterConfig {
/// the name or path of the binary to run
pub binary: String,
/// The arguments to run the program with. Placeholders:
/// {}: the file path (TODO)
/// - $input_file_extension: the file extension (without dot). e.g. foo.tar.gz -> gz
/// - $input_file_stem: the file name without the last extension. e.g. foo.tar.gz -> foo.tar
/// - $input_path: the full input file path
/// stdin of the program will be connected to the input file, and stdout is assumed to be the converted file
pub args: Vec<String>,
/// The output path hint.
/// The output path hint. The placeholders are the same as for `.args`
///
/// If not set, defaults to ${input_path}.txt
///
/// TODO: make more flexible for inner matching (e.g. foo.tar.gz should be foo.tar after gunzipping)
pub output_path_hint: Option<String>,
}
@ -104,7 +105,7 @@ lazy_static! {
// simpler markdown (with more information loss but plainer text)
//.arg("--to=commonmark-header_attributes-link_attributes-fenced_divs-markdown_in_html_blocks-raw_html-native_divs-native_spans-bracketed_spans")
args: strs(&[
"--from=$file_extension",
"--from=$input_file_extension",
"--to=plain",
"--wrap=none",
"--markdown-headings=atx"
@ -194,13 +195,18 @@ impl GetMetadata for CustomSpawningFileAdapter {
}
}
fn arg_replacer(arg: &str, filepath_hint: &Path) -> Result<String> {
Ok(expand_str_ez(arg, |s| match s {
"file_extension" => filepath_hint
expand_str_ez(arg, |s| match s {
"input_path" => Ok(filepath_hint.to_string_lossy()),
"input_file_stem" => Ok(filepath_hint
.file_stem()
.unwrap_or_default()
.to_string_lossy()),
"input_file_extension" => Ok(filepath_hint
.extension()
.map(|e| e.to_string_lossy())
.unwrap_or_default(),
_ => panic!("unknown replacer"),
}))
.unwrap_or_default()
.to_string_lossy()),
e => Err(anyhow::format_err!("unknown replacer ${{{e}}}")),
})
}
impl CustomSpawningFileAdapter {
fn command(
@ -241,15 +247,12 @@ impl FileAdapter for CustomSpawningFileAdapter {
debug!("executing {:?}", cmd);
let output = pipe_output(&line_prefix, cmd, inp, &self.binary, "")?;
Ok(one_file(AdaptInfo {
filepath_hint: PathBuf::from(expand_str_ez(
filepath_hint: PathBuf::from(arg_replacer(
self.output_path_hint
.as_deref()
.unwrap_or("${input_path}.txt"),
|r| match r {
"input_path" => filepath_hint.to_string_lossy(),
_ => panic!("unknown replacer in output_path_hint"),
},
)),
&filepath_hint,
)?),
inp: output,
line_prefix,
is_real_file: false,

View File

@ -1,6 +1,6 @@
use std::borrow::Cow;
use regex::Captures;
use anyhow::Result;
// from https://github.com/phiresky/timetrackrs/blob/1c3df09ba2c1fda6065f2927045bd28dea0738d3/src/expand.rs
@ -19,35 +19,18 @@ pub fn find_byte(needle: u8, haystack: &[u8]) -> Option<usize> {
imp(needle, haystack)
}
/// Looks up the capture group named `reference` across several match results.
///
/// The elements of `caps` are scanned in slice order; the text of the first
/// one that has a group called `reference` is returned. Returns `None` when
/// no element yields such a group.
pub fn get_capture<'a>(caps: &'a [Captures], reference: &str) -> Option<&'a str> {
    for group_set in caps {
        // `name` yields nothing for this element when the lookup fails; move on to the next one.
        if let Some(found) = group_set.name(reference) {
            return Some(found.as_str());
        }
    }
    None
}
/// Expands the references in `replacement` using the capture groups in `caps`.
///
/// Each reference found by `expand_str_lambda` is resolved through
/// `get_capture`; a reference that matches no capture group expands to the
/// empty string. The expanded text is returned as a new `String`.
pub fn expand_str_captures(caps: &[Captures], replacement: &str) -> String {
    let mut expanded = String::new();
    expand_str_lambda(
        |reference: &str| match get_capture(caps, reference) {
            Some(text) => Cow::Borrowed(text),
            // Unknown references are silently replaced by nothing.
            None => Cow::Borrowed(""),
        },
        replacement,
        &mut expanded,
    );
    expanded
}
pub fn expand_str_ez<'a, F>(replacement: &'a str, lambda: F) -> String
pub fn expand_str_ez<'a, F>(replacement: &'a str, lambda: F) -> Result<String>
where
F: Fn(&str) -> Cow<'a, str>,
F: Fn(&str) -> Result<Cow<'a, str>>,
{
let mut dst = String::new();
expand_str_lambda(lambda, replacement, &mut dst);
dst
expand_str_lambda(lambda, replacement, &mut dst)?;
Ok(dst)
}
pub fn expand_str_lambda<'a, F>(cap: F, replacement: &'a str, dst: &mut String)
pub fn expand_str_lambda<'a, F>(cap: F, replacement: &'a str, dst: &mut String) -> Result<()>
where
F: Fn(&str) -> Cow<'a, str>,
F: Fn(&str) -> Result<Cow<'a, str>>,
{
let mut replacement = replacement;
while !replacement.is_empty() {
@ -73,9 +56,10 @@ where
}
};
replacement = &replacement[cap_ref.end..];
dst.push_str(cap(cap_ref.cap).as_ref());
dst.push_str(cap(cap_ref.cap)?.as_ref());
}
dst.push_str(replacement);
Ok(())
}
/// `CaptureRef` represents a reference to a capture group inside some text.

View File

@ -243,7 +243,7 @@ pub fn loop_adapt(
ai.filepath_hint.to_string_lossy(),
&adapter.metadata().name
);
for await ifile in loop_adapt(adapter.as_ref(), detection_reason, ai).expect("todo: handle") {
for await ifile in loop_adapt(adapter.as_ref(), detection_reason, ai)? {
yield ifile;
}
}