better custom adapter

This commit is contained in:
phiresky 2023-02-19 00:05:51 +01:00
parent ddcfff9b4d
commit 523baf6db2
3 changed files with 29 additions and 42 deletions

View File

@ -42,14 +42,15 @@ pub struct CustomAdapterConfig {
/// the name or path of the binary to run /// the name or path of the binary to run
pub binary: String, pub binary: String,
/// The arguments to run the program with. Placeholders: /// The arguments to run the program with. Placeholders:
/// {}: the file path (TODO) /// - $input_file_extension: the file extension (without dot). e.g. foo.tar.gz -> gz
/// - $input_file_stem: the file name without the last extension. e.g. foo.tar.gz -> foo.tar
/// - $input_path: the full input file path
/// stdin of the program will be connected to the input file, and stdout is assumed to be the converted file /// stdin of the program will be connected to the input file, and stdout is assumed to be the converted file
pub args: Vec<String>, pub args: Vec<String>,
/// The output path hint. /// The output path hint. The placeholders are the same as for `.args`
/// ///
/// If not set, defaults to ${input_path}.txt /// If not set, defaults to ${input_path}.txt
/// ///
/// TODO: make more flexible for inner matching (e.g. foo.tar.gz should be foo.tar after gunzipping)
pub output_path_hint: Option<String>, pub output_path_hint: Option<String>,
} }
@ -104,7 +105,7 @@ lazy_static! {
// simpler markdown (with more information loss but plainer text) // simpler markdown (with more information loss but plainer text)
//.arg("--to=commonmark-header_attributes-link_attributes-fenced_divs-markdown_in_html_blocks-raw_html-native_divs-native_spans-bracketed_spans") //.arg("--to=commonmark-header_attributes-link_attributes-fenced_divs-markdown_in_html_blocks-raw_html-native_divs-native_spans-bracketed_spans")
args: strs(&[ args: strs(&[
"--from=$file_extension", "--from=$input_file_extension",
"--to=plain", "--to=plain",
"--wrap=none", "--wrap=none",
"--markdown-headings=atx" "--markdown-headings=atx"
@ -194,13 +195,18 @@ impl GetMetadata for CustomSpawningFileAdapter {
} }
} }
fn arg_replacer(arg: &str, filepath_hint: &Path) -> Result<String> { fn arg_replacer(arg: &str, filepath_hint: &Path) -> Result<String> {
Ok(expand_str_ez(arg, |s| match s { expand_str_ez(arg, |s| match s {
"file_extension" => filepath_hint "input_path" => Ok(filepath_hint.to_string_lossy()),
"input_file_stem" => Ok(filepath_hint
.file_stem()
.unwrap_or_default()
.to_string_lossy()),
"input_file_extension" => Ok(filepath_hint
.extension() .extension()
.map(|e| e.to_string_lossy()) .unwrap_or_default()
.unwrap_or_default(), .to_string_lossy()),
_ => panic!("unknown replacer"), e => Err(anyhow::format_err!("unknown replacer ${{{e}}}")),
})) })
} }
impl CustomSpawningFileAdapter { impl CustomSpawningFileAdapter {
fn command( fn command(
@ -241,15 +247,12 @@ impl FileAdapter for CustomSpawningFileAdapter {
debug!("executing {:?}", cmd); debug!("executing {:?}", cmd);
let output = pipe_output(&line_prefix, cmd, inp, &self.binary, "")?; let output = pipe_output(&line_prefix, cmd, inp, &self.binary, "")?;
Ok(one_file(AdaptInfo { Ok(one_file(AdaptInfo {
filepath_hint: PathBuf::from(expand_str_ez( filepath_hint: PathBuf::from(arg_replacer(
self.output_path_hint self.output_path_hint
.as_deref() .as_deref()
.unwrap_or("${input_path}.txt"), .unwrap_or("${input_path}.txt"),
|r| match r { &filepath_hint,
"input_path" => filepath_hint.to_string_lossy(), )?),
_ => panic!("unknown replacer in output_path_hint"),
},
)),
inp: output, inp: output,
line_prefix, line_prefix,
is_real_file: false, is_real_file: false,

View File

@ -1,6 +1,6 @@
use std::borrow::Cow; use std::borrow::Cow;
use regex::Captures; use anyhow::Result;
// from https://github.com/phiresky/timetrackrs/blob/1c3df09ba2c1fda6065f2927045bd28dea0738d3/src/expand.rs // from https://github.com/phiresky/timetrackrs/blob/1c3df09ba2c1fda6065f2927045bd28dea0738d3/src/expand.rs
@ -19,35 +19,18 @@ pub fn find_byte(needle: u8, haystack: &[u8]) -> Option<usize> {
imp(needle, haystack) imp(needle, haystack)
} }
pub fn get_capture<'a>(caps: &'a [Captures], reference: &str) -> Option<&'a str> { pub fn expand_str_ez<'a, F>(replacement: &'a str, lambda: F) -> Result<String>
caps.iter()
.flat_map(|caps| caps.name(reference))
.next()
.map(|m| m.as_str())
}
pub fn expand_str_captures(caps: &[Captures], replacement: &str) -> String {
let mut dst = String::new();
expand_str_lambda(
|reference: &str| Cow::Borrowed(get_capture(caps, reference).unwrap_or("")),
replacement,
&mut dst,
);
dst
}
pub fn expand_str_ez<'a, F>(replacement: &'a str, lambda: F) -> String
where where
F: Fn(&str) -> Cow<'a, str>, F: Fn(&str) -> Result<Cow<'a, str>>,
{ {
let mut dst = String::new(); let mut dst = String::new();
expand_str_lambda(lambda, replacement, &mut dst); expand_str_lambda(lambda, replacement, &mut dst)?;
dst Ok(dst)
} }
pub fn expand_str_lambda<'a, F>(cap: F, replacement: &'a str, dst: &mut String) pub fn expand_str_lambda<'a, F>(cap: F, replacement: &'a str, dst: &mut String) -> Result<()>
where where
F: Fn(&str) -> Cow<'a, str>, F: Fn(&str) -> Result<Cow<'a, str>>,
{ {
let mut replacement = replacement; let mut replacement = replacement;
while !replacement.is_empty() { while !replacement.is_empty() {
@ -73,9 +56,10 @@ where
} }
}; };
replacement = &replacement[cap_ref.end..]; replacement = &replacement[cap_ref.end..];
dst.push_str(cap(cap_ref.cap).as_ref()); dst.push_str(cap(cap_ref.cap)?.as_ref());
} }
dst.push_str(replacement); dst.push_str(replacement);
Ok(())
} }
/// `CaptureRef` represents a reference to a capture group inside some text. /// `CaptureRef` represents a reference to a capture group inside some text.

View File

@ -243,7 +243,7 @@ pub fn loop_adapt(
ai.filepath_hint.to_string_lossy(), ai.filepath_hint.to_string_lossy(),
&adapter.metadata().name &adapter.metadata().name
); );
for await ifile in loop_adapt(adapter.as_ref(), detection_reason, ai).expect("todo: handle") { for await ifile in loop_adapt(adapter.as_ref(), detection_reason, ai)? {
yield ifile; yield ifile;
} }
} }