From add39f652ac47ed0d20eb7a9ead3e7354e4ffb8b Mon Sep 17 00:00:00 2001 From: Ferran Basora Date: Sun, 26 Dec 2021 22:34:59 +0000 Subject: [PATCH] Capture multiple groups in the same regexp Adding the capability to capture multiple tokens in the same regexp. Useful for the diff summary pattern. --- samples/test6 | 10 +++++++++ src/state.rs | 56 ++++++++++++++++++++++++++++++++++++++------------- 2 files changed, 52 insertions(+), 14 deletions(-) create mode 100644 samples/test6 diff --git a/samples/test6 b/samples/test6 new file mode 100644 index 0000000..0e131ae --- /dev/null +++ b/samples/test6 @@ -0,0 +1,10 @@ +diff --git a/src/state.rs b/src/state.rs +index 022c61f..4321097 100644 +--- a/src/state.rs ++++ b/src/state.rs +@@ -10,7 +10,7 @@ const PATTERNS: [(&'static str, &'static str); 14] = [] + +diff --git a/src/view.rs b/src/state.rs +index 022c61f..4321097 100644 +--- a/src/view.rs ++++ b/src/state.rs diff --git a/src/state.rs b/src/state.rs index 4321097..7318091 100644 --- a/src/state.rs +++ b/src/state.rs @@ -4,13 +4,17 @@ use std::fmt; const EXCLUDE_PATTERNS: [(&'static str, &'static str); 1] = [("bash", r"[[:cntrl:]]\[([0-9]{1,2};)?([0-9]{1,2})?m")]; -const PATTERNS: [(&'static str, &'static str); 14] = [ +const PATTERNS: [(&'static str, &'static str); 15] = [ ("markdown_url", r"\[[^]]*\]\(([^)]+)\)"), - ("url", r"((https?://|git@|git://|ssh://|ftp://|file:///)[^ ]+)"), + ("url", r"(?P<match>(https?://|git@|git://|ssh://|ftp://|file:///)[^ ]+)"), + ( + "diff_summary", + r"diff --git a/([.\w\-@~\[\]]+?/[.\w\-@\[\]]++) b/([.\w\-@~\[\]]+?/[.\w\-@\[\]]++)", + ), ("diff_a", r"--- a/([^ ]+)"), ("diff_b", r"\+\+\+ b/([^ ]+)"), ("docker", r"sha256:([0-9a-f]{64})"), - ("path", r"(([.\w\-@~\[\]]+)?(/[.\w\-@\[\]]+)+)"), + ("path", r"(?P<match>([.\w\-@~\[\]]+)?(/[.\w\-@\[\]]+)+)"), ("color", r"#[0-9a-fA-F]{6}"), ("uid", r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}"), ("ipfs", r"Qm[0-9a-zA-Z]{44}"), @@ -84,6 +88,7 @@ impl<'a> State<'a> { .map(|tuple| (tuple.0, 
Regex::new(tuple.1).unwrap())) .collect::>(); + // This order determines the priority of pattern matching let all_patterns = [exclude_patterns, custom_patterns, patterns].concat(); for (index, line) in self.lines.iter().enumerate() { @@ -91,6 +96,7 @@ impl<'a> State<'a> { let mut offset: i32 = 0; loop { + // For this line, we search for all the patterns that match. let submatches = all_patterns .iter() .filter_map(|tuple| match tuple.1.find_iter(chunk).nth(0) { @@ -98,6 +104,8 @@ impl<'a> State<'a> { None => None, }) .collect::>(); + + // Then, we search for the match with the lowest index let first_match_option = submatches.iter().min_by(|x, y| x.2.start().cmp(&y.2.start())); if let Some(first_match) = first_match_option { @@ -105,21 +113,30 @@ impl<'a> State<'a> { let text = matching.as_str(); if let Some(captures) = pattern.captures(text) { - let (subtext, substart) = if let Some(capture) = captures.get(1) { - (capture.as_str(), capture.start()) + let captures: Vec<(&str, usize)> = if let Some(capture) = captures.name("match") { + [(capture.as_str(), capture.start())].to_vec() + } else if captures.len() > 1 { + captures + .iter() + .skip(1) + .filter_map(|capture| capture) + .map(|capture| (capture.as_str(), capture.start())) + .collect::>() } else { - (matching.as_str(), 0) + [(matching.as_str(), 0)].to_vec() }; - // Never hint or broke bash color sequences + // Never hint or break bash color sequences, but still process them if *name != "bash" { - matches.push(Match { - x: offset + matching.start() as i32 + substart as i32, - y: index as i32, - pattern: name, - text: subtext, - hint: None, - }); + for (subtext, substart) in captures.iter() { + matches.push(Match { + x: offset + matching.start() as i32 + *substart as i32, + y: index as i32, + pattern: name, + text: subtext, + hint: None, + }); + } } chunk = chunk.get(matching.end()..).expect("Unknown chunk"); @@ -409,6 +426,17 @@ mod tests { assert_eq!(results.get(0).unwrap().text.clone(), "src/main.rs"); } + #[test] 
+ fn match_diff_summary() { + let lines = split("diff --git a/samples/test1 b/samples/test2"); + let custom = [].to_vec(); + let results = State::new(&lines, "abcd", &custom).matches(false, false); + + assert_eq!(results.len(), 2); + assert_eq!(results.get(0).unwrap().text.clone(), "samples/test1"); + assert_eq!(results.get(1).unwrap().text.clone(), "samples/test2"); + } + #[test] fn priority() { let lines = split("Lorem [link](http://foo.bar) ipsum CUSTOM-52463 lorem ISSUE-123 lorem\nLorem /var/fd70b569/9999.log 52463 lorem\n Lorem 973113 lorem 123e4567-e89b-12d3-a456-426655440000 lorem 8888 lorem\n https://crates.io/23456/fd70b569 lorem");