From 943da46e8268b7db028afdc91a43e036cb4dac4f Mon Sep 17 00:00:00 2001 From: Alexandre Bury Date: Mon, 8 Jan 2018 00:48:20 +0100 Subject: [PATCH] Add SpanListIterator --- Cargo.toml | 1 + src/lib.rs | 1 + src/theme.rs | 4 +- src/utils/lines_iterator.rs | 4 +- src/utils/mod.rs | 1 + src/utils/span_lines_iterator.rs | 848 +++++++++++++++++++++++++++---- 6 files changed, 756 insertions(+), 103 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 682f072..a18e3f4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,6 +33,7 @@ owning_ref = "0.3" toml = "0.4" unicode-segmentation = "1.0" unicode-width = "0.1" +xi-unicode = "0.1.0" [dependencies.bear-lib-terminal] optional = true diff --git a/src/lib.rs b/src/lib.rs index 1536244..f0bbc71 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -63,6 +63,7 @@ extern crate log; #[macro_use] extern crate maplit; +extern crate xi_unicode; extern crate num; extern crate owning_ref; extern crate toml; diff --git a/src/theme.rs b/src/theme.rs index c1cd927..3bb2035 100644 --- a/src/theme.rs +++ b/src/theme.rs @@ -123,7 +123,7 @@ use toml; /// Combine a color and an effect. /// /// Represents any transformation that can be applied to text. -#[derive(Clone, Copy, Debug)] +#[derive(Clone, Copy, Debug, PartialEq, Eq)] pub struct Style { /// Effect to apply. /// @@ -165,7 +165,7 @@ impl From for Style { } /// Text effect -#[derive(Clone, Copy, Debug)] +#[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum Effect { /// No effect Simple, diff --git a/src/utils/lines_iterator.rs b/src/utils/lines_iterator.rs index 1441b1b..d41df0f 100644 --- a/src/utils/lines_iterator.rs +++ b/src/utils/lines_iterator.rs @@ -26,8 +26,8 @@ impl<'a> LinesIterator<'a> { /// Yields rows of `width` cells or less. 
pub fn new(content: &'a str, width: usize) -> Self { LinesIterator { - content: content, - width: width, + content, + width, offset: 0, show_spaces: false, } diff --git a/src/utils/mod.rs b/src/utils/mod.rs index 11f9d9a..9eb081b 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -5,6 +5,7 @@ use unicode_width::UnicodeWidthStr; mod lines_iterator; mod reader; +pub mod span_lines_iterator; pub use self::lines_iterator::{LinesIterator, Row}; pub use self::reader::ProgressReader; diff --git a/src/utils/span_lines_iterator.rs b/src/utils/span_lines_iterator.rs index 7eef122..8a43d0a 100644 --- a/src/utils/span_lines_iterator.rs +++ b/src/utils/span_lines_iterator.rs @@ -1,150 +1,800 @@ +//! bla use std::borrow::Cow; +use std::iter::Peekable; use theme::Style; +use unicode_segmentation::UnicodeSegmentation; use unicode_width::UnicodeWidthStr; use xi_unicode::LineBreakLeafIter; +/// Input to the algorithm +#[derive(Debug, Clone, PartialEq, Eq)] pub struct Span<'a> { text: Cow<'a, str>, - width: usize, style: Style, } -pub struct Row<'a> { - spans: Vec>, - width: usize, +/// Refers to a part of a span +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct Segment { + /// ID of the span this segment refers to + pub span_id: usize, + + /// Beginning of this segment within the span (included) + pub start: usize, + /// End of this segment within the span (excluded) + pub end: usize, + + /// Width of this segment + pub width: usize, } -pub struct SpanLinesIterator<'a: 'b, 'b> { - /// Input that we want to split - content: &'b [Span<'a>], +impl Segment { + #[cfg(test)] + fn with_text<'a>(self, text: &'a str) -> SegmentWithText<'a> { + SegmentWithText { text, seg: self } + } +} - /// Available width +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +struct SegmentWithText<'a> { + seg: Segment, + text: &'a str, +} + +/// Non-splittable piece of text. 
+#[derive(Debug, Clone, PartialEq, Eq)] +struct Chunk<'a> { width: usize, + segments: Vec>, + hard_stop: bool, + ends_with_space: bool, +} + +impl<'a> Chunk<'a> { + /// Remove some text from the front. + /// + /// We're given the length (number of bytes) and the width. + fn remove_front(&mut self, mut to_remove: ChunkPart) { + // Remove something from each segment until we've removed enough. + for segment in &mut self.segments { + if to_remove.length <= segment.seg.end - segment.seg.start { + // This segment is bigger than what we need to remove + // So just trim the prefix and stop there. + segment.seg.start += to_remove.length; + segment.seg.width -= to_remove.width; + segment.text = &segment.text[to_remove.length..]; + break; + } else { + // This segment is too small, so it'll disappear entirely. + to_remove.length -= segment.seg.end - segment.seg.start; + to_remove.width -= segment.seg.width; + + // Empty this segment + segment.seg.start = segment.seg.end; + segment.seg.width = 0; + segment.text = &""; + } + } + } + + /// Remove the last character from this chunk. + /// + /// Usually done to remove a trailing space/newline. + fn remove_last_char(&mut self) { + // We remove the last char in 2 situations: + // * Trailing space. + // * Trailing newline. + // Only in the first case does this affect width. + // (Because newlines have 0 width) + + if self.ends_with_space { + // Only reduce the width if the last char was a space. + // Otherwise it's a newline, and we don't want to reduce + // that. + self.width -= 1; + } + + // Is the last segment empty after trimming it? + // If yes, just drop it. + let last_empty = { + let last = self.segments.last_mut().unwrap(); + last.seg.end -= 1; + if self.ends_with_space { + last.seg.width -= 1; + } + last.seg.start == last.seg.end + }; + if last_empty { + self.segments.pop().unwrap(); + } + } +} + +/// Iterator that returns non-breakable chunks of text. +/// +/// Works across spans of text.
+struct ChunkIterator<'a, 'b> +where + 'a: 'b, +{ + /// Input that we want to split + spans: &'b [Span<'a>], current_span: usize, + + /// How much of the current span has been processed already. offset: usize, } -impl<'a: 'b, 'b> SpanLinesIterator<'a, 'b> { - pub fn new(content: &'b [Span<'a>], width: usize) -> Self { - SpanLinesIterator { - content, - width, +impl<'a, 'b> ChunkIterator<'a, 'b> +where + 'a: 'b, +{ + fn new(spans: &'b [Span<'a>]) -> Self { + ChunkIterator { + spans, current_span: 0, offset: 0, } } } -// Intermediate representation of a Span, easier to manipulate. -struct Segment { - span_id: usize, - start: usize, - end: usize, - width: usize, -} +/// This iterator produces chunks of non-breakable text. +/// +/// These chunks may go across spans (a single word may be broken into more +/// than one span, for instance if parts of it are marked up differently). +impl<'a, 'b> Iterator for ChunkIterator<'a, 'b> +where + 'a: 'b, +{ + type Item = Chunk<'b>; -impl<'a, 'b> Iterator for SpanLinesIterator<'a, 'b> { - type Item = Row<'a>; - - fn next(&mut self) -> Option> { - if self.current_span >= self.content.len() { + fn next(&mut self) -> Option { + if self.current_span >= self.spans.len() { return None; } - let current_span = &self.content[self.current_span]; + let mut span: &Span<'a> = &self.spans[self.current_span]; - let mut available = self.width; - let mut iter = LineBreakLeafIter::new(&current_span.text, self.offset); + let mut total_width = 0; - let mut spans = Vec::new(); - let mut width = 0; + // We'll use an iterator from xi-unicode to detect possible breaks. + let mut iter = LineBreakLeafIter::new(&span.text, self.offset); - // We'll build a list of segments. - // There will be a 1-for-1 mapping from segments to spans. - // But segments are easier to manipulate and extend for now. - let mut segments: Vec = Vec::new(); + // We'll accumulate segments from spans.
+ let mut segments = Vec::new(); - // When a span does not end on a possible break, its last segment - // can only be included depending on what comes after. - // So we keep a list of consecutive segments ids without breaks. - let mut carry_over: Vec = Vec::new(); - // Whenever a segment is accepted, all of these can be inserted too. + // When we reach the end of a span, xi-unicode returns a break, but it + // actually depends on the next span. Such breaks are "fake" breaks. + // So we'll loop until we find a "true" break + // (a break that doesn't happen at the end of a span). + // Most of the time, it will happen on the first iteration. + loop { + // Look at next possible break + // `hard_stop = true` means that the break is non-optional, + // like after a `\n`. + let (pos, hard_stop) = iter.next(&span.text); - 'outer: for (span_id, span) in - self.content.iter().enumerate().skip(self.current_span) - { - // Make a new segment! - loop { - // Get the next possible break point. - let (pos, hard) = iter.next(&span.text); + // When xi-unicode reaches the end of a span, it returns a "fake" + // break. To know if it's actually a true break, we need to give + // it the next span. If, given the next span, it returns a break + // at position 0, then the previous one was a true break. + // So when pos = 0, we don't really have a new segment, but we + // can end the current chunk. - // Lookup the corresponding text segment. - let segment = &span.text[self.offset..pos]; - let width = segment.width(); + let (width, ends_with_space) = if pos == 0 { + // If pos = 0, we had a span before. + let prev_span = &self.spans[self.current_span - 1]; + (0, prev_span.text.ends_with(' ')) + } else { + // We actually got something. + // Remember its width, and whether it ends with a space. + // + // (When a chunk ends with a space, we may compress it a bit + // near the end of a row, so this information will be useful + // later.)
+ let text = &span.text[self.offset..pos]; - // If it doesn't fit, it's time to go home. - if width > available { - // Early return! - break 'outer; - } + (text.width(), text.ends_with(' ')) + }; - available -= width; - - // It fits, but... for real? - if pos == span.text.len() { - // It was too good to be true! - // It's just the end of a span, not an actual break. - // So save this stub for now, and move on to the next span. - carry_over.push(span_id); - // Start on the next span. - self.offset = 0; - break; - } - - // We got it! We got a chunk! - // First, append any carry-over segment - for carry in carry_over.drain(..) { - // We need to include this entire segment. - if segments.last().map(|s| s.span_id) == Some(carry) { - - } else { - segments.push(Segment {}); - } - } - - // Include the present segment. - if pos != 0 { - segments.push(Segment { - span_id, - width, + if pos != 0 { + // If pos != 0, we got an actual segment of a span. + total_width += width; + segments.push(SegmentWithText { + seg: Segment { + span_id: self.current_span, start: self.offset, end: pos, + width, + }, + text: &span.text[self.offset..pos], + }); + } + + if pos == span.text.len() { + // If we reached the end of the slice, + // we need to look at the next span first. + self.current_span += 1; + + if self.current_span >= self.spans.len() { + // If this was the last chunk, return as is! + return Some(Chunk { + width: total_width, + segments, + hard_stop, + ends_with_space, }); - - self.offset = pos; } - if hard { - // Stop here. - break 'outer; + span = &self.spans[self.current_span]; + self.offset = 0; + continue; + } + + // Remember where we are. + self.offset = pos; + + // We found a valid stop, return the current chunk. 
+ return Some(Chunk { + width: total_width, + segments, + hard_stop, + ends_with_space, + }); + } + } +} + +/// A list of segments representing a row of text +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Row { + /// List of segments + pub segments: Vec, + /// Total width for this row + pub width: usize, +} + +/// Generates rows of text in constrained width. +/// +/// Works on spans of text. +pub struct SpanLinesIterator<'a, 'b> +where + 'a: 'b, +{ + iter: Peekable>, + + /// Available width + width: usize, + + /// If a chunk wouldn't fit, we had to cut it in pieces. + /// This is how far in the current chunk we are. + chunk_offset: ChunkPart, +} + +impl<'a, 'b> SpanLinesIterator<'a, 'b> +where + 'a: 'b, +{ + /// Creates a new iterator with the given content and width. + pub fn new(spans: &'b [Span<'a>], width: usize) -> Self { + SpanLinesIterator { + iter: ChunkIterator::new(spans).peekable(), + width, + chunk_offset: ChunkPart::default(), + } + } +} + +/// Result of a fitness test +/// +/// Describes how well a chunk fits in the available space. +enum ChunkFitResult { + /// This chunk can fit as-is + Fits, + + /// This chunk fits, but it'll be the last one. + /// Additionally, its last char may need to be removed. + FitsBarely, + + /// This chunk doesn't fit. Don't even. + DoesNotFit, +} + +/// Look at a chunk, and decide how it could fit. +fn consider_chunk(available: usize, chunk: &Chunk) -> ChunkFitResult { + if chunk.width <= available { + // We fits. No question about it. + if chunk.hard_stop { + // Still, we have to stop here. + // And possibly trim a newline. + ChunkFitResult::FitsBarely + } else { + // Nothing special here. + ChunkFitResult::Fits + } + } else if chunk.width == available + 1 { + // We're just SLIGHTLY too big! + // Can we just pop something? + if chunk.ends_with_space { + // Yay! + ChunkFitResult::FitsBarely + } else { + // Noo( + ChunkFitResult::DoesNotFit + } + } else { + // Can't bargain with me.
+ ChunkFitResult::DoesNotFit + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +/// Describes a part of a chunk. +/// +/// Includes both length and width to ease some computations. +/// +/// This is used to represent how much of a chunk we've already processed. +struct ChunkPart { + width: usize, + length: usize, +} + +/// Concatenates chunks as long as they fit in the given width. +fn prefix<'a, I>( + tokens: &mut Peekable, width: usize, offset: &mut ChunkPart +) -> Vec> +where + I: Iterator>, +{ + let mut available = width; + let mut chunks = Vec::new(); + + // Accumulate chunks until it doesn't fit. + loop { + // Look at the next chunk and see if it would fit. + let result = { + let next_chunk = match tokens.peek() { + None => break, + Some(chunk) => chunk, + }; + + // When considering if the chunk fits, remember that we may + // already have processed part of it. + // So (chunk - width) fits available + // if chunk fits (available + width) + consider_chunk(available + offset.width, next_chunk) + }; + + match result { + ChunkFitResult::Fits => { + // It fits! Add it and move to the next one. + let mut chunk = tokens.next().unwrap(); + // Remember to strip the prefix, in case we took some earlier. + chunk.remove_front(*offset); + // And reset our offset. + offset.length = 0; + offset.width = 0; + + available -= chunk.width; + chunks.push(chunk); + continue; + } + ChunkFitResult::FitsBarely => { + // That's it, it's the last one and we're off. + let mut chunk = tokens.next().unwrap(); + chunk.remove_front(*offset); + offset.length = 0; + offset.width = 0; + + // We know we need to remove the last character. + // Because it's either: + // * A hard stop: there is a newline + // * A compressed chunk: it ends with a space + chunk.remove_last_char(); + chunks.push(chunk); + // No need to update `available`, + // as we're ending the line anyway.
+ break; + } + ChunkFitResult::DoesNotFit => { + break; + } + } + } + + chunks +} + +impl<'a, 'b> Iterator for SpanLinesIterator<'a, 'b> +where + 'a: 'b, +{ + type Item = Row; + + fn next(&mut self) -> Option { + // Let's build a beautiful row. + + let mut chunks = + prefix(&mut self.iter, self.width, &mut self.chunk_offset); + + if chunks.is_empty() { + // Desperate action to make something fit: + // Look at the current chunk. We'll try to return a part of it. + // So now, consider each individual grapheme as a valid chunk. + // Note: it may not be the first time we try to fit this chunk, + // so remember to trim the offset we may have stored. + match self.iter.peek() { + None => return None, + Some(chunk) => { + let mut chunk = chunk.clone(); + chunk.remove_front(self.chunk_offset); + + // Try to fit part of it? + let graphemes = chunk.segments.iter().flat_map(|seg| { + let mut offset = seg.seg.start; + seg.text.graphemes(true).map(move |g| { + let width = g.width(); + let start = offset; + let end = offset + g.len(); + offset = end; + Chunk { + width, + segments: vec![ + SegmentWithText { + text: g, + seg: Segment { + width, + span_id: seg.seg.span_id, + start, + end, + }, + }, + ], + hard_stop: false, + ends_with_space: false, + } + }) + }); + chunks = prefix( + &mut graphemes.peekable(), + self.width, + &mut ChunkPart::default(), + ); + + if chunks.is_empty() { + // Seriously? After everything we did for you? + return None; + } + + // We are going to return a part of a chunk. + // So remember what we selected, + // so we can skip it next time. 
+ let width: usize = + chunks.iter().map(|chunk| chunk.width).sum(); + let length: usize = chunks + .iter() + .flat_map(|chunk| chunk.segments.iter()) + .map(|segment| segment.text.len()) + .sum(); + + self.chunk_offset.width += width; + self.chunk_offset.length += length; } } } + let width = chunks.iter().map(|c| c.width).sum(); + assert!(width <= self.width); + + // Concatenate all segments + let segments = SegmentMergeIterator::new( + chunks + .into_iter() + .flat_map(|chunk| chunk.segments) + .map(|segment| segment.seg) + .filter(|segment| segment.start != segment.end), + ).collect(); + + // TODO: merge consecutive segments of the same span + + Some(Row { segments, width }) + } +} + +struct SegmentMergeIterator { + current: Option, + inner: I, +} + +impl SegmentMergeIterator { + fn new(inner: I) -> Self { + SegmentMergeIterator { + inner, + current: None, + } + } +} + +impl Iterator for SegmentMergeIterator +where + I: Iterator, +{ + type Item = Segment; + + fn next(&mut self) -> Option { + if self.current.is_none() { + self.current = self.inner.next(); + if self.current.is_none() { + return None; + } + } + loop { - let current_span = &self.content[self.current_span]; - let (pos, hard) = iter.next(&current_span.text); - - // This is what we consider adding - let text = &current_span.text[self.offset..pos]; - - if hard { - // Stop there!
- break; + match self.inner.next() { + None => return self.current.take(), + Some(next) => { + if next.span_id == self.current.unwrap().span_id { + let current = self.current.as_mut().unwrap(); + current.end = next.end; + current.width += next.width; + } else { + let current = self.current.take(); + self.current = Some(next); + return current; + } + } } } - - Some(Row { spans, width }) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn input() -> Vec> { + vec![ + Span { + text: Cow::Borrowed("A beautiful "), + style: Style::none(), + }, + Span { + text: Cow::Borrowed("boat"), + style: Style::none(), + }, + Span { + text: Cow::Borrowed(" isn't it?\nYes indeed, my "), + style: Style::none(), + }, + Span { + text: Cow::Borrowed("Super"), + style: Style::none(), + }, + Span { + text: Cow::Borrowed("Captain !"), + style: Style::none(), + }, + ] + } + + #[test] + fn test_lines_iter() { + let input = input(); + + let iter = SpanLinesIterator::new(&input, 16); + let rows: Vec = iter.collect(); + + assert_eq!( + &rows[..], + &[ + Row { + segments: vec![ + Segment { + span_id: 0, + start: 0, + end: 12, + width: 12, + }, + Segment { + span_id: 1, + start: 0, + end: 4, + width: 4, + }, + ], + width: 16, + }, + Row { + segments: vec![ + Segment { + span_id: 2, + start: 1, + end: 10, + width: 9, + }, + ], + width: 9, + }, + Row { + segments: vec![ + Segment { + span_id: 2, + start: 11, + end: 26, + width: 15, + }, + ], + width: 15, + }, + Row { + segments: vec![ + Segment { + span_id: 3, + start: 0, + end: 5, + width: 5, + }, + Segment { + span_id: 4, + start: 0, + end: 9, + width: 9, + }, + ], + width: 14, + } + ] + ); + } + + #[test] + fn test_chunk_iter() { + let input = input(); + + let iter = ChunkIterator::new(&input); + let chunks: Vec = iter.collect(); + + assert_eq!( + &chunks[..], + &[ + Chunk { + width: 2, + segments: vec![ + Segment { + span_id: 0, + start: 0, + end: 2, + width: 2, + }.with_text("A "), + ], + hard_stop: false, + ends_with_space: true, + }, + 
Chunk { + width: 10, + segments: vec![ + Segment { + span_id: 0, + start: 2, + end: 12, + width: 10, + }.with_text("beautiful "), + ], + hard_stop: false, + ends_with_space: true, + }, + Chunk { + width: 5, + segments: vec![ + Segment { + span_id: 1, + start: 0, + end: 4, + width: 4, + }.with_text("boat"), + Segment { + span_id: 2, + start: 0, + end: 1, + width: 1, + }.with_text(" "), + ], + hard_stop: false, + ends_with_space: true, + }, + Chunk { + width: 6, + segments: vec![ + // "isn't " + Segment { + span_id: 2, + start: 1, + end: 7, + width: 6, + }.with_text("isn't "), + ], + hard_stop: false, + ends_with_space: true, + }, + Chunk { + width: 3, + segments: vec![ + // "it?\n" + Segment { + span_id: 2, + start: 7, + end: 11, + width: 3, + }.with_text("it?\n"), + ], + hard_stop: true, + ends_with_space: false, + }, + Chunk { + width: 4, + segments: vec![ + // "Yes " + Segment { + span_id: 2, + start: 11, + end: 15, + width: 4, + }.with_text("Yes "), + ], + hard_stop: false, + ends_with_space: true, + }, + Chunk { + width: 8, + segments: vec![ + // "indeed, " + Segment { + span_id: 2, + start: 15, + end: 23, + width: 8, + }.with_text("indeed, "), + ], + hard_stop: false, + ends_with_space: true, + }, + Chunk { + width: 3, + segments: vec![ + // "my " + Segment { + span_id: 2, + start: 23, + end: 26, + width: 3, + }.with_text("my "), + ], + hard_stop: false, + ends_with_space: true, + }, + Chunk { + width: 14, + segments: vec![ + // "Super" + Segment { + span_id: 3, + start: 0, + end: 5, + width: 5, + }.with_text("Super"), + // "Captain !" + Segment { + span_id: 4, + start: 0, + end: 9, + width: 9, + }.with_text("Captain !"), + ], + hard_stop: false, + ends_with_space: false, + } + ] + ); } }