From 3d1507c3cf11de3a2f5dbf8571aa9c6c6020b529 Mon Sep 17 00:00:00 2001 From: Alexandre Bury Date: Wed, 27 May 2015 22:13:51 -0700 Subject: [PATCH] Add UTF-8 input support Didn't find the proper methods in the standard lib, so I implemented a simple utf-8 char reader. --- examples/key_codes.rs | 6 +++--- src/lib.rs | 5 +++-- src/utf8.rs | 50 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 56 insertions(+), 5 deletions(-) create mode 100644 src/utf8.rs diff --git a/examples/key_codes.rs b/examples/key_codes.rs index eeeed3d..61cdd30 100644 --- a/examples/key_codes.rs +++ b/examples/key_codes.rs @@ -4,12 +4,12 @@ use cursive::Cursive; use cursive::view::{View,BoxView}; use cursive::printer::Printer; -use cursive::event::EventResult; +use cursive::event::{EventResult,Event}; fn main() { let mut siv = Cursive::new(); - siv.add_layer(BoxView::new((10,4), KeyCodeView::new(4))); + siv.add_layer(BoxView::new((30,10), KeyCodeView::new(10))); siv.run(); } @@ -39,7 +39,7 @@ impl View for KeyCodeView { let line = match event { Event::CharEvent(c) => format!("Char: {}", c), Event::KeyEvent(key) => format!("Key: {}", key), - } + }; self.history.push(line); while self.history.len() > self.size { diff --git a/src/lib.rs b/src/lib.rs index 7ffc072..c066761 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -30,6 +30,7 @@ pub mod vec; pub mod color; mod div; +mod utf8; use std::any::Any; use std::rc::Rc; @@ -201,8 +202,8 @@ impl Cursive { let ch = ncurses::getch(); // Is it a UTF-8 starting point? - if 32 <= ch && ch < 127 { - Event::CharEvent(ch as u8 as char) + if 32 <= ch && ch < 0x100 { + Event::CharEvent(utf8::read_char(ch as u8, || ncurses::getch() as u8).unwrap()) } else { Event::KeyEvent(Key::from_ncurses(ch)) } diff --git a/src/utf8.rs b/src/utf8.rs new file mode 100644 index 0000000..2f1e9a9 --- /dev/null +++ b/src/utf8.rs @@ -0,0 +1,50 @@ +use std::char::from_u32; + +/// Reads a char from a first byte, and a function to fetch next bytes as required. 
/// Decodes a single UTF-8 encoded `char` from its first byte, pulling any
/// further bytes from `next` as required.
///
/// `next` is called once for each continuation byte announced by `first`,
/// and never for a single-byte (ASCII) character. Decoding stops right after
/// the first fetched byte that is not a continuation byte.
///
/// # Errors
///
/// Returns an error if the stream is invalid utf-8:
///
/// * `first` is a continuation byte, or is `0xFE`/`0xFF`;
/// * a byte returned by `next` is not a continuation byte;
/// * the sequence is an overlong encoding of its code point;
/// * the decoded value is not a Unicode scalar value (a surrogate, or
///   anything above `U+10FFFF`).
///
/// Every 5- and 6-byte form is rejected for one of the last two reasons, but
/// only after its continuation bytes have been consumed.
pub fn read_char<F>(first: u8, mut next: F) -> Result<char, String>
where
    F: FnMut() -> u8,
{
    if first < 0x80 {
        return Ok(first as char);
    }

    // Number of leading 1s determines the number of bytes we'll have to read
    let n_bytes = match (!first).leading_zeros() {
        n @ 2..=6 => n as usize,
        1 => return Err("First byte is continuation byte.".to_string()),
        7..=8 => return Err("WTF is this byte??".to_string()),
        // `first >= 0x80` here, so `!first` always has at least one leading zero.
        _ => unreachable!(),
    };

    // First, get the data - only the few last bits of the leading byte.
    let mut res = (first & make_mask(7 - n_bytes)) as u32;

    // We already have one byte, now read the others.
    for _ in 1..n_bytes {
        let byte = next();
        if byte & 0xC0 != 0x80 {
            return Err(format!("Found non-continuation byte after leading: {}", byte));
        }
        // Make room for 6 fresh bits; 0x3F is 00111111, so we keep the last 6 bits.
        res = (res << 6) | (byte & 0x3F) as u32;
    }

    // Smallest value that actually needs `n_bytes` bytes. Anything below it
    // could have been written with fewer bytes, which UTF-8 forbids.
    let min = match n_bytes {
        2 => 0x80,
        3 => 0x800,
        4 => 0x1_0000,
        5 => 0x20_0000,
        _ => 0x400_0000,
    };
    if res < min {
        return Err(format!("Overlong encoding of code point {:#x}", res));
    }

    // Following the bit layout is not enough: surrogates and values above
    // U+10FFFF are well-formed bit patterns that are still not valid chars.
    from_u32(res).ok_or_else(|| format!("Invalid code point: {:#x}", res))
}

// Returns a simple bitmask with n 1s to the right.
//
// Saturates to 0xFF for n >= 8 instead of overflowing the shift.
fn make_mask(n: usize) -> u8 {
    if n >= 8 {
        0xFF
    } else {
        (1u8 << n) - 1
    }
}