cursive/src/utf8.rs

use std::char::from_u32;

/// Reads a potentially multi-byte UTF-8 code point.
///
/// `first` is the byte that was already read; `next` is called once for
/// every continuation byte announced by `first`. All announced continuation
/// bytes are consumed from `next` before the decoded value is validated.
///
/// # Errors
///
/// Returns a description of the problem if:
///
/// * `first` is a continuation byte, or is not a usable leading byte;
/// * `next` runs out of bytes, or yields a non-continuation byte;
/// * the sequence is an overlong encoding (it uses more bytes than the
///   value needs);
/// * the decoded value is not a Unicode scalar value (a surrogate, or
///   anything above U+10FFFF).
#[allow(dead_code)]
pub fn read_char<F>(first: u8, mut next: F) -> Result<char, String>
where
    F: FnMut() -> Option<u8>,
{
    // ASCII is encoded as itself.
    if first < 0x80 {
        return Ok(first as char);
    }

    // The number of leading 1s in the first byte is the total length of
    // the sequence, so count the leading 0s of its complement.
    let n_bytes = match (!first).leading_zeros() {
        n @ 2..=6 => n as usize,
        1 => return Err("First byte is continuation byte.".to_string()),
        7..=8 => return Err("WTF is this byte??".to_string()),
        // `first >= 0x80`, so the complement always starts with a 0 bit.
        _ => unreachable!(),
    };

    // The leading byte carries its payload in the low `7 - n_bytes` bits.
    let mut res = u32::from(first & (0x7F >> n_bytes));

    // We already have one byte, now read the others.
    for _ in 1..n_bytes {
        let byte = next().ok_or_else(|| "Missing UTF-8 byte".to_string())?;
        if byte & 0xC0 != 0x80 {
            return Err(format!(
                "Found non-continuation byte after leading: {}",
                byte
            ));
        }
        // Each continuation byte contributes 6 more bits (0x3F = 00111111).
        res = (res << 6) | u32::from(byte & 0x3F);
    }

    // Smallest value that genuinely needs `n_bytes` bytes; anything below
    // is an overlong encoding, which UTF-8 forbids.
    let min: u32 = match n_bytes {
        2 => 0x80,
        3 => 0x800,
        4 => 0x1_0000,
        5 => 0x20_0000,
        _ => 0x400_0000,
    };
    if res < min {
        return Err(format!("Overlong UTF-8 encoding of U+{:04X}", res));
    }

    // Surrogates and values above U+10FFFF (which includes every
    // non-overlong 5- or 6-byte sequence) are not valid `char`s: report
    // them instead of panicking.
    from_u32(res)
        .ok_or_else(|| format!("Invalid Unicode scalar value: U+{:04X}", res))
}

/// Returns a bitmask with the `n` lowest bits set.
///
/// A `u8` only has 8 bits, so any `n >= 8` yields `0xFF`.
#[allow(dead_code)]
fn make_mask(n: usize) -> u8 {
    match n {
        // `1 << n` fits in a u8 for n < 8, so neither the shift nor the
        // subtraction can overflow.
        0..=7 => (1u8 << n) - 1,
        _ => 0xFF,
    }
}
Add UTF-8 input support Didn't the proper methods in the standard lib, so I implemented a simple utf-8 char reader. 2015-05-28 05:13:51 +00:00			`use std::char::from_u32;`

Rustfmt 2016-07-11 02:11:21 +00:00			`/// Reads a potentially multi-bytes utf8 codepoint.`
			`///`
			`/// Reads the given first byte, and uses the given`
			`/// function to get more if needed.`
Add UTF-8 input support Didn't the proper methods in the standard lib, so I implemented a simple utf-8 char reader. 2015-05-28 05:13:51 +00:00			`///`
			`/// Returns an error if the stream is invalid utf-8.`
Fix warnings * Allow dead code for curses-only utf8 module * Remove intermediate variable in IdView 2017-03-27 20:27:50 +00:00			`#[allow(dead_code)]`
Update for new ncurses version 2016-03-15 22:37:57 +00:00			`pub fn read_char<F>(first: u8, next: F) -> Result<char, String>`
Rustfmt 2017-10-12 23:38:55 +00:00			`where`
			`F: Fn() -> Option<u8>,`
Add UTF-8 input support Didn't the proper methods in the standard lib, so I implemented a simple utf-8 char reader. 2015-05-28 05:13:51 +00:00			`{`
			`if first < 0x80 {`
Update for new ncurses version 2016-03-15 22:37:57 +00:00			`return Ok(first as char);`
Add UTF-8 input support Didn't the proper methods in the standard lib, so I implemented a simple utf-8 char reader. 2015-05-28 05:13:51 +00:00			`}`

			`// Number of leading 1s determines the number of bytes we'll have to read`
			`let n_bytes = match (!first).leading_zeros() {`
Run cargo fix --edition-idioms 2019-02-28 23:55:02 +00:00			`n @ 2..=6 => n as usize,`
Add UTF-8 input support Didn't the proper methods in the standard lib, so I implemented a simple utf-8 char reader. 2015-05-28 05:13:51 +00:00			`1 => return Err("First byte is continuation byte.".to_string()),`
Run cargo fix --edition-idioms 2019-02-28 23:55:02 +00:00			`7..=8 => return Err("WTF is this byte??".to_string()),`
Add UTF-8 input support Didn't the proper methods in the standard lib, so I implemented a simple utf-8 char reader. 2015-05-28 05:13:51 +00:00			`_ => unreachable!(),`
			`};`

			`let mut res = 0u32;`

			`// First, get the data - only the few last bits`
fix some clippy warnings 2017-10-11 16:09:49 +00:00			`res \|= u32::from(first & make_mask(7 - n_bytes));`
Add UTF-8 input support Didn't the proper methods in the standard lib, so I implemented a simple utf-8 char reader. 2015-05-28 05:13:51 +00:00
			`// We already have one byte, now read the others.`
			`for _ in 1..n_bytes {`
Replace try! with ? 2018-12-17 22:02:29 +00:00			`let byte = next().ok_or_else(\|\| "Missing UTF-8 byte".to_string())?;`
Add UTF-8 input support Didn't the proper methods in the standard lib, so I implemented a simple utf-8 char reader. 2015-05-28 05:13:51 +00:00			`if byte & 0xC0 != 0x80 {`
Rustfmt 2017-10-12 23:38:55 +00:00			`return Err(format!(`
			`"Found non-continuation byte after leading: \`
			`{}",`
			`byte`
			`));`
Add UTF-8 input support Didn't the proper methods in the standard lib, so I implemented a simple utf-8 char reader. 2015-05-28 05:13:51 +00:00			`}`
			`// We have 6 fresh new bits to read, make room.`
			`res <<= 6;`
			`// 0x3F is 00111111, so we keep the last 6 bits`
fix some clippy warnings 2017-10-11 16:09:49 +00:00			`res \|= u32::from(byte & 0x3F);`
Add UTF-8 input support Didn't the proper methods in the standard lib, so I implemented a simple utf-8 char reader. 2015-05-28 05:13:51 +00:00			`}`

			`// from_u32 could return an error if we gave it invalid utf-8.`
			`// But we're probably safe since we respected the rules when building it.`
			`Ok(from_u32(res).unwrap())`
			`}`

			`// Returns a simple bitmask with n 1s to the right.`
Fix warnings * Allow dead code for curses-only utf8 module * Remove intermediate variable in IdView 2017-03-27 20:27:50 +00:00			`#[allow(dead_code)]`
Add UTF-8 input support Didn't the proper methods in the standard lib, so I implemented a simple utf-8 char reader. 2015-05-28 05:13:51 +00:00			`fn make_mask(n: usize) -> u8 {`
			`let mut r = 0u8;`
			`for i in 0..n {`
			`r \|= 1 << i;`
			`}`
			`r`
			`}`