spinoso_string/enc/binary/
codepoints.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
use core::slice;

/// An iterator over the bytes of a binary string, yielded as `u32` codepoints.
///
/// Each byte is widened to a `u32` on its own, so every yielded value is in
/// the range `0..=255`. No multi-byte decoding is performed: a UTF-8 encoded
/// character produces one codepoint per encoded byte.
///
/// Construct one with [`Codepoints::new`]; the [`Default`] value is an
/// iterator over no bytes.
#[derive(Debug, Clone)]
pub struct Codepoints<'a> {
    /// Underlying byte iterator; every byte it yields becomes one codepoint.
    inner: slice::Iter<'a, u8>,
}

impl<'a> Codepoints<'a> {
    /// Creates an iterator that yields each byte of `bytes` as a `u32`.
    #[inline]
    pub fn new(bytes: &'a [u8]) -> Self {
        Self { inner: bytes.iter() }
    }
}

impl Iterator for Codepoints<'_> {
    type Item = u32;

    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        self.inner.next().copied().map(u32::from)
    }

    // One byte maps to exactly one codepoint, so the slice iterator's exact
    // bounds apply unchanged. Forwarding them lets `collect` and `extend`
    // reserve the right capacity up front.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.inner.size_hint()
    }

    // The remaining methods delegate to the slice iterator, which answers
    // them without stepping through every element.
    #[inline]
    fn count(self) -> usize {
        self.inner.count()
    }

    #[inline]
    fn nth(&mut self, n: usize) -> Option<Self::Item> {
        self.inner.nth(n).copied().map(u32::from)
    }

    #[inline]
    fn last(self) -> Option<Self::Item> {
        self.inner.last().copied().map(u32::from)
    }
}

impl Default for Codepoints<'_> {
    /// Returns an iterator over an empty byte slice, which yields nothing.
    #[inline]
    fn default() -> Self {
        Self::new(b"")
    }
}

#[cfg(test)]
mod tests {
    use alloc::vec::Vec;

    use super::*;
    use crate::enc::binary::BinaryString;

    // ASCII bytes map one-to-one onto their numeric values.
    #[test]
    fn test_valid_ascii() {
        let s = BinaryString::from("abc");
        let codepoints = Codepoints::new(&s);
        assert_eq!(codepoints.collect::<Vec<_>>(), &[97, 98, 99]);
    }

    // A multi-byte UTF-8 character is not decoded: each of its four encoded
    // bytes is yielded as a separate codepoint.
    #[test]
    fn test_utf8_interpreted_as_bytes() {
        let s = BinaryString::from("abc💎");
        let codepoints = Codepoints::new(&s);
        assert_eq!(codepoints.collect::<Vec<_>>(), &[97, 98, 99, 240, 159, 146, 142]);
    }

    // Bytes that are not valid UTF-8 are passed through as their raw value.
    #[test]
    fn test_invalid_utf8_interpreted_as_bytes() {
        let s = BinaryString::from(b"abc\xFF");
        let codepoints = Codepoints::new(&s);
        assert_eq!(codepoints.collect::<Vec<_>>(), &[97, 98, 99, 255]);
    }

    // Empty input yields nothing, and polling again still yields nothing.
    #[test]
    fn test_empty_yields_nothing() {
        let mut codepoints = Codepoints::new(b"");
        assert_eq!(codepoints.next(), None);
        assert_eq!(codepoints.next(), None);
    }

    // The `Default` iterator is built over an empty byte slice.
    #[test]
    fn test_default_is_empty() {
        let codepoints = Codepoints::default();
        assert_eq!(codepoints.count(), 0);
    }
}