// spinoso_string/enc/utf8/borrowed/codepoints.rs

use core::str::Chars;

use super::Utf8Str;
use crate::CodepointsError;

/// Iterator over the Unicode scalar values of a UTF-8 string, yielded as
/// `u32` codepoints.
///
/// Within this file, values are constructed either with `Default` (an
/// iterator that yields nothing) or with `TryFrom<&Utf8Str>`, which fails
/// when the underlying bytes are not valid UTF-8.
#[derive(Debug, Clone)]
pub struct Codepoints<'a> {
    /// Character iterator over the already-validated string contents.
    inner: Chars<'a>,
}

11impl<'a> TryFrom<&'a Utf8Str> for Codepoints<'a> {
12    type Error = CodepointsError;
13
14    #[inline]
15    fn try_from(s: &'a Utf8Str) -> Result<Self, Self::Error> {
16        match simdutf8::basic::from_utf8(s.as_bytes()) {
17            Ok(s) => Ok(Self { inner: s.chars() }),
18            // ```
19            // [3.2.2] > s = "abc\xFFxyz"
20            // => "abc\xFFxyz"
21            // [3.2.2] > s.encoding
22            // => #<Encoding:UTF-8>
23            // [3.2.2] > s.codepoints
24            // (irb):5:in `codepoints': invalid byte sequence in UTF-8 (ArgumentError)
25            //         from (irb):5:in `<main>'
26            //         from /usr/local/var/rbenv/versions/3.2.2/lib/ruby/gems/3.2.0/gems/irb-1.6.2/exe/irb:11:in `<top (required)>'
27            //         from /usr/local/var/rbenv/versions/3.2.2/bin/irb:25:in `load'
28            //         from /usr/local/var/rbenv/versions/3.2.2/bin/irb:25:in `<main>'
29            // ```
30            Err(_) => Err(CodepointsError::invalid_utf8_codepoint()),
31        }
32    }
33}
34
35impl Iterator for Codepoints<'_> {
36    type Item = u32;
37
38    fn next(&mut self) -> Option<Self::Item> {
39        self.inner.next().map(u32::from)
40    }
41}
42
43impl Default for Codepoints<'_> {
44    #[inline]
45    fn default() -> Self {
46        Self { inner: "".chars() }
47    }
48}
49
#[cfg(test)]
mod tests {
    use alloc::vec::Vec;

    use super::*;

    // Valid input: ASCII followed by a four-byte scalar value (U+1F48E).
    #[test]
    fn test_valid_utf8() {
        let s = Utf8Str::new("hello💎");
        let codepoints = Codepoints::try_from(s).unwrap();
        assert_eq!(codepoints.collect::<Vec<_>>(), &[104, 101, 108, 108, 111, 128_142]);
    }

    // A lone 0xFF byte is never valid UTF-8, so construction must fail.
    #[test]
    fn test_invalid_utf8() {
        let s = Utf8Str::new(b"hello\xFF");
        let err = Codepoints::try_from(s).unwrap_err();
        assert_eq!(err, CodepointsError::invalid_utf8_codepoint());
    }
}