spinoso_string/
chars.rs

1use core::iter::FusedIterator;
2
3use crate::{Encoding, String};
4
5#[derive(Default, Debug, Clone)]
6pub struct Chars<'a>(State<'a>);
7
8impl<'a> From<&'a String> for Chars<'a> {
9    #[inline]
10    fn from(s: &'a String) -> Self {
11        let state = match s.encoding() {
12            Encoding::Utf8 => {
13                let iter = ConventionallyUtf8::with_bytes(s.as_slice());
14                State::Utf8(iter)
15            }
16            Encoding::Ascii => {
17                let iter = Bytes::with_bytes(s.as_slice());
18                State::Ascii(iter)
19            }
20            Encoding::Binary => {
21                let iter = Bytes::with_bytes(s.as_slice());
22                State::Binary(iter)
23            }
24        };
25        Self(state)
26    }
27}
28
29impl<'a> Iterator for Chars<'a> {
30    type Item = &'a [u8];
31
32    #[inline]
33    fn next(&mut self) -> Option<Self::Item> {
34        self.0.next()
35    }
36}
37
38impl FusedIterator for Chars<'_> {}
39
40impl Chars<'_> {
41    pub(crate) fn new() -> Self {
42        const EMPTY: &[u8] = &[];
43
44        Self(State::Binary(Bytes::from(EMPTY)))
45    }
46}
47
48#[derive(Debug, Clone)]
49enum State<'a> {
50    Utf8(ConventionallyUtf8<'a>),
51    Ascii(Bytes<'a>),
52    Binary(Bytes<'a>),
53}
54
55impl Default for State<'_> {
56    fn default() -> Self {
57        Self::Utf8(ConventionallyUtf8::new())
58    }
59}
60
61impl<'a> Iterator for State<'a> {
62    type Item = &'a [u8];
63
64    #[inline]
65    fn next(&mut self) -> Option<Self::Item> {
66        match self {
67            Self::Ascii(iter) | Self::Binary(iter) => iter.next(),
68            Self::Utf8(iter) => iter.next(),
69        }
70    }
71}
72
73impl FusedIterator for State<'_> {}
74
/// Iterator that walks a byte slice one byte at a time.
///
/// Every item is a subslice of length 1 borrowed from the original slice.
#[derive(Default, Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
struct Bytes<'a> {
    /// Bytes that have not been yielded yet.
    bytes: &'a [u8],
}

impl<'a> Bytes<'a> {
    /// Creates an iterator over the single-byte subslices of `bytes`.
    #[inline]
    const fn with_bytes(bytes: &'a [u8]) -> Self {
        Self { bytes }
    }
}

impl<'a> From<&'a [u8]> for Bytes<'a> {
    /// Equivalent to [`Bytes::with_bytes`].
    #[inline]
    fn from(bytes: &'a [u8]) -> Self {
        Self::with_bytes(bytes)
    }
}

impl<'a> Iterator for Bytes<'a> {
    type Item = &'a [u8];

    /// Splits off and returns the first remaining byte as a one-element
    /// slice, or returns `None` once no bytes are left.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        // `split_at_checked(1)` is `None` only when the slice is empty.
        let (next, remainder) = self.bytes.split_at_checked(1)?;
        self.bytes = remainder;
        Some(next)
    }

    /// Reports the exact number of items left: one per remaining byte.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = self.bytes.len();
        (remaining, Some(remaining))
    }
}

// Once `bytes` is empty it stays empty, so `next` keeps returning `None`.
impl FusedIterator for Bytes<'_> {}
109
/// Iterator that walks a run of bytes one byte at a time.
///
/// Every item is a subslice of length 1 borrowed from the original slice.
#[derive(Default, Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
struct InvalidBytes<'a> {
    /// Bytes that have not been yielded yet.
    bytes: &'a [u8],
}

impl<'a> InvalidBytes<'a> {
    /// Creates an iterator with no bytes to yield.
    #[inline]
    const fn new() -> Self {
        Self { bytes: &[] }
    }

    /// Creates an iterator over the single-byte subslices of `bytes`.
    #[inline]
    const fn with_bytes(bytes: &'a [u8]) -> Self {
        Self { bytes }
    }
}

impl<'a> From<&'a [u8]> for InvalidBytes<'a> {
    /// Equivalent to [`InvalidBytes::with_bytes`].
    fn from(bytes: &'a [u8]) -> Self {
        Self::with_bytes(bytes)
    }
}

impl<'a> Iterator for InvalidBytes<'a> {
    type Item = &'a [u8];

    /// Splits off and returns the first remaining byte as a one-element
    /// slice, or returns `None` once no bytes are left.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        // `split_at_checked(1)` is `None` only when the slice is empty.
        let (next, remainder) = self.bytes.split_at_checked(1)?;
        self.bytes = remainder;
        Some(next)
    }

    /// Reports the exact number of items left: one per remaining byte.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = self.bytes.len();
        (remaining, Some(remaining))
    }
}

// Once `bytes` is empty it stays empty, so `next` keeps returning `None`.
impl FusedIterator for InvalidBytes<'_> {}
148
149#[derive(Default, Debug, Clone)]
150pub struct ConventionallyUtf8<'a> {
151    bytes: &'a [u8],
152    invalid_bytes: InvalidBytes<'a>,
153}
154
155impl<'a> ConventionallyUtf8<'a> {
156    #[inline]
157    fn new() -> Self {
158        let bytes = &[];
159        Self {
160            bytes,
161            invalid_bytes: InvalidBytes::new(),
162        }
163    }
164
165    #[inline]
166    fn with_bytes(bytes: &'a [u8]) -> Self {
167        Self {
168            bytes,
169            invalid_bytes: InvalidBytes::new(),
170        }
171    }
172}
173
174impl<'a> From<&'a [u8]> for ConventionallyUtf8<'a> {
175    fn from(bytes: &'a [u8]) -> Self {
176        Self::with_bytes(bytes)
177    }
178}
179
180impl<'a> Iterator for ConventionallyUtf8<'a> {
181    type Item = &'a [u8];
182
183    #[inline]
184    fn next(&mut self) -> Option<Self::Item> {
185        if let Some(slice) = self.invalid_bytes.next() {
186            return Some(slice);
187        }
188        let (ch, size) = bstr::decode_utf8(self.bytes);
189        // SAFETY: bstr guarantees that the size is within the bounds of the slice.
190        let (chunk, remainder) = unsafe { self.bytes.split_at_unchecked(size) };
191        self.bytes = remainder;
192
193        if ch.is_some() {
194            Some(chunk)
195        } else {
196            // Invalid UTF-8 bytes are yielded as byte slices one byte at a time.
197            self.invalid_bytes = InvalidBytes::with_bytes(chunk);
198            self.invalid_bytes.next()
199        }
200    }
201}
202
203impl FusedIterator for ConventionallyUtf8<'_> {}