1use core::iter::FusedIterator;
2
3use crate::{Encoding, String};
4
5#[derive(Default, Debug, Clone)]
6pub struct Chars<'a>(State<'a>);
7
8impl<'a> From<&'a String> for Chars<'a> {
9 #[inline]
10 fn from(s: &'a String) -> Self {
11 let state = match s.encoding() {
12 Encoding::Utf8 => {
13 let iter = ConventionallyUtf8::with_bytes(s.as_slice());
14 State::Utf8(iter)
15 }
16 Encoding::Ascii => {
17 let iter = Bytes::with_bytes(s.as_slice());
18 State::Ascii(iter)
19 }
20 Encoding::Binary => {
21 let iter = Bytes::with_bytes(s.as_slice());
22 State::Binary(iter)
23 }
24 };
25 Self(state)
26 }
27}
28
29impl<'a> Iterator for Chars<'a> {
30 type Item = &'a [u8];
31
32 #[inline]
33 fn next(&mut self) -> Option<Self::Item> {
34 self.0.next()
35 }
36}
37
38impl FusedIterator for Chars<'_> {}
39
40impl Chars<'_> {
41 pub(crate) fn new() -> Self {
42 const EMPTY: &[u8] = &[];
43
44 Self(State::Binary(Bytes::from(EMPTY)))
45 }
46}
47
48#[derive(Debug, Clone)]
49enum State<'a> {
50 Utf8(ConventionallyUtf8<'a>),
51 Ascii(Bytes<'a>),
52 Binary(Bytes<'a>),
53}
54
55impl Default for State<'_> {
56 fn default() -> Self {
57 Self::Utf8(ConventionallyUtf8::new())
58 }
59}
60
61impl<'a> Iterator for State<'a> {
62 type Item = &'a [u8];
63
64 #[inline]
65 fn next(&mut self) -> Option<Self::Item> {
66 match self {
67 Self::Ascii(iter) | Self::Binary(iter) => iter.next(),
68 Self::Utf8(iter) => iter.next(),
69 }
70 }
71}
72
73impl FusedIterator for State<'_> {}
74
/// Iterator that hands out a byte slice one byte at a time.
///
/// Every item is a one-byte subslice of the slice the iterator was created
/// with; iteration ends once no bytes are left.
#[derive(Default, Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
struct Bytes<'a> {
    /// Bytes that have not been yielded yet.
    bytes: &'a [u8],
}

impl<'a> Bytes<'a> {
    /// Creates an iterator over the given bytes.
    #[inline]
    const fn with_bytes(bytes: &'a [u8]) -> Self {
        Self { bytes }
    }
}

impl<'a> From<&'a [u8]> for Bytes<'a> {
    /// Equivalent to [`Bytes::with_bytes`].
    #[inline]
    fn from(bytes: &'a [u8]) -> Self {
        Self { bytes }
    }
}

impl<'a> Iterator for Bytes<'a> {
    type Item = &'a [u8];

    /// Yields the next single-byte slice, or `None` when the input is
    /// exhausted.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        // Splitting at index 1 only fails when the slice is empty, which is
        // exactly the end-of-iteration condition.
        let (head, tail) = self.bytes.split_at_checked(1)?;
        self.bytes = tail;
        Some(head)
    }
}

// Once the remaining slice is empty it stays empty, so `next` keeps
// returning `None`.
impl FusedIterator for Bytes<'_> {}
109
/// Iterator that drains a run of bytes one byte at a time.
///
/// [`ConventionallyUtf8`] stores the bytes of a failed decode here so that
/// each of them is yielded as its own single-byte item.
#[derive(Default, Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
struct InvalidBytes<'a> {
    /// Bytes that have not been yielded yet.
    bytes: &'a [u8],
}

impl<'a> InvalidBytes<'a> {
    /// Creates an iterator with nothing to yield.
    #[inline]
    const fn new() -> Self {
        Self::with_bytes(&[])
    }

    /// Creates an iterator over the given bytes.
    #[inline]
    const fn with_bytes(bytes: &'a [u8]) -> Self {
        Self { bytes }
    }
}

impl<'a> From<&'a [u8]> for InvalidBytes<'a> {
    /// Equivalent to [`InvalidBytes::with_bytes`].
    fn from(bytes: &'a [u8]) -> Self {
        Self { bytes }
    }
}

impl<'a> Iterator for InvalidBytes<'a> {
    type Item = &'a [u8];

    /// Yields the next single-byte slice, or `None` when nothing is left.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        // `split_at_checked(1)` is `None` only for an empty slice; otherwise
        // keep the tail for later and hand out the one-byte head.
        self.bytes.split_at_checked(1).map(|(head, tail)| {
            self.bytes = tail;
            head
        })
    }
}

// Once the remaining slice is empty it stays empty, so `next` keeps
// returning `None`.
impl FusedIterator for InvalidBytes<'_> {}
148
149#[derive(Default, Debug, Clone)]
150pub struct ConventionallyUtf8<'a> {
151 bytes: &'a [u8],
152 invalid_bytes: InvalidBytes<'a>,
153}
154
155impl<'a> ConventionallyUtf8<'a> {
156 #[inline]
157 fn new() -> Self {
158 let bytes = &[];
159 Self {
160 bytes,
161 invalid_bytes: InvalidBytes::new(),
162 }
163 }
164
165 #[inline]
166 fn with_bytes(bytes: &'a [u8]) -> Self {
167 Self {
168 bytes,
169 invalid_bytes: InvalidBytes::new(),
170 }
171 }
172}
173
174impl<'a> From<&'a [u8]> for ConventionallyUtf8<'a> {
175 fn from(bytes: &'a [u8]) -> Self {
176 Self::with_bytes(bytes)
177 }
178}
179
180impl<'a> Iterator for ConventionallyUtf8<'a> {
181 type Item = &'a [u8];
182
183 #[inline]
184 fn next(&mut self) -> Option<Self::Item> {
185 if let Some(slice) = self.invalid_bytes.next() {
186 return Some(slice);
187 }
188 let (ch, size) = bstr::decode_utf8(self.bytes);
189 let (chunk, remainder) = unsafe { self.bytes.split_at_unchecked(size) };
191 self.bytes = remainder;
192
193 if ch.is_some() {
194 Some(chunk)
195 } else {
196 self.invalid_bytes = InvalidBytes::with_bytes(chunk);
198 self.invalid_bytes.next()
199 }
200 }
201}
202
203impl FusedIterator for ConventionallyUtf8<'_> {}