1use core::iter::FusedIterator;
2
3use scolapasta_string_escape::InvalidUtf8ByteSequence;
4
/// Bit set recording which of the two `/` delimiters have not been emitted
/// yet.
///
/// Each delimiter is handed out at most once: the matching bit is cleared the
/// first time the delimiter is requested.
#[derive(Debug, Clone)]
struct Delimiters {
    bits: u8,
}

impl Default for Delimiters {
    /// Starts with both the left and the right delimiter still pending.
    fn default() -> Self {
        Self::DEFAULT
    }
}

impl Delimiters {
    /// Flag: the opening `/` has not been emitted.
    const EMIT_LEFT_DELIMITER: Self = Self { bits: 0b0000_0001 };
    /// Flag: the closing `/` has not been emitted.
    const EMIT_RIGHT_DELIMITER: Self = Self { bits: 0b0000_0010 };

    /// Both delimiters pending.
    const DEFAULT: Self = Self {
        bits: Self::EMIT_LEFT_DELIMITER.bits | Self::EMIT_RIGHT_DELIMITER.bits,
    };

    /// Returns `Some('/')` the first time it is called and `None` afterwards.
    #[inline]
    fn emit_left_delimiter(&mut self) -> Option<char> {
        self.take_pending(Self::EMIT_LEFT_DELIMITER.bits)
    }

    /// Returns `Some('/')` the first time it is called and `None` afterwards.
    #[inline]
    fn emit_right_delimiter(&mut self) -> Option<char> {
        self.take_pending(Self::EMIT_RIGHT_DELIMITER.bits)
    }

    /// Clears `flag` and yields the delimiter character if `flag` was fully
    /// set; leaves the state untouched and yields `None` otherwise.
    #[inline]
    fn take_pending(&mut self, flag: u8) -> Option<char> {
        if (self.bits & flag) != flag {
            return None;
        }
        self.bits &= !flag;
        Some('/')
    }
}
44
45#[derive(Default, Debug, Clone)]
78#[must_use = "this `Debug` is an `Iterator`, which should be consumed if constructed"]
79pub struct Debug<'a> {
80 delimiters: Delimiters,
81 source: &'a [u8],
91 non_standard_control_escapes: &'static [u8],
92 literal: InvalidUtf8ByteSequence,
93 options: &'static str,
94 encoding: &'static str,
95}
96
97impl<'a> Debug<'a> {
98 pub fn new(source: &'a [u8], options: &'static str, encoding: &'static str) -> Self {
127 Self {
128 delimiters: Delimiters::DEFAULT,
129 source,
130 non_standard_control_escapes: &[],
131 literal: InvalidUtf8ByteSequence::new(),
132 options,
133 encoding,
134 }
135 }
136}
137
138impl Iterator for Debug<'_> {
139 type Item = char;
140
141 fn next(&mut self) -> Option<Self::Item> {
142 if let Some(prefix) = self.delimiters.emit_left_delimiter() {
143 return Some(prefix);
144 }
145 if let Some((&next, tail)) = self.non_standard_control_escapes.split_first() {
146 self.non_standard_control_escapes = tail;
147 return Some(next.into());
148 }
149 if let Some(literal) = self.literal.next() {
150 return Some(literal);
151 }
152 if !self.source.is_empty() {
153 let (ch, size) = bstr::decode_utf8(self.source);
154 let (chunk, remainder) = unsafe { self.source.split_at_unchecked(size) };
156 self.source = remainder;
157
158 return match ch {
159 Some('/') => {
161 self.literal = InvalidUtf8ByteSequence::with_byte(b'/');
165 Some('\\')
166 }
167 Some('\x07') => {
168 let (&next, tail) = br"\x07".split_first().unwrap();
169 self.non_standard_control_escapes = tail;
170 Some(next.into())
171 }
172 Some('\x08') => {
173 let (&next, tail) = br"\x08".split_first().unwrap();
174 self.non_standard_control_escapes = tail;
175 Some(next.into())
176 }
177 Some('\x1B') => {
178 let (&next, tail) = br"\x1B".split_first().unwrap();
179 self.non_standard_control_escapes = tail;
180 Some(next.into())
181 }
182 Some(ch @ ('"' | '\'' | '\\')) => Some(ch),
183 Some(ch) if ch.is_ascii() && posix_space::is_space(ch as u8) => Some(ch),
184 Some(ch) if ch.is_ascii() => {
185 self.literal = InvalidUtf8ByteSequence::with_byte(ch as u8);
189 self.literal.next()
190 }
191 Some(ch) => Some(ch),
192 None => {
195 self.literal = InvalidUtf8ByteSequence::try_from(chunk).unwrap();
199 self.literal.next()
202 }
203 };
204 }
205 if let Some(suffix) = self.delimiters.emit_right_delimiter() {
206 return Some(suffix);
207 }
208 if let (Some(ch), size) = bstr::decode_utf8(self.options) {
209 self.options = &self.options[size..];
210 return Some(ch);
211 }
212 if let (Some(ch), size) = bstr::decode_utf8(self.encoding) {
213 self.encoding = &self.encoding[size..];
214 return Some(ch);
215 }
216 None
217 }
218}
219
220impl FusedIterator for Debug<'_> {}
221
#[cfg(test)]
mod tests {
    use bstr::ByteSlice;

    use super::Debug;

    /// Runs a `Debug` iterator to completion and returns everything it
    /// yielded as a `String`.
    fn render(pattern: &[u8], options: &'static str, encoding: &'static str) -> String {
        Debug::new(pattern, options, encoding).collect()
    }

    #[test]
    fn iter_utf8_pattern_no_opt_no_enc() {
        assert_eq!(render(b"Artichoke Ruby", "", ""), "/Artichoke Ruby/");
    }

    #[test]
    fn iter_utf8_pattern_with_opts_no_enc() {
        assert_eq!(render(b"Artichoke Ruby", "i", ""), "/Artichoke Ruby/i");
        assert_eq!(render(b"Artichoke Ruby", "mix", ""), "/Artichoke Ruby/mix");
    }

    #[test]
    fn iter_utf8_pattern_no_opts_with_enc() {
        assert_eq!(render(b"Artichoke Ruby", "", "n"), "/Artichoke Ruby/n");
    }

    #[test]
    fn iter_utf8_pattern_with_opts_with_enc() {
        assert_eq!(render(b"Artichoke Ruby", "ix", "n"), "/Artichoke Ruby/ixn");
    }

    #[test]
    fn iter_utf8_emoji_pattern_no_opt_no_enc() {
        let rendered = render("crab 🦀 for Rust".as_bytes(), "", "");
        assert_eq!(rendered, "/crab 🦀 for Rust/");
    }

    #[test]
    fn iter_utf8_emoji_pattern_with_opts_no_enc() {
        let rendered = render("crab 🦀 for Rust".as_bytes(), "i", "");
        assert_eq!(rendered, "/crab 🦀 for Rust/i");

        let rendered = render("crab 🦀 for Rust".as_bytes(), "mix", "");
        assert_eq!(rendered, "/crab 🦀 for Rust/mix");
    }

    #[test]
    fn iter_ascii_escaped_byte_pattern_literal_ascii_control() {
        // Every ASCII control byte below the space character.
        let pattern = (0x00..=0x1F).collect::<Vec<u8>>();
        let rendered = render(&pattern, "", "");
        // `\xNN` for each byte except the whitespace bytes 9..=13, which are
        // expected verbatim.
        let expected = [
            47, 92, 120, 48, 48, 92, 120, 48, 49, 92, 120, 48, 50, 92, 120, 48, 51, 92, 120, 48, 52, 92, 120, 48,
            53, 92, 120, 48, 54, 92, 120, 48, 55, 92, 120, 48, 56, 9, 10, 11, 12, 13, 92, 120, 48, 69, 92, 120,
            48, 70, 92, 120, 49, 48, 92, 120, 49, 49, 92, 120, 49, 50, 92, 120, 49, 51, 92, 120, 49, 52, 92, 120,
            49, 53, 92, 120, 49, 54, 92, 120, 49, 55, 92, 120, 49, 56, 92, 120, 49, 57, 92, 120, 49, 65, 92, 120,
            49, 66, 92, 120, 49, 67, 92, 120, 49, 68, 92, 120, 49, 69, 92, 120, 49, 70, 47_u8,
        ];
        assert_eq!(rendered.as_bytes().as_bstr(), expected.as_bstr());
    }

    #[test]
    fn iter_ascii_pattern_exhaustive() {
        // All ASCII bytes except the bracket characters.
        let pattern = (0x00..=0x7F).filter(|b| !b"[](){}".contains(b)).collect::<Vec<u8>>();
        let rendered = render(&pattern, "", "");
        let expected = [
            47, 92, 120, 48, 48, 92, 120, 48, 49, 92, 120, 48, 50, 92, 120, 48, 51, 92, 120, 48, 52, 92, 120, 48,
            53, 92, 120, 48, 54, 92, 120, 48, 55, 92, 120, 48, 56, 9, 10, 11, 12, 13, 92, 120, 48, 69, 92, 120,
            48, 70, 92, 120, 49, 48, 92, 120, 49, 49, 92, 120, 49, 50, 92, 120, 49, 51, 92, 120, 49, 52, 92, 120,
            49, 53, 92, 120, 49, 54, 92, 120, 49, 55, 92, 120, 49, 56, 92, 120, 49, 57, 92, 120, 49, 65, 92, 120,
            49, 66, 92, 120, 49, 67, 92, 120, 49, 68, 92, 120, 49, 69, 92, 120, 49, 70, 32, 33, 34, 35, 36, 37,
            38, 39, 42, 43, 44, 45, 46, 92, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
            64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88,
            89, 90, 92, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
            113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 124, 126, 92, 120, 55, 70, 47_u8,
        ];
        assert_eq!(rendered.as_bytes().as_bstr(), expected.as_bstr());
    }

    #[test]
    fn iter_ascii_pattern_escaped_exhaustive() {
        // ASCII bytes with backslash escapes already present in the source.
        let pattern = &[
            0, 1, 2, 3, 4, 5, 6, 7, 8, 92, 116, 92, 110, 92, 118, 92, 102, 92, 114, 14, 15, 16, 17, 18, 19, 20, 21,
            22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 92, 32, 33, 34, 92, 35, 92, 36, 37, 38, 39, 92, 40, 92, 41, 92,
            42, 92, 43, 44, 92, 45, 92, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 92, 63,
            64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
            90, 92, 91, 92, 92, 92, 93, 92, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
            110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 92, 123, 92, 124, 92, 125, 126, 127_u8,
        ];
        let rendered = render(pattern, "", "");
        let expected = [
            47, 92, 120, 48, 48, 92, 120, 48, 49, 92, 120, 48, 50, 92, 120, 48, 51, 92, 120, 48, 52, 92, 120, 48,
            53, 92, 120, 48, 54, 92, 120, 48, 55, 92, 120, 48, 56, 92, 116, 92, 110, 92, 118, 92, 102, 92, 114,
            92, 120, 48, 69, 92, 120, 48, 70, 92, 120, 49, 48, 92, 120, 49, 49, 92, 120, 49, 50, 92, 120, 49, 51,
            92, 120, 49, 52, 92, 120, 49, 53, 92, 120, 49, 54, 92, 120, 49, 55, 92, 120, 49, 56, 92, 120, 49, 57,
            92, 120, 49, 65, 92, 120, 49, 66, 92, 120, 49, 67, 92, 120, 49, 68, 92, 120, 49, 69, 92, 120, 49, 70,
            92, 32, 33, 34, 92, 35, 92, 36, 37, 38, 39, 92, 40, 92, 41, 92, 42, 92, 43, 44, 92, 45, 92, 46, 92,
            47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 92, 63, 64, 65, 66, 67, 68, 69, 70,
            71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 92, 91, 92, 92, 92,
            93, 92, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113,
            114, 115, 116, 117, 118, 119, 120, 121, 122, 92, 123, 92, 124, 92, 125, 126, 92, 120, 55, 70, 47_u8,
        ];
        assert_eq!(rendered.as_bytes().as_bstr(), expected.as_bstr());
    }

    #[test]
    fn iter_ascii_escaped_byte_pattern_literal() {
        // Same bytes as the raw literal `\t\v\f\n` used in the cases below.
        let pattern: [u8; 8] = [92, 116, 92, 118, 92, 102, 92, 110];
        assert_eq!(render(&pattern, "", ""), r"/\t\v\f\n/");

        assert_eq!(render(br"\t\v\f\n", "i", ""), r"/\t\v\f\n/i");
        assert_eq!(render(br"\t\v\f\n", "mix", ""), r"/\t\v\f\n/mix");
        assert_eq!(render(br"\t\v\f\n", "", "n"), r"/\t\v\f\n/n");
        assert_eq!(render(br"\t\v\f\n", "ix", "n"), r"/\t\v\f\n/ixn");
    }

    #[test]
    fn iter_ascii_escaped_byte_pattern_compiled() {
        // A raw tab byte followed by a double quote.
        let pattern: [u8; 2] = [9, 34];
        assert_eq!(render(&pattern, "", ""), "/\t\"/");
    }

    #[test]
    fn iter_invalid_utf8_pattern() {
        assert_eq!(render(b"\xFF\xFE", "", ""), r"/\xFF\xFE/");
    }
}