1use alloc::collections::TryReserveError;
2use core::fmt;
3use core::ops::Range;
4use core::slice::SliceIndex;
5
6use bstr::ByteSlice;
7use scolapasta_strbuf::Buf;
8
9use crate::case_folding::CaseFoldingEffect;
10use crate::codepoints::InvalidCodepointError;
11use crate::enc::binascii;
12use crate::iter::{Bytes, IntoIter, Iter, IterMut};
13use crate::ord::OrdError;
14
15mod codepoints;
16mod eq;
17mod impls;
18mod inspect;
19#[cfg(feature = "std")]
20mod io;
21
22pub use codepoints::Codepoints;
23pub use inspect::Inspect;
24
25#[repr(transparent)]
26#[derive(Default, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
27pub struct BinaryString {
28 inner: Buf,
29}
30
31impl BinaryString {
33 pub const fn new(buf: Buf) -> Self {
34 Self { inner: buf }
35 }
36}
37
38impl fmt::Debug for BinaryString {
39 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
40 f.debug_struct("BinaryString")
41 .field("buf", &self.inner.as_bstr())
42 .finish()
43 }
44}
45
46impl BinaryString {
48 #[inline]
49 #[must_use]
50 pub fn into_buf(self) -> Buf {
51 self.inner
52 }
53
54 #[inline]
55 #[must_use]
56 pub fn as_slice(&self) -> &[u8] {
57 self.inner.as_slice()
58 }
59
60 #[inline]
61 #[must_use]
62 pub fn as_mut_slice(&mut self) -> &mut [u8] {
63 self.inner.as_mut_slice()
64 }
65
66 #[inline]
67 #[must_use]
68 pub fn as_ptr(&self) -> *const u8 {
69 self.inner.as_ptr()
70 }
71
72 #[inline]
73 #[must_use]
74 pub fn as_mut_ptr(&mut self) -> *mut u8 {
75 self.inner.as_mut_ptr()
76 }
77}
78
79impl BinaryString {
81 #[inline]
82 #[must_use]
83 pub fn iter(&self) -> Iter<'_> {
84 Iter::from_slice(&self.inner)
85 }
86
87 #[inline]
88 #[must_use]
89 pub fn iter_mut(&mut self) -> IterMut<'_> {
90 IterMut::from_mut_slice(&mut self.inner)
91 }
92
93 #[inline]
94 #[must_use]
95 pub fn bytes(&self) -> Bytes<'_> {
96 Bytes::from_slice(&self.inner)
97 }
98
99 #[inline]
100 #[must_use]
101 pub fn into_iter(self) -> IntoIter {
102 IntoIter::from_vec(self.inner.into_inner())
103 }
104}
105
106impl BinaryString {
108 #[inline]
109 pub fn len(&self) -> usize {
110 self.inner.len()
111 }
112
113 #[inline]
114 pub unsafe fn set_len(&mut self, len: usize) {
115 unsafe {
118 self.inner.set_len(len);
119 }
120 }
121
122 #[inline]
123 #[must_use]
124 pub fn capacity(&self) -> usize {
125 self.inner.capacity()
126 }
127
128 #[inline]
129 pub fn clear(&mut self) {
130 self.inner.clear();
131 }
132
133 #[inline]
134 #[must_use]
135 pub fn is_empty(&self) -> bool {
136 self.inner.is_empty()
137 }
138
139 #[inline]
140 pub fn truncate(&mut self, len: usize) {
141 self.inner.truncate(len);
142 }
143
144 #[inline]
145 #[must_use]
146 pub fn char_len(&self) -> usize {
147 self.len()
148 }
149}
150
151impl BinaryString {
153 #[inline]
154 pub fn reserve(&mut self, additional: usize) {
155 self.inner.reserve(additional);
156 }
157
158 #[inline]
159 pub fn try_reserve(&mut self, additional: usize) -> Result<(), TryReserveError> {
160 self.inner.try_reserve(additional)
161 }
162
163 #[inline]
164 pub fn reserve_exact(&mut self, additional: usize) {
165 self.inner.reserve_exact(additional);
166 }
167
168 #[inline]
169 pub fn try_reserve_exact(&mut self, additional: usize) -> Result<(), TryReserveError> {
170 self.inner.try_reserve_exact(additional)
171 }
172
173 #[inline]
174 pub fn shrink_to_fit(&mut self) {
175 self.inner.shrink_to_fit();
176 }
177
178 #[inline]
179 pub fn shrink_to(&mut self, min_capacity: usize) {
180 self.inner.shrink_to(min_capacity);
181 }
182}
183
184impl BinaryString {
186 #[inline]
187 #[must_use]
188 pub fn get<I>(&self, index: I) -> Option<&I::Output>
189 where
190 I: SliceIndex<[u8]>,
191 {
192 self.inner.get(index)
193 }
194
195 #[inline]
196 #[must_use]
197 pub fn get_char(&self, index: usize) -> Option<&'_ [u8]> {
198 self.get(index..=index)
199 }
200
201 #[inline]
202 #[must_use]
203 pub fn get_char_slice(&self, range: Range<usize>) -> Option<&'_ [u8]> {
204 let Range { start, end } = range;
205
206 self.inner.get(start..end).or_else(|| {
207 if start > self.inner.len() {
208 None
209 } else if end <= start {
210 Some(&[])
211 } else {
212 self.inner.get(start..)
213 }
214 })
215 }
216
217 #[inline]
218 #[must_use]
219 pub fn get_mut<I>(&mut self, index: I) -> Option<&mut I::Output>
220 where
221 I: SliceIndex<[u8]>,
222 {
223 self.inner.get_mut(index)
224 }
225
226 #[inline]
227 #[must_use]
228 pub unsafe fn get_unchecked<I>(&self, index: I) -> &I::Output
229 where
230 I: SliceIndex<[u8]>,
231 {
232 unsafe { self.inner.get_unchecked(index) }
235 }
236
237 #[inline]
238 #[must_use]
239 pub unsafe fn get_unchecked_mut<I>(&mut self, index: I) -> &mut I::Output
240 where
241 I: SliceIndex<[u8]>,
242 {
243 unsafe { self.inner.get_unchecked_mut(index) }
246 }
247}
248
249impl BinaryString {
251 #[inline]
252 pub fn push_byte(&mut self, byte: u8) {
253 self.inner.push_byte(byte);
254 }
255
256 #[inline]
257 pub fn try_push_codepoint(&mut self, codepoint: i64) -> Result<(), InvalidCodepointError> {
258 if let Ok(byte) = u8::try_from(codepoint) {
259 self.push_byte(byte);
260 Ok(())
261 } else {
262 Err(InvalidCodepointError::codepoint_out_of_range(codepoint))
263 }
264 }
265
266 #[inline]
267 pub fn try_push_int(&mut self, int: i64) -> Result<(), InvalidCodepointError> {
268 self.try_push_codepoint(int)
269 }
270
271 #[inline]
272 pub fn push_char(&mut self, ch: char) {
273 self.inner.push_char(ch);
274 }
275
276 #[inline]
277 pub fn push_str(&mut self, s: &str) {
278 self.inner.push_str(s);
279 }
280
281 #[inline]
282 pub fn extend_from_slice(&mut self, other: &[u8]) {
283 self.inner.extend_from_slice(other);
284 }
285}
286
287impl BinaryString {
289 #[inline]
290 #[must_use]
291 pub fn is_ascii_only(&self) -> bool {
292 self.inner.is_ascii()
293 }
294
295 #[inline]
296 #[must_use]
297 #[expect(clippy::unused_self, reason = "API compatibility with other encoded string types")]
298 pub fn is_valid_encoding(&self) -> bool {
299 true
300 }
301}
302
303impl BinaryString {
305 #[inline]
306 pub fn make_capitalized(&mut self) -> CaseFoldingEffect {
307 binascii::make_capitalized(self.as_mut_slice())
308 }
309
310 #[inline]
311 pub fn make_lowercase(&mut self) -> CaseFoldingEffect {
312 binascii::make_lowercase(self.as_mut_slice())
313 }
314
315 #[inline]
316 pub fn make_uppercase(&mut self) -> CaseFoldingEffect {
317 binascii::make_uppercase(self.as_mut_slice())
318 }
319
320 #[inline]
321 pub fn make_swapcase(&mut self) -> CaseFoldingEffect {
322 binascii::make_swapcase(self.as_mut_slice())
323 }
324}
325
326impl BinaryString {
327 #[inline]
328 #[must_use]
329 pub fn chr(&self) -> &[u8] {
330 self.inner.get(0..1).unwrap_or_default()
331 }
332
333 #[inline]
334 pub fn ord(&self) -> Result<u32, OrdError> {
335 let byte = self.inner.first().copied().ok_or_else(OrdError::empty_string)?;
336 Ok(u32::from(byte))
337 }
338
339 #[inline]
340 #[must_use]
341 pub fn ends_with(&self, slice: &[u8]) -> bool {
342 self.inner.ends_with(slice)
343 }
344
345 #[inline]
346 pub fn reverse(&mut self) {
347 self.inner.reverse();
348 }
349}
350
351impl BinaryString {
353 #[inline]
354 #[must_use]
355 pub fn index(&self, needle: &[u8], offset: usize) -> Option<usize> {
356 let buf = self.get(offset..)?;
357 let index = buf.find(needle)?;
358 Some(index + offset)
359 }
360
361 #[inline]
362 #[must_use]
363 pub fn rindex(&self, needle: &[u8], offset: usize) -> Option<usize> {
364 let buf = self.get(..=offset).unwrap_or_else(|| self.as_slice());
365 let index = buf.rfind(needle)?;
366 Some(index)
367 }
368}
369
370#[cfg(test)]
371mod tests {
372 use alloc::string::String;
373 use alloc::vec::Vec;
374
375 use super::BinaryString;
376 use crate::test::run_arbitrary;
377
378 #[test]
379 fn prop_fuzz_char_len_utf8_contents_binary_string() {
380 run_arbitrary::<String>(|contents| {
381 let expected = contents.len();
382 let s = BinaryString::from(contents);
383 assert_eq!(s.char_len(), expected);
384 });
385 }
386
387 #[test]
388 fn prop_fuzz_len_utf8_contents_binary_string() {
389 run_arbitrary::<String>(|contents| {
390 let expected = contents.len();
391 let s = BinaryString::from(contents);
392 assert_eq!(s.len(), expected);
393 });
394 }
395
396 #[test]
397 fn prop_fuzz_char_len_binary_contents_binary_string() {
398 run_arbitrary::<Vec<u8>>(|contents| {
399 let expected = contents.len();
400 let s = BinaryString::from(contents);
401 assert_eq!(s.char_len(), expected);
402 });
403 }
404
405 #[test]
406 fn prop_fuzz_len_binary_contents_binary_string() {
407 run_arbitrary::<Vec<u8>>(|contents| {
408 let expected = contents.len();
409 let s = BinaryString::from(contents);
410 assert_eq!(s.len(), expected);
411 });
412 }
413
414 #[test]
415 fn constructs_empty_buffer() {
416 let s = BinaryString::from(Vec::new());
417 assert_eq!(0, s.len());
418 }
419
420 #[test]
421 fn casing_binary_string_empty() {
422 let mut s = BinaryString::from(b"");
423
424 s.make_capitalized();
425 assert_eq!(s, "");
426
427 s.make_lowercase();
428 assert_eq!(s, "");
429
430 s.make_uppercase();
431 assert_eq!(s, "");
432
433 s.make_swapcase();
434 assert_eq!(s, "");
435 }
436
437 #[test]
438 fn casing_binary_string_ascii() {
439 let lower = BinaryString::from(b"abc");
440 let mid_upper = BinaryString::from(b"aBc");
441 let upper = BinaryString::from(b"ABC");
442 let long = BinaryString::from(b"aBC, 123, ABC, baby you and me girl");
443
444 let capitalize: fn(&BinaryString) -> BinaryString = |value: &BinaryString| {
445 let mut value = value.clone();
446 value.make_capitalized();
447 value
448 };
449 let lowercase: fn(&BinaryString) -> BinaryString = |value: &BinaryString| {
450 let mut value = value.clone();
451 value.make_lowercase();
452 value
453 };
454 let uppercase: fn(&BinaryString) -> BinaryString = |value: &BinaryString| {
455 let mut value = value.clone();
456 value.make_uppercase();
457 value
458 };
459 let swapcase: fn(&BinaryString) -> BinaryString = |value: &BinaryString| {
460 let mut value = value.clone();
461 value.make_swapcase();
462 value
463 };
464
465 assert_eq!(capitalize(&lower), "Abc");
466 assert_eq!(capitalize(&mid_upper), "Abc");
467 assert_eq!(capitalize(&upper), "Abc");
468 assert_eq!(capitalize(&long), "Abc, 123, abc, baby you and me girl");
469
470 assert_eq!(lowercase(&lower), "abc");
471 assert_eq!(lowercase(&mid_upper), "abc");
472 assert_eq!(lowercase(&upper), "abc");
473 assert_eq!(lowercase(&long), "abc, 123, abc, baby you and me girl");
474
475 assert_eq!(uppercase(&lower), "ABC");
476 assert_eq!(uppercase(&mid_upper), "ABC");
477 assert_eq!(uppercase(&upper), "ABC");
478 assert_eq!(uppercase(&long), "ABC, 123, ABC, BABY YOU AND ME GIRL");
479
480 assert_eq!(swapcase(&lower), "ABC");
481 assert_eq!(swapcase(&mid_upper), "AbC");
482 assert_eq!(swapcase(&upper), "abc");
483 assert_eq!(swapcase(&long), "Abc, 123, abc, BABY YOU AND ME GIRL");
484 }
485
486 #[test]
487 fn casing_binary_string_utf8() {
488 let sharp_s = BinaryString::from("ß");
489 let tomorrow = BinaryString::from("αύριο");
490 let year = BinaryString::from("έτος");
491 let two_byte_chars = BinaryString::from("𐐜 𐐔𐐇𐐝𐐀𐐡𐐇𐐓 𐐙𐐊𐐡𐐝𐐓/𐐝𐐇𐐗𐐊𐐤𐐔 𐐒𐐋𐐗 𐐒𐐌 𐐜 𐐡𐐀𐐖𐐇𐐤𐐓𐐝 𐐱𐑂 𐑄 𐐔𐐇𐐝𐐀𐐡𐐇𐐓 𐐏𐐆𐐅𐐤𐐆𐐚𐐊𐐡𐐝𐐆𐐓𐐆");
494 let varying_length = BinaryString::from("zȺȾ");
497 let rtl = BinaryString::from("مرحبا الخرشوف");
498
499 let capitalize: fn(&BinaryString) -> BinaryString = |value: &BinaryString| {
500 let mut value = value.clone();
501 value.make_capitalized();
502 value
503 };
504 let lowercase: fn(&BinaryString) -> BinaryString = |value: &BinaryString| {
505 let mut value = value.clone();
506 value.make_lowercase();
507 value
508 };
509 let uppercase: fn(&BinaryString) -> BinaryString = |value: &BinaryString| {
510 let mut value = value.clone();
511 value.make_uppercase();
512 value
513 };
514 let swapcase: fn(&BinaryString) -> BinaryString = |value: &BinaryString| {
515 let mut value = value.clone();
516 value.make_swapcase();
517 value
518 };
519
520 assert_eq!(capitalize(&sharp_s), "ß");
521 assert_eq!(capitalize(&tomorrow), "αύριο");
522 assert_eq!(capitalize(&year), "έτος");
523 assert_eq!(
524 capitalize(&two_byte_chars),
525 "𐐜 𐐔𐐇𐐝𐐀𐐡𐐇𐐓 𐐙𐐊𐐡𐐝𐐓/𐐝𐐇𐐗𐐊𐐤𐐔 𐐒𐐋𐐗 𐐒𐐌 𐐜 𐐡𐐀𐐖𐐇𐐤𐐓𐐝 𐐱𐑂 𐑄 𐐔𐐇𐐝𐐀𐐡𐐇𐐓 𐐏𐐆𐐅𐐤𐐆𐐚𐐊𐐡𐐝𐐆𐐓𐐆"
526 );
527 assert_eq!(capitalize(&varying_length), "ZȺȾ");
528 assert_eq!(capitalize(&rtl), "مرحبا الخرشوف");
529
530 assert_eq!(lowercase(&sharp_s), "ß");
531 assert_eq!(lowercase(&tomorrow), "αύριο");
532 assert_eq!(lowercase(&year), "έτος");
533 assert_eq!(
534 lowercase(&two_byte_chars),
535 "𐐜 𐐔𐐇𐐝𐐀𐐡𐐇𐐓 𐐙𐐊𐐡𐐝𐐓/𐐝𐐇𐐗𐐊𐐤𐐔 𐐒𐐋𐐗 𐐒𐐌 𐐜 𐐡𐐀𐐖𐐇𐐤𐐓𐐝 𐐱𐑂 𐑄 𐐔𐐇𐐝𐐀𐐡𐐇𐐓 𐐏𐐆𐐅𐐤𐐆𐐚𐐊𐐡𐐝𐐆𐐓𐐆"
536 );
537 assert_eq!(lowercase(&varying_length), "zȺȾ");
538 assert_eq!(lowercase(&rtl), "مرحبا الخرشوف");
539
540 assert_eq!(uppercase(&sharp_s), "ß");
541 assert_eq!(uppercase(&tomorrow), "αύριο");
542 assert_eq!(uppercase(&year), "έτος");
543 assert_eq!(
544 uppercase(&two_byte_chars),
545 "𐐜 𐐔𐐇𐐝𐐀𐐡𐐇𐐓 𐐙𐐊𐐡𐐝𐐓/𐐝𐐇𐐗𐐊𐐤𐐔 𐐒𐐋𐐗 𐐒𐐌 𐐜 𐐡𐐀𐐖𐐇𐐤𐐓𐐝 𐐱𐑂 𐑄 𐐔𐐇𐐝𐐀𐐡𐐇𐐓 𐐏𐐆𐐅𐐤𐐆𐐚𐐊𐐡𐐝𐐆𐐓𐐆"
546 );
547 assert_eq!(uppercase(&varying_length), "ZȺȾ");
548 assert_eq!(uppercase(&rtl), "مرحبا الخرشوف");
549
550 assert_eq!(swapcase(&sharp_s), "ß");
551 assert_eq!(swapcase(&tomorrow), "αύριο");
552 assert_eq!(swapcase(&year), "έτος");
553 assert_eq!(
554 swapcase(&two_byte_chars),
555 "𐐜 𐐔𐐇𐐝𐐀𐐡𐐇𐐓 𐐙𐐊𐐡𐐝𐐓/𐐝𐐇𐐗𐐊𐐤𐐔 𐐒𐐋𐐗 𐐒𐐌 𐐜 𐐡𐐀𐐖𐐇𐐤𐐓𐐝 𐐱𐑂 𐑄 𐐔𐐇𐐝𐐀𐐡𐐇𐐓 𐐏𐐆𐐅𐐤𐐆𐐚𐐊𐐡𐐝𐐆𐐓𐐆"
556 );
557 assert_eq!(swapcase(&varying_length), "ZȺȾ");
558 assert_eq!(swapcase(&rtl), "مرحبا الخرشوف");
559 }
560
561 #[test]
562 fn casing_binary_string_invalid_utf8() {
563 let mut s = BinaryString::from(b"\xFF\xFE");
564
565 s.make_capitalized();
566 assert_eq!(s, &b"\xFF\xFE"[..]);
567
568 s.make_lowercase();
569 assert_eq!(s, &b"\xFF\xFE"[..]);
570
571 s.make_uppercase();
572 assert_eq!(s, &b"\xFF\xFE"[..]);
573
574 s.make_swapcase();
575 assert_eq!(s, &b"\xFF\xFE"[..]);
576 }
577
578 #[test]
579 fn casing_binary_string_unicode_replacement_character() {
580 let mut s = BinaryString::from("�");
581
582 s.make_capitalized();
583 assert_eq!(s, "�");
584
585 s.make_lowercase();
586 assert_eq!(s, "�");
587
588 s.make_uppercase();
589 assert_eq!(s, "�");
590
591 s.make_swapcase();
592 assert_eq!(s, "�");
593 }
594
595 #[test]
596 fn get_char_slice_valid_range() {
597 let s = BinaryString::from("abc");
598 assert_eq!(s.get_char_slice(0..0), Some(&b""[..]));
599 assert_eq!(s.get_char_slice(0..1), Some(&b"a"[..]));
600 assert_eq!(s.get_char_slice(0..2), Some(&b"ab"[..]));
601 assert_eq!(s.get_char_slice(0..3), Some(&b"abc"[..]));
602 assert_eq!(s.get_char_slice(0..4), Some(&b"abc"[..]));
603 assert_eq!(s.get_char_slice(1..1), Some(&b""[..]));
604 assert_eq!(s.get_char_slice(1..2), Some(&b"b"[..]));
605 assert_eq!(s.get_char_slice(1..3), Some(&b"bc"[..]));
606 }
607
608 #[test]
609 #[expect(clippy::reversed_empty_ranges, reason = "testing behavior of reversed ranges")]
610 fn get_char_slice_invalid_range() {
611 let s = BinaryString::from("abc");
612 assert_eq!(s.get_char_slice(4..5), None);
613 assert_eq!(s.get_char_slice(4..1), None);
614 assert_eq!(s.get_char_slice(3..1), Some(&b""[..]));
615 assert_eq!(s.get_char_slice(2..1), Some(&b""[..]));
616 }
617
618 #[test]
619 fn index_with_default_offset() {
620 let s = BinaryString::from(b"foo");
621 assert_eq!(s.index("f".as_bytes(), 0), Some(0));
622 assert_eq!(s.index("o".as_bytes(), 0), Some(1));
623 assert_eq!(s.index("oo".as_bytes(), 0), Some(1));
624 assert_eq!(s.index("ooo".as_bytes(), 0), None);
625 }
626
627 #[test]
628 fn index_with_different_offset() {
629 let s = BinaryString::from(b"foo");
630 assert_eq!(s.index("o".as_bytes(), 1), Some(1));
631 assert_eq!(s.index("o".as_bytes(), 2), Some(2));
632 assert_eq!(s.index("o".as_bytes(), 3), None);
633 }
634
635 #[test]
636 fn index_offset_no_overflow() {
637 let s = BinaryString::from(b"foo");
638 assert_eq!(s.index("o".as_bytes(), usize::MAX), None);
639 }
640
641 #[test]
642 fn index_empties() {
643 let s = BinaryString::from("");
652 assert_eq!(s.index(b"", 0), Some(0));
653
654 assert_eq!(s.index(b"a", 0), None);
655
656 let s = BinaryString::from("a");
657 assert_eq!(s.index(b"", 0), Some(0));
658 }
659
660 #[test]
661 fn rindex_with_default_offset() {
662 let s = BinaryString::from(b"foo");
663 assert_eq!(s.rindex("f".as_bytes(), 2), Some(0));
664 assert_eq!(s.rindex("o".as_bytes(), 2), Some(2));
665 assert_eq!(s.rindex("oo".as_bytes(), 2), Some(1));
666 assert_eq!(s.rindex("ooo".as_bytes(), 2), None);
667 }
668
669 #[test]
670 fn rindex_with_different_offset() {
671 let s = BinaryString::from(b"foo");
672 assert_eq!(s.rindex("o".as_bytes(), 3), Some(2));
673 assert_eq!(s.rindex("o".as_bytes(), 2), Some(2));
674 assert_eq!(s.rindex("o".as_bytes(), 1), Some(1));
675 assert_eq!(s.rindex("o".as_bytes(), 0), None);
676 }
677
678 #[test]
679 fn rindex_offset_no_overflow() {
680 let s = BinaryString::from(b"foo");
681 assert_eq!(s.rindex("o".as_bytes(), usize::MAX), Some(2));
682 }
683
684 #[test]
685 fn rindex_empties() {
686 let s = BinaryString::from("");
695 assert_eq!(s.rindex(b"", usize::MAX), Some(0));
696 assert_eq!(s.rindex(b"", 1), Some(0));
697 assert_eq!(s.rindex(b"", 0), Some(0));
698
699 assert_eq!(s.rindex(b"a", usize::MAX), None);
700 assert_eq!(s.rindex(b"a", 1), None);
701 assert_eq!(s.rindex(b"a", 0), None);
702
703 let s = BinaryString::from("a");
704 assert_eq!(s.rindex(b"", usize::MAX), Some(1));
705 assert_eq!(s.rindex(b"", 1), Some(1));
706 }
707}