spinoso_string/enc/
mod.rs

1use alloc::collections::TryReserveError;
2use core::cmp::Ordering;
3use core::hash::{Hash, Hasher};
4use core::mem;
5use core::ops::Range;
6use core::slice::SliceIndex;
7
8use ascii::AsciiString;
9use binary::BinaryString;
10use scolapasta_strbuf::Buf;
11use utf8::Utf8String;
12
13use crate::codepoints::InvalidCodepointError;
14use crate::encoding::Encoding;
15use crate::iter::{Bytes, IntoIter, Iter, IterMut};
16use crate::ord::OrdError;
17use crate::{CaseFoldingEffect, CodepointsError};
18
19mod ascii;
20mod binary;
21mod binascii;
22mod codepoints;
23mod impls;
24mod inspect;
25#[cfg(feature = "std")]
26mod io;
27mod utf8;
28
29pub use codepoints::Codepoints;
30pub use inspect::Inspect;
31
32#[derive(Clone)]
33pub enum EncodedString {
34    Ascii(AsciiString),
35    Binary(BinaryString),
36    Utf8(Utf8String),
37}
38
39impl Default for EncodedString {
40    fn default() -> Self {
41        Self::utf8(Buf::new())
42    }
43}
44
45impl Hash for EncodedString {
46    fn hash<H: Hasher>(&self, hasher: &mut H) {
47        // A `EncodedString`'s hash only depends on its byte contents.
48        //
49        // ```
50        // [3.0.2] > s = "abc"
51        // => "abc"
52        // [3.0.2] > t = s.dup.force_encoding(Encoding::ASCII)
53        // => "abc"
54        // [3.0.2] > s.hash
55        // => 3398383793005079442
56        // [3.0.2] > t.hash
57        // => 3398383793005079442
58        // ```
59        self.as_slice().hash(hasher);
60    }
61}
62
63impl PartialEq for EncodedString {
64    fn eq(&self, other: &Self) -> bool {
65        // Equality only depends on each `EncodedString`'s byte contents.
66        //
67        // ```
68        // [3.0.2] > s = "abc"
69        // => "abc"
70        // [3.0.2] > t = s.dup.force_encoding(Encoding::ASCII)
71        // => "abc"
72        // [3.0.2] > s == t
73        // => true
74        // ```
75        //
76        // See the functional tests in `string_test.rb` for more cases.
77        match (self.encoding(), other.encoding()) {
78            (self_enc, other_enc) if self_enc == other_enc => self.as_slice() == other.as_slice(),
79            _ if self.is_ascii_only() && other.is_ascii_only() => self.as_slice() == other.as_slice(),
80            _ => false,
81        }
82    }
83}
84
85impl Eq for EncodedString {}
86
87impl PartialOrd for EncodedString {
88    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
89        Some(self.cmp(other))
90    }
91}
92
93impl Ord for EncodedString {
94    fn cmp(&self, other: &Self) -> Ordering {
95        self.as_slice().cmp(other.as_slice())
96    }
97}
98
99// Constructors
100impl EncodedString {
101    #[inline]
102    #[must_use]
103    pub fn new(buf: Buf, encoding: Encoding) -> Self {
104        match encoding {
105            Encoding::Ascii => Self::ascii(buf),
106            Encoding::Binary => Self::binary(buf),
107            Encoding::Utf8 => Self::utf8(buf),
108        }
109    }
110
111    #[inline]
112    #[must_use]
113    pub fn ascii(buf: Buf) -> Self {
114        Self::Ascii(AsciiString::new(buf))
115    }
116
117    #[inline]
118    #[must_use]
119    pub fn binary(buf: Buf) -> Self {
120        Self::Binary(BinaryString::new(buf))
121    }
122
123    #[inline]
124    #[must_use]
125    pub fn utf8(buf: Buf) -> Self {
126        Self::Utf8(Utf8String::new(buf))
127    }
128}
129
130impl EncodedString {
131    #[inline]
132    #[must_use]
133    pub fn encoding(&self) -> Encoding {
134        match self {
135            EncodedString::Ascii(_) => Encoding::Ascii,
136            EncodedString::Binary(_) => Encoding::Binary,
137            EncodedString::Utf8(_) => Encoding::Utf8,
138        }
139    }
140
141    #[inline]
142    pub fn set_encoding(&mut self, encoding: Encoding) {
143        if self.encoding() == encoding {
144            return;
145        };
146        let s = mem::take(self);
147        let buf = s.into_buf();
148        *self = Self::new(buf, encoding);
149    }
150}
151
152// Defer to Encoded Implementation
153impl EncodedString {
154    #[inline]
155    #[must_use]
156    pub fn into_buf(self) -> Buf {
157        match self {
158            EncodedString::Ascii(inner) => inner.into_buf(),
159            EncodedString::Binary(inner) => inner.into_buf(),
160            EncodedString::Utf8(inner) => inner.into_buf(),
161        }
162    }
163
164    #[inline]
165    #[must_use]
166    pub fn into_iter(self) -> IntoIter {
167        match self {
168            EncodedString::Ascii(inner) => inner.into_iter(),
169            EncodedString::Binary(inner) => inner.into_iter(),
170            EncodedString::Utf8(inner) => inner.into_iter(),
171        }
172    }
173
174    #[inline]
175    #[must_use]
176    pub fn as_slice(&self) -> &[u8] {
177        match self {
178            EncodedString::Ascii(inner) => inner.as_slice(),
179            EncodedString::Binary(inner) => inner.as_slice(),
180            EncodedString::Utf8(inner) => inner.as_bytes(),
181        }
182    }
183
184    #[inline]
185    #[must_use]
186    pub fn as_mut_slice(&mut self) -> &mut [u8] {
187        match self {
188            EncodedString::Ascii(inner) => inner.as_mut_slice(),
189            EncodedString::Binary(inner) => inner.as_mut_slice(),
190            EncodedString::Utf8(inner) => inner.as_bytes_mut(),
191        }
192    }
193
194    #[inline]
195    #[must_use]
196    pub fn as_ptr(&self) -> *const u8 {
197        match self {
198            EncodedString::Ascii(inner) => inner.as_ptr(),
199            EncodedString::Binary(inner) => inner.as_ptr(),
200            EncodedString::Utf8(inner) => inner.as_ptr(),
201        }
202    }
203
204    #[inline]
205    #[must_use]
206    pub fn as_mut_ptr(&mut self) -> *mut u8 {
207        match self {
208            EncodedString::Ascii(inner) => inner.as_mut_ptr(),
209            EncodedString::Binary(inner) => inner.as_mut_ptr(),
210            EncodedString::Utf8(inner) => inner.as_mut_ptr(),
211        }
212    }
213
214    #[inline]
215    #[must_use]
216    pub fn len(&self) -> usize {
217        match self {
218            EncodedString::Ascii(inner) => inner.len(),
219            EncodedString::Binary(inner) => inner.len(),
220            EncodedString::Utf8(inner) => inner.len(),
221        }
222    }
223
224    #[inline]
225    pub unsafe fn set_len(&mut self, len: usize) {
226        // SAFETY: The caller must uphold the documented safety contract, which
227        // is the same as each encoded string's inner buffer.
228        unsafe {
229            match self {
230                EncodedString::Ascii(inner) => inner.set_len(len),
231                EncodedString::Binary(inner) => inner.set_len(len),
232                EncodedString::Utf8(inner) => inner.set_len(len),
233            }
234        }
235    }
236
237    #[inline]
238    #[must_use]
239    pub fn capacity(&self) -> usize {
240        match self {
241            EncodedString::Ascii(inner) => inner.capacity(),
242            EncodedString::Binary(inner) => inner.capacity(),
243            EncodedString::Utf8(inner) => inner.capacity(),
244        }
245    }
246
247    #[inline]
248    pub fn clear(&mut self) {
249        match self {
250            EncodedString::Ascii(inner) => inner.clear(),
251            EncodedString::Binary(inner) => inner.clear(),
252            EncodedString::Utf8(inner) => inner.clear(),
253        }
254    }
255
256    #[inline]
257    #[must_use]
258    pub fn is_empty(&self) -> bool {
259        match self {
260            EncodedString::Ascii(inner) => inner.is_empty(),
261            EncodedString::Binary(inner) => inner.is_empty(),
262            EncodedString::Utf8(inner) => inner.is_empty(),
263        }
264    }
265
266    #[inline]
267    pub fn truncate(&mut self, len: usize) {
268        match self {
269            EncodedString::Ascii(inner) => inner.truncate(len),
270            EncodedString::Binary(inner) => inner.truncate(len),
271            EncodedString::Utf8(inner) => inner.truncate(len),
272        };
273    }
274
275    #[inline]
276    #[must_use]
277    pub fn char_len(&self) -> usize {
278        match self {
279            EncodedString::Ascii(inner) => inner.char_len(),
280            EncodedString::Binary(inner) => inner.char_len(),
281            EncodedString::Utf8(inner) => inner.char_len(),
282        }
283    }
284
285    #[inline]
286    #[must_use]
287    pub fn iter(&self) -> Iter<'_> {
288        match self {
289            EncodedString::Ascii(inner) => inner.iter(),
290            EncodedString::Binary(inner) => inner.iter(),
291            EncodedString::Utf8(inner) => inner.iter(),
292        }
293    }
294
295    #[inline]
296    #[must_use]
297    pub fn iter_mut(&mut self) -> IterMut<'_> {
298        match self {
299            EncodedString::Ascii(inner) => inner.iter_mut(),
300            EncodedString::Binary(inner) => inner.iter_mut(),
301            EncodedString::Utf8(inner) => inner.iter_mut(),
302        }
303    }
304
305    #[inline]
306    #[must_use]
307    pub fn bytes(&self) -> Bytes<'_> {
308        match self {
309            EncodedString::Ascii(inner) => inner.bytes(),
310            EncodedString::Binary(inner) => inner.bytes(),
311            EncodedString::Utf8(inner) => inner.bytes(),
312        }
313    }
314
315    #[inline]
316    pub fn inspect(&self) -> Inspect<'_> {
317        match self {
318            EncodedString::Ascii(inner) => inner.into(),
319            EncodedString::Binary(inner) => inner.into(),
320            EncodedString::Utf8(inner) => inner.into(),
321        }
322    }
323    #[inline]
324    pub fn codepoints(&self) -> Result<Codepoints<'_>, CodepointsError> {
325        match self {
326            EncodedString::Ascii(inner) => Ok(inner.into()),
327            EncodedString::Binary(inner) => Ok(inner.into()),
328            EncodedString::Utf8(inner) => inner.try_into(),
329        }
330    }
331
332    #[inline]
333    pub fn reserve(&mut self, additional: usize) {
334        match self {
335            EncodedString::Ascii(inner) => inner.reserve(additional),
336            EncodedString::Binary(inner) => inner.reserve(additional),
337            EncodedString::Utf8(inner) => inner.reserve(additional),
338        }
339    }
340
341    #[inline]
342    pub fn try_reserve(&mut self, additional: usize) -> Result<(), TryReserveError> {
343        match self {
344            EncodedString::Ascii(inner) => inner.try_reserve(additional),
345            EncodedString::Binary(inner) => inner.try_reserve(additional),
346            EncodedString::Utf8(inner) => inner.try_reserve(additional),
347        }
348    }
349
350    #[inline]
351    pub fn reserve_exact(&mut self, additional: usize) {
352        match self {
353            EncodedString::Ascii(inner) => inner.reserve_exact(additional),
354            EncodedString::Binary(inner) => inner.reserve_exact(additional),
355            EncodedString::Utf8(inner) => inner.reserve_exact(additional),
356        }
357    }
358
359    #[inline]
360    pub fn try_reserve_exact(&mut self, additional: usize) -> Result<(), TryReserveError> {
361        match self {
362            EncodedString::Ascii(inner) => inner.try_reserve_exact(additional),
363            EncodedString::Binary(inner) => inner.try_reserve_exact(additional),
364            EncodedString::Utf8(inner) => inner.try_reserve_exact(additional),
365        }
366    }
367
368    #[inline]
369    pub fn shrink_to_fit(&mut self) {
370        match self {
371            EncodedString::Ascii(inner) => inner.shrink_to_fit(),
372            EncodedString::Binary(inner) => inner.shrink_to_fit(),
373            EncodedString::Utf8(inner) => inner.shrink_to_fit(),
374        }
375    }
376
377    #[inline]
378    pub fn shrink_to(&mut self, min_capacity: usize) {
379        match self {
380            EncodedString::Ascii(inner) => inner.shrink_to(min_capacity),
381            EncodedString::Binary(inner) => inner.shrink_to(min_capacity),
382            EncodedString::Utf8(inner) => inner.shrink_to(min_capacity),
383        }
384    }
385
386    #[inline]
387    #[must_use]
388    pub fn get<I>(&self, index: I) -> Option<&I::Output>
389    where
390        I: SliceIndex<[u8]>,
391    {
392        match self {
393            EncodedString::Ascii(inner) => inner.get(index),
394            EncodedString::Binary(inner) => inner.get(index),
395            EncodedString::Utf8(inner) => inner.get(index),
396        }
397    }
398
399    #[inline]
400    #[must_use]
401    pub fn get_char(&self, index: usize) -> Option<&'_ [u8]> {
402        match self {
403            EncodedString::Ascii(inner) => inner.get_char(index),
404            EncodedString::Binary(inner) => inner.get_char(index),
405            EncodedString::Utf8(inner) => Some(inner.get_char(index)?.as_bytes()),
406        }
407    }
408
409    #[inline]
410    #[must_use]
411    pub fn get_char_slice(&self, range: Range<usize>) -> Option<&'_ [u8]> {
412        match self {
413            EncodedString::Ascii(inner) => inner.get_char_slice(range),
414            EncodedString::Binary(inner) => inner.get_char_slice(range),
415            EncodedString::Utf8(inner) => Some(inner.get_char_slice(range)?.as_bytes()),
416        }
417    }
418
419    #[inline]
420    #[must_use]
421    pub fn get_mut<I>(&mut self, index: I) -> Option<&mut I::Output>
422    where
423        I: SliceIndex<[u8]>,
424    {
425        match self {
426            EncodedString::Ascii(inner) => inner.get_mut(index),
427            EncodedString::Binary(inner) => inner.get_mut(index),
428            EncodedString::Utf8(inner) => inner.get_mut(index),
429        }
430    }
431
432    #[inline]
433    #[must_use]
434    pub unsafe fn get_unchecked<I>(&self, index: I) -> &I::Output
435    where
436        I: SliceIndex<[u8]>,
437    {
438        // SAFETY: The caller must uphold the documented safety contract, which
439        // is the same as each encoded string's inner buffer.
440        unsafe {
441            match self {
442                EncodedString::Ascii(inner) => inner.get_unchecked(index),
443                EncodedString::Binary(inner) => inner.get_unchecked(index),
444                EncodedString::Utf8(inner) => inner.get_unchecked(index),
445            }
446        }
447    }
448
449    #[inline]
450    #[must_use]
451    pub unsafe fn get_unchecked_mut<I>(&mut self, index: I) -> &mut I::Output
452    where
453        I: SliceIndex<[u8]>,
454    {
455        // SAFETY: The caller must uphold the documented safety contract, which
456        // is the same as each encoded string's inner buffer.
457        unsafe {
458            match self {
459                EncodedString::Ascii(inner) => inner.get_unchecked_mut(index),
460                EncodedString::Binary(inner) => inner.get_unchecked_mut(index),
461                EncodedString::Utf8(inner) => inner.get_unchecked_mut(index),
462            }
463        }
464    }
465
466    #[inline]
467    pub fn push_byte(&mut self, byte: u8) {
468        match self {
469            EncodedString::Ascii(inner) => inner.push_byte(byte),
470            EncodedString::Binary(inner) => inner.push_byte(byte),
471            EncodedString::Utf8(inner) => inner.push_byte(byte),
472        }
473    }
474
475    #[inline]
476    pub fn try_push_codepoint(&mut self, codepoint: i64) -> Result<(), InvalidCodepointError> {
477        match self {
478            EncodedString::Ascii(inner) => inner.try_push_codepoint(codepoint),
479            EncodedString::Binary(inner) => inner.try_push_codepoint(codepoint),
480            EncodedString::Utf8(inner) => inner.try_push_codepoint(codepoint),
481        }
482    }
483
484    #[inline]
485    pub fn try_push_int(&mut self, int: i64) -> Result<(), InvalidCodepointError> {
486        match self {
487            EncodedString::Ascii(inner) => {
488                let mut enc = None;
489                inner.try_push_int(int, &mut enc)?;
490                if let Some(enc) = enc {
491                    self.set_encoding(enc);
492                }
493            }
494            EncodedString::Binary(inner) => inner.try_push_int(int)?,
495            EncodedString::Utf8(inner) => inner.try_push_int(int)?,
496        }
497        Ok(())
498    }
499
500    #[inline]
501    pub fn push_char(&mut self, ch: char) {
502        match self {
503            EncodedString::Ascii(inner) => inner.push_char(ch),
504            EncodedString::Binary(inner) => inner.push_char(ch),
505            EncodedString::Utf8(inner) => inner.push_char(ch),
506        }
507    }
508
509    #[inline]
510    pub fn push_str(&mut self, s: &str) {
511        match self {
512            EncodedString::Ascii(inner) => inner.push_str(s),
513            EncodedString::Binary(inner) => inner.push_str(s),
514            EncodedString::Utf8(inner) => inner.push_str(s),
515        }
516    }
517
518    #[inline]
519    pub fn extend_from_slice(&mut self, other: &[u8]) {
520        match self {
521            EncodedString::Ascii(inner) => inner.extend_from_slice(other),
522            EncodedString::Binary(inner) => inner.extend_from_slice(other),
523            EncodedString::Utf8(inner) => inner.extend_from_slice(other),
524        }
525    }
526
527    #[inline]
528    #[must_use]
529    pub fn is_ascii_only(&self) -> bool {
530        match self {
531            EncodedString::Ascii(inner) => inner.is_ascii_only(),
532            EncodedString::Binary(inner) => inner.is_ascii_only(),
533            EncodedString::Utf8(inner) => inner.is_ascii_only(),
534        }
535    }
536
537    #[inline]
538    #[must_use]
539    pub fn is_valid_encoding(&self) -> bool {
540        match self {
541            EncodedString::Ascii(inner) => inner.is_valid_encoding(),
542            EncodedString::Binary(inner) => inner.is_valid_encoding(),
543            EncodedString::Utf8(inner) => inner.is_valid_encoding(),
544        }
545    }
546
547    #[inline]
548    pub fn make_capitalized(&mut self) -> CaseFoldingEffect {
549        match self {
550            EncodedString::Ascii(inner) => inner.make_capitalized(),
551            EncodedString::Binary(inner) => inner.make_capitalized(),
552            EncodedString::Utf8(inner) => inner.make_capitalized(),
553        }
554    }
555
556    #[inline]
557    pub fn make_uppercase(&mut self) -> CaseFoldingEffect {
558        match self {
559            EncodedString::Ascii(inner) => inner.make_uppercase(),
560            EncodedString::Binary(inner) => inner.make_uppercase(),
561            EncodedString::Utf8(inner) => inner.make_uppercase(),
562        }
563    }
564
565    #[inline]
566    pub fn make_lowercase(&mut self) -> CaseFoldingEffect {
567        match self {
568            EncodedString::Ascii(inner) => inner.make_lowercase(),
569            EncodedString::Binary(inner) => inner.make_lowercase(),
570            EncodedString::Utf8(inner) => inner.make_lowercase(),
571        }
572    }
573
574    #[inline]
575    pub fn make_swapcase(&mut self) -> CaseFoldingEffect {
576        match self {
577            EncodedString::Ascii(inner) => inner.make_swapcase(),
578            EncodedString::Binary(inner) => inner.make_swapcase(),
579            EncodedString::Utf8(inner) => inner.make_swapcase(),
580        }
581    }
582
583    #[inline]
584    #[must_use]
585    pub fn chr(&self) -> &[u8] {
586        match self {
587            EncodedString::Ascii(inner) => inner.chr(),
588            EncodedString::Binary(inner) => inner.chr(),
589            EncodedString::Utf8(inner) => inner.chr().as_bytes(),
590        }
591    }
592
593    #[inline]
594    pub fn ord(&self) -> Result<u32, OrdError> {
595        match self {
596            EncodedString::Ascii(inner) => inner.ord(),
597            EncodedString::Binary(inner) => inner.ord(),
598            EncodedString::Utf8(inner) => inner.ord(),
599        }
600    }
601
602    #[inline]
603    #[must_use]
604    pub fn ends_with(&self, slice: &[u8]) -> bool {
605        match self {
606            EncodedString::Ascii(inner) => inner.ends_with(slice),
607            EncodedString::Binary(inner) => inner.ends_with(slice),
608            EncodedString::Utf8(inner) => inner.ends_with(slice),
609        }
610    }
611
612    #[inline]
613    pub fn reverse(&mut self) {
614        match self {
615            EncodedString::Ascii(inner) => inner.reverse(),
616            EncodedString::Binary(inner) => inner.reverse(),
617            EncodedString::Utf8(inner) => inner.reverse(),
618        }
619    }
620
621    #[inline]
622    pub fn index(&self, needle: &[u8], offset: usize) -> Option<usize> {
623        match self {
624            EncodedString::Ascii(inner) => inner.index(needle, offset),
625            EncodedString::Binary(inner) => inner.index(needle, offset),
626            EncodedString::Utf8(inner) => inner.index(needle, offset),
627        }
628    }
629
630    #[inline]
631    pub fn rindex(&self, needle: &[u8], offset: usize) -> Option<usize> {
632        match self {
633            EncodedString::Ascii(inner) => inner.rindex(needle, offset),
634            EncodedString::Binary(inner) => inner.rindex(needle, offset),
635            EncodedString::Utf8(inner) => inner.rindex(needle, offset),
636        }
637    }
638}