spinoso_string/lib.rs
1#![warn(clippy::all, clippy::pedantic, clippy::undocumented_unsafe_blocks)]
2#![allow(
3 clippy::let_underscore_untyped,
4 reason = "https://github.com/rust-lang/rust-clippy/pull/10442#issuecomment-1516570154"
5)]
6#![allow(
7 clippy::question_mark,
8 reason = "https://github.com/rust-lang/rust-clippy/issues/8281"
9)]
10#![allow(clippy::manual_let_else, reason = "manual_let_else was very buggy on release")]
11#![allow(
12 clippy::module_name_repetitions,
13 reason = "incompatible with how code is organized in private modules"
14)]
15#![allow(
16 clippy::unnecessary_lazy_evaluations,
17 reason = "https://github.com/rust-lang/rust-clippy/issues/8109"
18)]
19#![cfg_attr(
20 test,
21 allow(clippy::non_ascii_literal, reason = "tests sometimes require UTF-8 string content")
22)]
23#![allow(unknown_lints)]
24#![warn(
25 missing_copy_implementations,
26 missing_debug_implementations,
27 missing_docs,
28 rust_2024_compatibility,
29 trivial_casts,
30 trivial_numeric_casts,
31 unused_qualifications,
32 variant_size_differences
33)]
34#![expect(missing_docs, reason = "TODO: fully document crate")]
35// Enable feature callouts in generated documentation:
36// https://doc.rust-lang.org/beta/unstable-book/language-features/doc-cfg.html
37//
38// This approach is borrowed from tokio.
39#![cfg_attr(docsrs, feature(doc_cfg))]
40#![cfg_attr(docsrs, feature(doc_alias))]
41
42//! A String object holds and manipulates an arbitrary sequence of bytes,
43//! typically representing characters.
44
45#![no_std]
46
47// Ensure code blocks in `README.md` compile
48#[cfg(doctest)]
49#[doc = include_str!("../README.md")]
50mod readme {}
51
52extern crate alloc;
53#[cfg(feature = "std")]
54extern crate std;
55
56use alloc::boxed::Box;
57use alloc::collections::TryReserveError;
58use alloc::vec::Vec;
59#[cfg(feature = "casecmp")]
60use core::cmp::Ordering;
61use core::fmt;
62use core::ops::Range;
63use core::slice::SliceIndex;
64use core::str;
65
66use bstr::ByteSlice;
67#[doc(inline)]
68#[cfg(feature = "casecmp")]
69#[cfg_attr(docsrs, doc(cfg(feature = "casecmp")))]
70pub use focaccia::CaseFold;
71use scolapasta_strbuf::Buf;
72#[doc(inline)]
73pub use scolapasta_strbuf::RawParts;
74
75mod case_folding;
76mod center;
77mod chars;
78mod codepoints;
79mod enc;
80mod encoding;
81mod eq;
82mod impls;
83mod inspect;
84mod iter;
85mod ord;
86#[cfg(test)]
87mod test;
88
89pub use case_folding::CaseFoldingEffect;
90pub use center::{Center, CenterError};
91pub use chars::Chars;
92pub use codepoints::{Codepoints, CodepointsError, InvalidCodepointError};
93use enc::EncodedString;
94pub use encoding::{Encoding, InvalidEncodingError};
95pub use inspect::Inspect;
96pub use iter::{Bytes, IntoIter, Iter, IterMut};
97pub use ord::OrdError;
98
99#[derive(Default, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
100pub struct String {
101 inner: EncodedString,
102}
103
104impl fmt::Debug for String {
105 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
106 f.debug_struct("String")
107 .field("buf", &self.inner.as_slice().as_bstr())
108 .field("encoding", &self.inner.encoding())
109 .finish()
110 }
111}
112
113// Constructors
114impl String {
115 /// Constructs a new, empty `String`.
116 ///
117 /// The `String` is [conventionally UTF-8].
118 ///
119 /// The string will not allocate until bytes are pushed onto it.
120 ///
121 /// # Examples
122 ///
123 /// ```
124 /// use spinoso_string::{Encoding, String};
125 ///
126 /// let s = String::new();
127 /// assert_eq!(s.encoding(), Encoding::Utf8);
128 /// ```
129 ///
130 /// [conventionally UTF-8]: crate::Encoding::Utf8
131 #[inline]
132 #[must_use]
133 pub fn new() -> Self {
134 let buf = Buf::new();
135 let inner = EncodedString::utf8(buf);
136 Self { inner }
137 }
138
139 /// Constructs a new, empty `String` with the specified capacity.
140 ///
141 /// The `String` is [conventionally UTF-8].
142 ///
143 /// The string will be able to hold at least `capacity` bytes without
144 /// reallocating. If `capacity` is 0, the string will not allocate.
145 ///
146 /// It is important to note that although the returned string has the
147 /// capacity specified, the string will have a zero length. For an
148 /// explanation of the difference between length and capacity, see
149 /// *[Capacity and reallocation]*.
150 ///
151 /// # Examples
152 ///
153 /// Encoding, capacity, and length:
154 ///
155 /// ```
156 /// use spinoso_string::{Encoding, String};
157 ///
158 /// let s = String::with_capacity(10);
159 /// assert_eq!(s.encoding(), Encoding::Utf8);
160 /// assert!(s.capacity() >= 10);
161 /// assert_eq!(s.len(), 0);
162 /// ```
163 ///
164 /// Allocation:
165 ///
166 /// ```
167 /// use spinoso_string::{Encoding, String};
168 ///
169 /// let mut s = String::with_capacity(10);
170 ///
171 /// for ch in 'a'..='j' {
172 /// s.push_byte(ch as u8);
173 /// }
174 /// // 10 elements have been inserted without reallocating.
175 /// assert!(s.capacity() >= 10);
176 /// assert_eq!(s.len(), 10);
177 /// ```
178 ///
179 /// [conventionally UTF-8]: crate::Encoding::Utf8
180 /// [Capacity and reallocation]: https://doc.rust-lang.org/std/vec/struct.Vec.html#capacity-and-reallocation
181 #[inline]
182 #[must_use]
183 pub fn with_capacity(capacity: usize) -> Self {
184 let buf = Buf::with_capacity(capacity);
185 let inner = EncodedString::utf8(buf);
186 Self { inner }
187 }
188
189 /// Constructs a new, empty `String` with the specified capacity and
190 /// encoding.
191 ///
192 /// The string will be able to hold at least `capacity` bytes without
193 /// reallocating. If `capacity` is 0, the string will not allocate.
194 ///
195 /// It is important to note that although the returned string has the
196 /// capacity specified, the string will have a zero length. For an
197 /// explanation of the difference between length and capacity, see
198 /// *[Capacity and reallocation]*.
199 ///
200 /// # Examples
201 ///
202 /// Encoding, capacity, and length:
203 ///
204 /// ```
205 /// use spinoso_string::{Encoding, String};
206 ///
/// let s = String::with_capacity_and_encoding(10, Encoding::Utf8);
208 /// assert_eq!(s.encoding(), Encoding::Utf8);
209 /// assert!(s.capacity() >= 10);
210 /// assert_eq!(s.len(), 0);
211 /// ```
212 ///
213 /// Allocation:
214 ///
215 /// ```
216 /// use spinoso_string::{Encoding, String};
217 ///
218 /// let mut s = String::with_capacity_and_encoding(10, Encoding::Binary);
219 /// assert_eq!(s.encoding(), Encoding::Binary);
220 ///
221 /// for ch in 'a'..='j' {
222 /// s.push_byte(ch as u8);
223 /// }
224 /// // 10 elements have been inserted without reallocating.
225 /// assert!(s.capacity() >= 10);
226 /// assert_eq!(s.len(), 10);
227 /// ```
228 ///
229 /// [Capacity and reallocation]: https://doc.rust-lang.org/std/vec/struct.Vec.html#capacity-and-reallocation
230 #[inline]
231 #[must_use]
232 pub fn with_capacity_and_encoding(capacity: usize, encoding: Encoding) -> Self {
233 let buf = Buf::with_capacity(capacity);
234 let inner = EncodedString::new(buf, encoding);
235 Self { inner }
236 }
237
238 #[inline]
239 #[must_use]
240 pub fn with_bytes_and_encoding(buf: Vec<u8>, encoding: Encoding) -> Self {
241 let inner = EncodedString::new(buf.into(), encoding);
242 Self { inner }
243 }
244
245 #[inline]
246 #[must_use]
247 pub fn utf8(buf: Vec<u8>) -> Self {
248 let inner = EncodedString::utf8(buf.into());
249 Self { inner }
250 }
251
252 #[inline]
253 #[must_use]
254 pub fn ascii(buf: Vec<u8>) -> Self {
255 let inner = EncodedString::ascii(buf.into());
256 Self { inner }
257 }
258
259 #[inline]
260 #[must_use]
261 pub fn binary(buf: Vec<u8>) -> Self {
262 let inner = EncodedString::binary(buf.into());
263 Self { inner }
264 }
265}
266
267// Core data structure manipulation
268impl String {
269 /// Returns the [`Encoding`] of this `String`.
270 ///
271 /// # Examples
272 ///
273 /// ```
274 /// use spinoso_string::{Encoding, String};
275 ///
276 /// let s = String::utf8(b"xyz".to_vec());
277 /// assert_eq!(s.encoding(), Encoding::Utf8);
278 /// ```
279 #[inline]
280 #[must_use]
281 pub fn encoding(&self) -> Encoding {
282 self.inner.encoding()
283 }
284
285 /// Set the [`Encoding`] of this `String`.
286 ///
287 /// # Examples
288 ///
289 /// ```
290 /// use spinoso_string::{Encoding, String};
291 ///
292 /// let mut s = String::utf8(b"xyz".to_vec());
293 /// assert_eq!(s.encoding(), Encoding::Utf8);
294 /// s.set_encoding(Encoding::Binary);
295 /// assert_eq!(s.encoding(), Encoding::Binary);
296 /// ```
297 #[inline]
298 pub fn set_encoding(&mut self, encoding: Encoding) {
299 self.inner.set_encoding(encoding);
300 }
301
302 /// Shortens the string, keeping the first `len` bytes and dropping the
303 /// rest.
304 ///
305 /// If `len` is greater than the string's current length, this has no
306 /// effect.
307 ///
308 /// Note that this method has no effect on the allocated capacity
309 /// of the string.
310 ///
311 /// # Examples
312 ///
/// Truncating a five-byte string to two bytes:
314 ///
315 /// ```
316 /// use spinoso_string::String;
317 ///
318 /// let mut s = String::from("12345");
319 /// s.truncate(2);
320 /// assert_eq!(*s, *b"12");
321 /// ```
322 ///
323 /// No truncation occurs when `len` is greater than the string's current
324 /// length:
325 ///
326 /// ```
327 /// use spinoso_string::String;
328 ///
329 /// let mut s = String::from("12345");
330 /// s.truncate(10);
331 /// assert_eq!(*s, *b"12345");
332 /// ```
333 ///
334 /// Truncating when `len == 0` is equivalent to calling the [`clear`]
335 /// method.
336 ///
337 /// ```
338 /// use spinoso_string::String;
339 ///
340 /// let mut s = String::from("12345");
341 /// s.truncate(0);
342 /// assert_eq!(*s, *b"");
343 /// ```
344 ///
345 /// [`clear`]: Self::clear
346 #[inline]
347 pub fn truncate(&mut self, len: usize) {
348 self.inner.truncate(len);
349 }
350
351 /// Extracts a slice containing the entire byte string.
352 ///
353 /// Equivalent to `&s[..]`.
354 #[inline]
355 #[must_use]
356 pub fn as_slice(&self) -> &[u8] {
357 self.inner.as_slice()
358 }
359
360 /// Extracts a mutable slice containing the entire byte string.
361 ///
362 /// Equivalent to `&mut s[..]`.
363 #[inline]
364 #[must_use]
365 pub fn as_mut_slice(&mut self) -> &mut [u8] {
366 self.inner.as_mut_slice()
367 }
368
369 /// Returns a raw pointer to the string's buffer.
370 ///
371 /// The caller must ensure that the string outlives the pointer this
372 /// function returns, or else it will end up pointing to garbage. Modifying
373 /// the string may cause its buffer to be reallocated, which would also make
374 /// any pointers to it invalid.
375 ///
376 /// The caller must also ensure that the memory the pointer
377 /// (non-transitively) points to is never written to (except inside an
378 /// `UnsafeCell`) using this pointer or any pointer derived from it. If you
379 /// need to mutate the contents of the slice, use [`as_mut_ptr`].
380 ///
381 /// # Examples
382 ///
383 /// ```
384 /// use spinoso_string::String;
385 ///
386 /// let s = String::utf8(b"xyz".to_vec());
387 /// let s_ptr = s.as_ptr();
388 ///
389 /// unsafe {
390 /// for i in 0..s.len() {
391 /// assert_eq!(*s_ptr.add(i), b'x' + (i as u8));
392 /// }
393 /// }
394 /// ```
395 ///
396 /// [`as_mut_ptr`]: Self::as_mut_ptr
397 #[inline]
398 #[must_use]
399 pub fn as_ptr(&self) -> *const u8 {
400 self.inner.as_ptr()
401 }
402
403 /// Returns an unsafe mutable pointer to the string's buffer.
404 ///
405 /// The caller must ensure that the string outlives the pointer this
406 /// function returns, or else it will end up pointing to garbage. Modifying
407 /// the string may cause its buffer to be reallocated, which would also make
408 /// any pointers to it invalid.
409 ///
410 /// # Examples
411 ///
412 /// ```
413 /// use spinoso_string::String;
414 ///
415 /// // Allocate string big enough for 3 bytes.
416 /// let size = 3;
417 /// let mut s = String::with_capacity(size);
418 /// let s_ptr = s.as_mut_ptr();
419 ///
420 /// // Initialize elements via raw pointer writes, then set length.
421 /// unsafe {
422 /// for i in 0..size {
423 /// *s_ptr.add(i) = b'x' + (i as u8);
424 /// }
425 /// s.set_len(size);
426 /// }
427 /// assert_eq!(&*s, b"xyz");
428 /// ```
429 #[inline]
430 #[must_use]
431 pub fn as_mut_ptr(&mut self) -> *mut u8 {
432 self.inner.as_mut_ptr()
433 }
434
435 /// Forces the length of the string to `new_len`.
436 ///
437 /// This is a low-level operation that maintains none of the normal
438 /// invariants of the type. Normally changing the length of a string is done
439 /// using one of the safe operations instead, such as [`truncate`],
440 /// [`extend`], or [`clear`].
441 ///
442 /// This function can change the return value of [`String::is_valid_encoding`].
443 ///
444 /// # Safety
445 ///
446 /// - `new_len` must be less than or equal to [`capacity()`].
447 /// - The elements at `old_len..new_len` must be initialized.
448 ///
449 /// [`truncate`]: Self::truncate
450 /// [`extend`]: Extend::extend
451 /// [`clear`]: Self::clear
452 /// [`capacity()`]: Self::capacity
453 #[inline]
454 pub unsafe fn set_len(&mut self, new_len: usize) {
455 // SAFETY: The caller must uphold the documented safety contract, which
456 // is the same as each encoded string's inner buffer.
457 unsafe {
458 self.inner.set_len(new_len);
459 }
460 }
461
462 /// Creates a `String` directly from the raw components of another string.
463 ///
464 /// # Safety
465 ///
466 /// This is highly unsafe, due to the number of invariants that aren't
467 /// checked:
468 ///
469 /// - `ptr` needs to have been previously allocated via `String` (at least,
470 /// it's highly likely to be incorrect if it wasn't).
471 /// - `length` needs to be less than or equal to `capacity`.
472 /// - `capacity` needs to be the `capacity` that the pointer was allocated
473 /// with.
474 ///
475 /// Violating these may cause problems like corrupting the allocator's
476 /// internal data structures.
477 ///
478 /// The ownership of `ptr` is effectively transferred to the `String` which
479 /// may then deallocate, reallocate or change the contents of memory pointed
480 /// to by the pointer at will. Ensure that nothing else uses the pointer
481 /// after calling this function.
482 #[must_use]
483 pub unsafe fn from_raw_parts(raw_parts: RawParts<u8>) -> Self {
484 // SAFETY: The caller must uphold the documented safety contract, which
485 // is the same as each encoded string's inner buffer.
486 let buf = unsafe { raw_parts.into_vec() };
487 Self::utf8(buf)
488 }
489
490 /// Creates a `String` directly from the raw components of another string
491 /// with the specified encoding.
492 ///
493 /// # Safety
494 ///
495 /// This is highly unsafe, due to the number of invariants that aren't
496 /// checked:
497 ///
498 /// - `ptr` needs to have been previously allocated via `String` (at least,
499 /// it's highly likely to be incorrect if it wasn't).
500 /// - `length` needs to be less than or equal to `capacity`.
501 /// - `capacity` needs to be the `capacity` that the pointer was allocated
502 /// with.
503 ///
504 /// Violating these may cause problems like corrupting the allocator's
505 /// internal data structures.
506 ///
507 /// The ownership of `ptr` is effectively transferred to the `String` which
508 /// may then deallocate, reallocate or change the contents of memory pointed
509 /// to by the pointer at will. Ensure that nothing else uses the pointer
510 /// after calling this function.
511 #[must_use]
512 pub unsafe fn from_raw_parts_with_encoding(raw_parts: RawParts<u8>, encoding: Encoding) -> Self {
513 // SAFETY: The caller must uphold the documented safety contract, which
514 // is the same as each encoded string's inner buffer.
515 let buf = unsafe { raw_parts.into_vec() };
516 Self::with_bytes_and_encoding(buf, encoding)
517 }
518
519 /// Decomposes a `String` into its raw components.
520 ///
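/// Returns a [`RawParts`] carrying the raw pointer to the underlying data,
/// the length of the string (in bytes), and the allocated capacity of the
/// data (in bytes). This is the same value that [`from_raw_parts`] takes as
/// its argument.
///
/// The string's encoding is not part of the returned value: a string rebuilt
/// with [`from_raw_parts`] is conventionally UTF-8. Use
/// [`String::from_raw_parts_with_encoding`] to restore a different encoding.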
525 ///
526 /// After calling this function, the caller is responsible for the memory
527 /// previously managed by the `String`. The only way to do this is to
528 /// convert the raw pointer, length, and capacity back into a `String` with
529 /// the [`from_raw_parts`] function, allowing the destructor to perform the
530 /// cleanup.
531 ///
532 /// [`from_raw_parts`]: String::from_raw_parts
533 #[must_use]
534 pub fn into_raw_parts(self) -> RawParts<u8> {
535 self.inner.into_buf().into_raw_parts()
536 }
537
538 /// Converts self into a vector without clones or allocation.
539 ///
540 /// This method consumes this `String` and returns its inner [`Vec<u8>`]
541 /// buffer.
542 ///
543 /// # Examples
544 ///
545 /// ```
546 /// use spinoso_string::String;
547 ///
548 /// let s = String::from("hello");
549 /// let buf = s.into_vec();
550 /// // `s` cannot be used anymore because it has been converted into `buf`.
551 ///
552 /// assert_eq!(buf, b"hello".to_vec());
553 /// ```
554 /// [`Vec<u8>`]: alloc::vec::Vec
555 #[inline]
556 #[must_use]
557 pub fn into_vec(self) -> Vec<u8> {
558 self.inner.into_buf().into_inner()
559 }
560
/// Converts the string into `Box<[u8]>`.
562 ///
563 /// Note that this will drop any excess capacity.
564 ///
565 /// # Examples
566 ///
567 /// ```
568 /// use spinoso_string::String;
569 ///
570 /// let s = String::from("abc");
571 /// let slice = s.into_boxed_slice();
572 /// ```
573 ///
574 /// Any excess capacity is removed:
575 ///
576 /// ```
577 /// use spinoso_string::String;
578 ///
579 /// let mut s = String::with_capacity(10);
580 /// s.extend_from_slice(&[b'a', b'b', b'c']);
581 ///
582 /// assert!(s.capacity() >= 10);
583 /// let slice = s.into_boxed_slice();
584 /// assert_eq!(slice.into_vec().capacity(), 3);
585 /// ```
586 ///
587 /// [`Box<u8>`]: alloc::boxed::Box
588 #[inline]
589 #[must_use]
590 pub fn into_boxed_slice(self) -> Box<[u8]> {
591 self.inner.into_buf().into_boxed_slice()
592 }
593
594 /// Returns the number of bytes the string can hold without reallocating.
595 ///
596 /// # Examples
597 ///
598 /// ```
599 /// use spinoso_string::String;
600 ///
601 /// let s = String::with_capacity(10);
602 /// assert!(s.capacity() >= 10);
603 /// ```
604 #[inline]
605 #[must_use]
606 pub fn capacity(&self) -> usize {
607 self.inner.capacity()
608 }
609
610 /// Clears the string, removing all bytes.
611 ///
612 /// Note that this method has no effect on the allocated capacity or the
613 /// encoding of the string.
614 ///
615 /// # Examples
616 ///
617 /// ```
618 /// use spinoso_string::String;
619 ///
620 /// let mut s = String::from("abc");
621 /// s.clear();
622 /// assert!(s.is_empty());
623 /// ```
624 #[inline]
625 pub fn clear(&mut self) {
626 self.inner.clear();
627 }
628
/// Returns true if the string contains no bytes.
630 ///
631 /// # Examples
632 ///
633 /// ```
634 /// use spinoso_string::String;
635 ///
636 /// let mut s = String::new();
637 /// assert!(s.is_empty());
638 ///
639 /// s.push_char('x');
640 /// assert!(!s.is_empty());
641 /// ```
642 #[inline]
643 #[must_use]
644 pub fn is_empty(&self) -> bool {
645 self.inner.is_empty()
646 }
647
648 /// Returns the number of bytes in the string, also referred to as its
649 /// "length" or "bytesize".
650 ///
651 /// See also [`bytesize`].
652 ///
653 /// # Examples
654 ///
655 /// ```
656 /// use spinoso_string::String;
657 ///
658 /// let s = String::from("xyz");
659 /// assert_eq!(s.len(), 3);
660 /// ```
661 ///
662 /// [`bytesize`]: Self::bytesize
663 #[inline]
664 #[must_use]
665 pub fn len(&self) -> usize {
666 self.inner.len()
667 }
668}
669
670// Core iterators
671impl String {
672 /// Returns an iterator over this string's underlying byte slice.
673 ///
674 /// # Examples
675 ///
676 /// ```
677 /// use spinoso_string::String;
678 ///
679 /// let s = String::from("abc");
680 /// let mut iterator = s.iter();
681 ///
682 /// assert_eq!(iterator.next(), Some(&b'a'));
683 /// assert_eq!(iterator.next(), Some(&b'b'));
684 /// assert_eq!(iterator.next(), Some(&b'c'));
685 /// assert_eq!(iterator.next(), None);
686 /// ```
687 #[inline]
688 #[must_use]
689 pub fn iter(&self) -> Iter<'_> {
690 self.inner.iter()
691 }
692
693 /// Returns an iterator that allows modifying this string's underlying byte
694 /// slice.
695 ///
696 /// # Examples
697 ///
698 /// ```
699 /// use spinoso_string::String;
700 ///
701 /// let mut s = String::from("abc");
702 ///
703 /// for byte in s.iter_mut() {
704 /// *byte = b'x';
705 /// }
706 ///
707 /// assert_eq!(s, "xxx");
708 /// ```
709 #[inline]
710 #[must_use]
711 pub fn iter_mut(&mut self) -> IterMut<'_> {
712 self.inner.iter_mut()
713 }
714
715 /// Returns an iterator over the bytes in this byte string.
716 ///
717 /// # Examples
718 ///
719 /// ```
720 /// use spinoso_string::String;
721 ///
722 /// let s = String::utf8(b"foobar".to_vec());
723 /// let bytes: Vec<u8> = s.bytes().collect();
724 /// assert_eq!(bytes, s);
725 /// ```
726 #[inline]
727 #[must_use]
728 pub fn bytes(&self) -> Bytes<'_> {
729 self.inner.bytes()
730 }
731}
732
733// Additional `IntoIterator` iterator
734impl IntoIterator for String {
735 type Item = u8;
736 type IntoIter = IntoIter;
737
738 /// Returns an iterator that moves over the bytes of this string.
739 ///
740 /// # Examples
741 ///
742 /// ```
743 /// use spinoso_string::String;
744 ///
745 /// let s = String::from("abc");
746 ///
747 /// let mut iterator = s.into_iter();
748 ///
749 /// assert_eq!(iterator.next(), Some(b'a'));
750 /// assert_eq!(iterator.next(), Some(b'b'));
751 /// assert_eq!(iterator.next(), Some(b'c'));
752 /// assert_eq!(iterator.next(), None);
753 /// ```
754 #[inline]
755 fn into_iter(self) -> Self::IntoIter {
756 self.inner.into_iter()
757 }
758}
759
760impl<'a> IntoIterator for &'a String {
761 type Item = &'a u8;
762 type IntoIter = Iter<'a>;
763
764 /// Returns a borrowing iterator that moves over the bytes of this string.
765 ///
766 /// # Examples
767 ///
768 /// ```
769 /// use spinoso_string::String;
770 ///
771 /// let s = String::from("abc");
772 ///
773 /// for &b in &s {
774 /// assert_eq!(b, b'a');
775 /// break;
776 /// }
777 /// ```
778 #[inline]
779 fn into_iter(self) -> Self::IntoIter {
780 self.iter()
781 }
782}
783
784impl<'a> IntoIterator for &'a mut String {
785 type Item = &'a mut u8;
786 type IntoIter = IterMut<'a>;
787
788 /// Returns a borrowing iterator that mutably moves over the bytes of this
789 /// string.
790 ///
791 /// # Examples
792 ///
793 /// ```
794 /// use spinoso_string::String;
795 ///
796 /// let mut s = String::from("abc");
797 ///
798 /// for b in &mut s {
799 /// *b = b'1';
800 /// }
801 ///
802 /// assert_eq!(s, b"111");
803 /// ```
804 #[inline]
805 fn into_iter(self) -> Self::IntoIter {
806 self.iter_mut()
807 }
808}
809
810// Memory management
811impl String {
812 /// Reserves capacity for at least `additional` more bytes to be inserted in
813 /// the given `String`. The string may reserve more space to avoid frequent
814 /// reallocations. After calling `reserve`, capacity will be greater than or
815 /// equal to `self.len() + additional`. Does nothing if capacity is already
816 /// sufficient.
817 ///
818 /// # Panics
819 ///
820 /// Panics if the new capacity exceeds [`isize::MAX`] bytes.
821 ///
822 /// # Examples
823 ///
824 /// ```
825 /// use spinoso_string::String;
826 ///
827 /// let mut s = String::from("x");
828 /// s.reserve(10);
829 /// assert!(s.capacity() >= 11);
830 /// ```
831 #[inline]
832 pub fn reserve(&mut self, additional: usize) {
833 self.inner.reserve(additional);
834 }
835
836 /// Tries to reserve capacity for at least `additional` more elements to be
837 /// inserted in the `String`. The collection may reserve more space to avoid
838 /// frequent reallocations. After calling `try_reserve`, capacity will be
839 /// greater than or equal to `self.len() + additional`. Does nothing if
840 /// capacity is already sufficient.
841 ///
842 /// # Errors
843 ///
844 /// If the capacity overflows, or the allocator reports a failure, then an
845 /// error is returned.
846 ///
847 /// # Examples
848 ///
849 /// ```
850 /// use spinoso_string::String;
851 /// let mut str = String::from("x");
852 /// str.try_reserve(10).expect("why is this OOMing?");
853 /// assert!(str.capacity() >= 11);
854 /// ```
855 #[inline]
856 pub fn try_reserve(&mut self, additional: usize) -> Result<(), TryReserveError> {
857 self.inner.try_reserve(additional)
858 }
859
860 /// Reserves the minimum capacity for exactly `additional` more bytes to be
861 /// inserted in the given `String`. After calling `reserve_exact`, capacity
862 /// will be greater than or equal to `self.len() + additional`. Does nothing
863 /// if the capacity is already sufficient.
864 ///
865 /// Note that the allocator may give the string more space than it requests.
866 /// Therefore, capacity can not be relied upon to be precisely minimal.
867 /// Prefer [`reserve`] if future insertions are expected.
868 ///
869 /// # Panics
870 ///
871 /// Panics if the new capacity exceeds [`isize::MAX`] bytes.
872 ///
873 /// # Examples
874 ///
875 /// ```
876 /// use spinoso_string::String;
877 ///
878 /// let mut s = String::from("x");
879 /// s.reserve_exact(10);
880 /// assert!(s.capacity() >= 11);
881 /// ```
882 ///
883 /// [`reserve`]: Self::reserve
884 #[inline]
885 pub fn reserve_exact(&mut self, additional: usize) {
886 self.inner.reserve_exact(additional);
887 }
888
889 /// Tries to reserve the minimum capacity for exactly `additional`
890 /// elements to be inserted in the `String`. After calling
891 /// `try_reserve_exact`, capacity will be greater than or equal to
892 /// `self.len() + additional` if it returns `Ok(())`. Does nothing if the
893 /// capacity is already sufficient.
894 ///
895 /// Note that the allocator may give the collection more space than
896 /// it requests. Therefore, capacity can not be relied upon to be
897 /// precisely minimal. Prefer [`try_reserve`] if future insertions are
898 /// expected.
899 ///
900 /// # Errors
901 ///
902 /// If the capacity overflows, or the allocator reports a failure, then an
903 /// error is returned.
904 ///
905 /// # Examples
906 ///
907 /// ```
908 /// use spinoso_string::String;
909 /// let mut str = String::from("x");
910 /// str.try_reserve_exact(10).expect("why is this OOMing?");
911 /// assert!(str.capacity() >= 11);
912 /// ```
913 ///
914 /// [`try_reserve`]: Self::try_reserve
915 #[inline]
916 pub fn try_reserve_exact(&mut self, additional: usize) -> Result<(), TryReserveError> {
917 self.inner.try_reserve_exact(additional)
918 }
919
/// Shrinks the capacity of the string as much as possible.
921 ///
922 /// It will drop down as close as possible to the length but the allocator
923 /// may still inform the string that there is space for a few more bytes.
924 ///
925 /// # Examples
926 ///
927 /// ```
928 /// use spinoso_string::String;
929 ///
930 /// let mut s = String::with_capacity(10);
931 /// s.extend_from_slice(b"abc");
932 /// assert!(s.capacity() >= 10);
933 /// s.shrink_to_fit();
934 /// assert!(s.capacity() >= 3);
935 /// ```
936 #[inline]
937 pub fn shrink_to_fit(&mut self) {
938 self.inner.shrink_to_fit();
939 }
940
/// Shrinks the capacity of the string with a lower bound.
942 ///
943 /// The capacity will remain at least as large as both the length and the
944 /// supplied value.
945 ///
946 /// If the current capacity is less than the lower limit, this is a no-op.
947 ///
948 /// # Examples
949 ///
950 /// ```
951 /// use spinoso_string::String;
952 ///
953 /// let mut s = String::with_capacity(10);
954 /// s.extend_from_slice(b"abc");
955 /// assert!(s.capacity() >= 10);
956 /// s.shrink_to(5);
957 /// assert!(s.capacity() >= 5);
958 /// ```
959 #[inline]
960 pub fn shrink_to(&mut self, min_capacity: usize) {
961 self.inner.shrink_to(min_capacity);
962 }
963}
964
965// Indexing
966impl String {
967 /// Returns a reference to a byte or sub-byteslice depending on the type of
968 /// index.
969 ///
970 /// - If given a position, returns a reference to the byte at that position
971 /// or [`None`] if out of bounds.
972 /// - If given a range, returns the subslice corresponding to that range, or
973 /// [`None`] if out of bounds.
974 ///
975 /// # Examples
976 ///
977 /// ```
978 /// use spinoso_string::String;
979 ///
980 /// let s = String::from("abc");
981 /// assert_eq!(s.get(1), Some(&b'b'));
982 /// assert_eq!(s.get(0..2), Some(&b"ab"[..]));
983 /// assert_eq!(s.get(3), None);
984 /// assert_eq!(s.get(0..4), None);
985 /// ```
986 #[inline]
987 #[must_use]
988 pub fn get<I>(&self, index: I) -> Option<&I::Output>
989 where
990 I: SliceIndex<[u8]>,
991 {
992 self.inner.get(index)
993 }
994
995 /// Returns a mutable reference to a byte or sub-byteslice depending on the
996 /// type of index (see [`get`]) or [`None`] if the index is out of bounds.
997 ///
998 /// # Examples
999 ///
1000 /// ```
1001 /// use spinoso_string::String;
1002 ///
1003 /// let mut s = String::from("abc");
1004 ///
1005 /// if let Some(byte) = s.get_mut(1) {
1006 /// *byte = b'x';
1007 /// }
1008 /// assert_eq!(s, "axc");
1009 /// ```
1010 ///
1011 /// [`get`]: Self::get
1012 #[inline]
1013 #[must_use]
1014 pub fn get_mut<I>(&mut self, index: I) -> Option<&mut I::Output>
1015 where
1016 I: SliceIndex<[u8]>,
1017 {
1018 self.inner.get_mut(index)
1019 }
1020
1021 /// Returns a reference to a byte or sub-byteslice, without doing bounds
1022 /// checking.
1023 ///
1024 /// For a safe alternative see [`get`].
1025 ///
1026 /// # Safety
1027 ///
1028 /// Calling this method with an out-of-bounds index is *[undefined
1029 /// behavior]* even if the resulting reference is not used.
1030 ///
1031 /// # Examples
1032 ///
1033 /// ```
1034 /// use spinoso_string::String;
1035 ///
1036 /// let s = String::from("abc");
1037 ///
1038 /// unsafe {
1039 /// assert_eq!(s.get_unchecked(1), &b'b');
1040 /// }
1041 /// ```
1042 ///
1043 /// [`get`]: Self::get
1044 /// [undefined behavior]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
1045 #[inline]
1046 #[must_use]
1047 pub unsafe fn get_unchecked<I>(&self, index: I) -> &I::Output
1048 where
1049 I: SliceIndex<[u8]>,
1050 {
1051 // SAFETY: The caller must uphold the documented safety contract, which
1052 // is the same as each encoded string's inner buffer.
1053 unsafe { self.inner.get_unchecked(index) }
1054 }
1055
1056 /// Returns a mutable reference to a byte or sub-byteslice, without doing
1057 /// bounds checking.
1058 ///
1059 /// For a safe alternative see [`get_mut`].
1060 ///
1061 /// # Safety
1062 ///
1063 /// Calling this method with an out-of-bounds index is *[undefined
1064 /// behavior]* even if the resulting reference is not used.
1065 ///
1066 /// # Examples
1067 ///
1068 /// ```
1069 /// use spinoso_string::String;
1070 ///
1071 /// let mut s = String::from("abc");
1072 ///
1073 /// unsafe {
1074 /// let byte = s.get_unchecked_mut(1);
1075 /// *byte = b'x';
1076 /// }
1077 /// assert_eq!(s, "axc");
1078 /// ```
1079 ///
1080 /// [`get_mut`]: Self::get_mut
1081 /// [undefined behavior]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
1082 #[inline]
1083 #[must_use]
1084 pub unsafe fn get_unchecked_mut<I>(&mut self, index: I) -> &mut I::Output
1085 where
1086 I: SliceIndex<[u8]>,
1087 {
1088 // SAFETY: The caller must uphold the documented safety contract, which
1089 // is the same as each encoded string's inner buffer.
1090 unsafe { self.inner.get_unchecked_mut(index) }
1091 }
1092}
1093
1094// Pushing and popping bytes, codepoints, and strings.
1095impl String {
1096 /// Appends a given byte onto the end of this `String`.
1097 ///
1098 /// The given byte is not required to be a valid byte given this `String`'s
1099 /// [encoding] because encodings are only conventional.
1100 ///
1101 /// # Examples
1102 ///
1103 /// ```
1104 /// use spinoso_string::String;
1105 ///
1106 /// let mut s = String::utf8(b"UTF-8?".to_vec());
1107 /// s.push_byte(0xFF);
1108 /// assert_eq!(s, b"UTF-8?\xFF");
1109 /// ```
1110 ///
1111 /// [encoding]: crate::Encoding
    #[inline]
    pub fn push_byte(&mut self, byte: u8) {
        // No validation is performed at this layer; the byte is handed
        // straight to the inner buffer.
        self.inner.push_byte(byte);
    }
1116
1117 /// Try to append a given Unicode codepoint onto the end of this `String`.
1118 ///
1119 /// This API is encoding-aware. For [UTF-8] strings, the given integer is
1120 /// converted to a [`char`] before appending to this `String` using
1121 /// [`push_char`]. For [ASCII] and [binary] strings, the given integer is
1122 /// converted to a byte before appending to this `String` using
1123 /// [`push_byte`].
1124 ///
1125 /// This function can be used to implement the Ruby method [`String#<<`] for
1126 /// [`Integer`][ruby-integer] arguments.
1127 ///
1128 /// # Errors
1129 ///
1130 /// If this `String` is [conventionally UTF-8] and the given codepoint is
1131 /// not a valid [`char`], an error is returned.
1132 ///
1133 /// If this `String` has [ASCII] or [binary] encoding and the given
1134 /// codepoint is not a valid byte, an error is returned.
1135 ///
1136 /// # Examples
1137 ///
1138 /// For [UTF-8] strings, the given codepoint is converted to a Unicode scalar
1139 /// value before appending:
1140 ///
1141 /// ```
1142 /// use spinoso_string::String;
1143 ///
1144 /// # fn example() -> Result<(), spinoso_string::InvalidCodepointError> {
1145 /// let mut s = String::utf8(b"".to_vec());
1146 /// s.try_push_codepoint(b'a' as i64)?;
1147 /// assert_eq!(s, "a");
1148 /// assert!(s.try_push_codepoint(0xD83F).is_err());
1149 /// assert!(s.try_push_codepoint(-1).is_err());
1150 /// # Ok(())
1151 /// # }
1152 /// # example().unwrap();
1153 /// ```
1154 ///
1155 /// For [ASCII] and [binary] strings, the given codepoint must be a valid
1156 /// byte:
1157 ///
1158 /// ```
1159 /// use spinoso_string::String;
1160 ///
1161 /// # fn example() -> Result<(), spinoso_string::InvalidCodepointError> {
1162 /// let mut s = String::binary(b"".to_vec());
1163 /// s.try_push_codepoint(b'a' as i64)?;
1164 /// assert_eq!(s, "a");
1165 /// assert!(s.try_push_codepoint(1024).is_err());
1166 /// assert!(s.try_push_codepoint(-1).is_err());
1167 /// # Ok(())
1168 /// # }
1169 /// # example().unwrap();
1170 /// ```
1171 ///
1172 /// [UTF-8]: crate::Encoding::Utf8
1173 /// [ASCII]: crate::Encoding::Ascii
1174 /// [binary]: crate::Encoding::Binary
1175 /// [`push_char`]: Self::push_char
1176 /// [`push_byte`]: Self::push_byte
1177 /// [`String#<<`]: https://ruby-doc.org/core-3.1.2/String.html#method-i-3C-3C
1178 /// [ruby-integer]: https://ruby-doc.org/core-3.1.2/Integer.html
1179 /// [conventionally UTF-8]: crate::Encoding::Utf8
    #[inline]
    pub fn try_push_codepoint(&mut self, codepoint: i64) -> Result<(), InvalidCodepointError> {
        // This wrapper performs no checks of its own; the result of the inner
        // call is returned to the caller unchanged.
        self.inner.try_push_codepoint(codepoint)
    }
1184
1185 /// A more permissive version of [`try_push_codepoint`] which can alter the
1186 /// receiver's encoding to accommodate the given byte.
1187 ///
1188 /// # Errors
1189 ///
1190 /// If this `String` is [conventionally UTF-8] and the given codepoint is
1191 /// not a valid [`char`], an error is returned.
1192 ///
1193 /// If this `String` has [ASCII] or [binary] encoding and the given
1194 /// codepoint is not a valid byte, an error is returned.
1195 ///
1196 /// # Examples
1197 ///
1198 /// For [UTF-8] and [binary] strings, this function behaves identically to
1199 /// [`try_push_codepoint`].
1200 ///
1201 /// ```
1202 /// use spinoso_string::String;
1203 ///
1204 /// # fn example() -> Result<(), spinoso_string::InvalidCodepointError> {
1205 /// let mut s = String::utf8(b"".to_vec());
1206 /// s.try_push_int(b'a' as i64)?;
1207 /// assert_eq!(s, "a");
1208 /// assert!(s.try_push_int(0xD83F).is_err());
1209 /// assert!(s.try_push_int(-1).is_err());
1210 ///
1211 /// let mut s = String::binary(b"".to_vec());
1212 /// s.try_push_int(b'a' as i64)?;
1213 /// assert_eq!(s, "a");
1214 /// assert!(s.try_push_int(1024).is_err());
1215 /// assert!(s.try_push_int(-1).is_err());
1216 /// # Ok(())
1217 /// # }
1218 /// # example().unwrap();
1219 /// ```
1220 ///
1221 /// For [ASCII] strings, the given integer must be a valid byte. If the
1222 /// given integer is outside of the ASCII range, the string's encoding is
1223 /// changed to [`Encoding::Binary`].
1224 ///
1225 /// ```
1226 /// use spinoso_string::{Encoding, String};
1227 ///
1228 /// # fn example() -> Result<(), spinoso_string::InvalidCodepointError> {
1229 /// let mut s = String::ascii(b"".to_vec());
1230 /// s.try_push_int(b'a' as i64)?;
1231 /// assert_eq!(s, "a");
1232 /// assert_eq!(s.encoding(), Encoding::Ascii);
1233 /// assert!(s.try_push_int(1024).is_err());
1234 /// assert!(s.try_push_int(-1).is_err());
1235 ///
1236 /// s.try_push_int(b'\xFF' as i64)?;
1237 /// assert_eq!(s, b"a\xFF");
1238 /// assert_eq!(s.encoding(), Encoding::Binary);
1239 /// # Ok(())
1240 /// # }
1241 /// # example().unwrap();
1242 /// ```
1243 ///
1244 /// [`try_push_codepoint`]: Self::try_push_codepoint
1245 /// [UTF-8]: crate::Encoding::Utf8
1246 /// [ASCII]: crate::Encoding::Ascii
1247 /// [binary]: crate::Encoding::Binary
1248 /// [conventionally UTF-8]: crate::Encoding::Utf8
    #[inline]
    pub fn try_push_int(&mut self, int: i64) -> Result<(), InvalidCodepointError> {
        // This wrapper performs no checks of its own; the result of the inner
        // call is returned to the caller unchanged.
        self.inner.try_push_int(int)
    }
1253
    /// Appends a given [`char`] onto the end of this `String`.
    ///
    /// The given char is UTF-8 encoded and the UTF-8 bytes are appended to the
    /// end of this `String`.
    ///
    /// # Examples
    ///
    /// ```
    /// use spinoso_string::String;
    ///
    /// let mut s = String::from("<3");
    /// s.push_char('💎');
    /// assert_eq!(s, b"<3\xF0\x9F\x92\x8E"); // "<3💎"
    /// ```
    #[inline]
    pub fn push_char(&mut self, ch: char) {
        self.inner.push_char(ch);
    }
1272
1273 /// Appends a given string slice onto the end of this `String`.
1274 ///
1275 /// # Examples
1276 ///
1277 /// ```
1278 /// use spinoso_string::String;
1279 ///
1280 /// let mut s = String::utf8(b"spinoso".to_vec());
1281 /// s.push_str("-string");
1282 /// assert_eq!(s, "spinoso-string");
1283 /// ```
    #[inline]
    pub fn push_str(&mut self, s: &str) {
        // Thin forwarding wrapper: the append is carried out by the inner
        // buffer.
        self.inner.push_str(s);
    }
1288
1289 /// Copies and appends all bytes in a slice to the `String`.
1290 ///
1291 /// Iterates over the slice `other`, copies each element, and then appends
1292 /// it to this `String`. The other byte slice is traversed in-order.
1293 ///
1294 /// # Examples
1295 ///
1296 /// ```
1297 /// use spinoso_string::String;
1298 ///
1299 /// let mut s = String::from("a");
1300 /// s.extend_from_slice(&b"bc"[..]);
1301 /// assert_eq!(s, "abc");
1302 /// ```
    #[inline]
    pub fn extend_from_slice(&mut self, other: &[u8]) {
        // Thin forwarding wrapper: the bytes are appended by the inner buffer
        // without any inspection at this layer.
        self.inner.extend_from_slice(other);
    }
1307}
1308
1309// Ruby APIs
1310impl String {
1311 /// Appends the given bytes to this `String`.
1312 ///
1313 /// See also [`Extend`].
1314 ///
1315 /// This function can be used to implement the Ruby method [`String#<<`] for
1316 /// [`String`][ruby-string] arguments.
1317 ///
1318 /// # Examples
1319 ///
1320 /// ```
1321 /// use spinoso_string::String;
1322 ///
1323 /// let mut s = String::ascii(b"abc".to_vec());
1324 /// s.concat(", easy as 123");
1325 /// assert_eq!(s, "abc, easy as 123");
1326 /// ```
1327 ///
1328 /// [`String#<<`]: https://ruby-doc.org/core-3.1.2/String.html#method-i-3C-3C
1329 /// [ruby-string]: https://ruby-doc.org/core-3.1.2/String.html
1330 #[inline]
1331 pub fn concat<T: AsRef<[u8]>>(&mut self, other: T) {
1332 let other = other.as_ref();
1333 self.inner.extend_from_slice(other);
1334 }
1335
1336 /// Returns true for a string which has only ASCII characters.
1337 ///
1338 /// ASCII is an encoding that defines 128 codepoints. A byte corresponds to
1339 /// an ASCII codepoint if and only if it is in the inclusive range
1340 /// `[0, 127]`.
1341 ///
1342 /// This function ignores this `String`'s [encoding].
1343 ///
1344 /// # Examples
1345 ///
1346 /// ```
1347 /// use spinoso_string::String;
1348 ///
1349 /// let s = String::utf8("abc".as_bytes().to_vec());
1350 /// assert!(s.is_ascii_only());
1351 /// let s = String::utf8("abc\u{6666}".as_bytes().to_vec());
1352 /// assert!(!s.is_ascii_only());
1353 /// ```
1354 ///
1355 /// [encoding]: crate::Encoding
    #[inline]
    #[must_use]
    pub fn is_ascii_only(&self) -> bool {
        // Thin forwarding wrapper: the check is carried out by the inner
        // buffer.
        self.inner.is_ascii_only()
    }
1361
1362 /// Return a newly encoded `String` with [`Encoding::Binary`] encoding.
1363 ///
1364 /// This function can be used to implement the Ruby method [`String#b`].
1365 ///
1366 /// # Examples
1367 ///
1368 /// ```
1369 /// use spinoso_string::{Encoding, String};
1370 ///
1371 /// let s = String::utf8(b"xyz".to_vec());
1372 /// assert_eq!(s.encoding(), Encoding::Utf8);
1373 /// let b = s.to_binary();
1374 /// assert_eq!(b.encoding(), Encoding::Binary);
1375 /// assert_eq!(s.as_slice(), b.as_slice());
1376 /// ```
1377 ///
1378 /// [`String#b`]: https://ruby-doc.org/core-3.1.2/String.html#method-i-b
1379 #[inline]
1380 #[must_use]
1381 pub fn to_binary(&self) -> Self {
1382 String::binary(self.inner.as_slice().to_vec())
1383 }
1384
    /// Returns the length of this `String` in bytes.
    ///
    /// `bytesize` is an [`Encoding`]-oblivious API and is equivalent to
    /// [`String::len`].
    ///
    /// This function can be used to implement the Ruby method
    /// [`String#bytesize`].
    ///
    /// # Examples
    ///
    /// ```
    /// use spinoso_string::String;
    ///
    /// let s = String::utf8("💎".as_bytes().to_vec());
    /// assert_eq!(s.bytesize(), 4);
    /// assert_eq!(s.bytesize(), s.len());
    /// ```
    ///
    /// [`String#bytesize`]: https://ruby-doc.org/core-3.1.2/String.html#method-i-bytesize
    #[inline]
    #[must_use]
    pub fn bytesize(&self) -> usize {
        self.len()
    }
1409
    /// Modify this `String` to have the first character converted to uppercase
    /// and the remainder to lowercase.
    ///
    /// This function can be used to implement the Ruby method [`String#capitalize`].
    ///
    /// This function is encoding-aware. `String`s with [UTF-8 encoding] are
    /// only [conventionally UTF-8]. Only valid UTF-8 byte sequences are
    /// converted. For strings with [ASCII encoding] or [binary encoding], this
    /// function applies the conversion to each byte.
    ///
    /// The returned [`CaseFoldingEffect`] describes the outcome of the
    /// operation, e.g. `CaseFoldingEffect::Modified` in the example below.
    ///
    /// # Compatibility Note
    ///
    /// This function does not yet support Unicode case mapping modes, such as
    /// specially handling the Turkish dotless "i" or the German eszett. This is
    /// a known limitation.
    ///
    /// # Examples
    ///
    /// ```
    /// use spinoso_string::{String, CaseFoldingEffect};
    ///
    /// let mut s = String::utf8(b"hello, world!".to_vec());
    /// assert_eq!(s.make_capitalized(), CaseFoldingEffect::Modified);
    /// assert_eq!(s, "Hello, world!");
    /// ```
    ///
    /// [UTF-8 encoding]: crate::Encoding::Utf8
    /// [conventionally UTF-8]: crate::Encoding::Utf8
    /// [ASCII encoding]: crate::Encoding::Ascii
    /// [binary encoding]: crate::Encoding::Binary
    /// [`String#capitalize`]: https://ruby-doc.org/core-3.1.2/String.html#method-i-capitalize
    #[inline]
    pub fn make_capitalized(&mut self) -> CaseFoldingEffect {
        self.inner.make_capitalized()
    }
1444
    /// Modify this `String` to have all characters converted to lowercase.
    ///
    /// This function can be used to implement the Ruby method [`String#downcase`].
    ///
    /// This function is encoding-aware. `String`s with [UTF-8 encoding] are
    /// only [conventionally UTF-8]. Only valid UTF-8 byte sequences are
    /// converted to lowercase. For strings with [ASCII encoding] or
    /// [binary encoding], this function converts each byte to lowercase.
    ///
    /// The returned [`CaseFoldingEffect`] describes the outcome of the
    /// operation, e.g. `CaseFoldingEffect::Modified` in the example below.
    ///
    /// # Compatibility Note
    ///
    /// This function does not yet support Unicode case mapping modes, such as
    /// specially handling the Turkish dotless "i" or the German eszett. This is
    /// a known limitation.
    ///
    /// # Examples
    ///
    /// ```
    /// use spinoso_string::{String, CaseFoldingEffect};
    ///
    /// let mut s = String::utf8(b"Hello, World!".to_vec());
    /// assert_eq!(s.make_lowercase(), CaseFoldingEffect::Modified);
    /// assert_eq!(s, "hello, world!");
    /// ```
    ///
    /// [UTF-8 encoding]: crate::Encoding::Utf8
    /// [conventionally UTF-8]: crate::Encoding::Utf8
    /// [ASCII encoding]: crate::Encoding::Ascii
    /// [binary encoding]: crate::Encoding::Binary
    /// [`String#downcase`]: https://ruby-doc.org/core-3.1.2/String.html#method-i-downcase
    #[inline]
    pub fn make_lowercase(&mut self) -> CaseFoldingEffect {
        self.inner.make_lowercase()
    }
1478
    /// Modify this `String` to have all characters converted to uppercase.
    ///
    /// This function can be used to implement the Ruby method [`String#upcase`].
    ///
    /// This function is encoding-aware. `String`s with [UTF-8 encoding] are
    /// only [conventionally UTF-8]. Only valid UTF-8 byte sequences are
    /// converted to uppercase. For strings with [ASCII encoding] or
    /// [binary encoding], this function converts each byte to uppercase.
    ///
    /// The returned [`CaseFoldingEffect`] describes the outcome of the
    /// operation, e.g. `CaseFoldingEffect::Modified` in the example below.
    ///
    /// # Compatibility Note
    ///
    /// This function does not yet support Unicode case mapping modes, such as
    /// specially handling the Turkish dotless "i" or the German eszett. This is
    /// a known limitation.
    ///
    /// # Examples
    ///
    /// ```
    /// use spinoso_string::{String, CaseFoldingEffect};
    ///
    /// let mut s = String::utf8(b"Hello, World!".to_vec());
    /// assert_eq!(s.make_uppercase(), CaseFoldingEffect::Modified);
    /// assert_eq!(s, "HELLO, WORLD!");
    /// ```
    ///
    /// [UTF-8 encoding]: crate::Encoding::Utf8
    /// [conventionally UTF-8]: crate::Encoding::Utf8
    /// [ASCII encoding]: crate::Encoding::Ascii
    /// [binary encoding]: crate::Encoding::Binary
    /// [`String#upcase`]: https://ruby-doc.org/core-3.1.2/String.html#method-i-upcase
    #[inline]
    pub fn make_uppercase(&mut self) -> CaseFoldingEffect {
        self.inner.make_uppercase()
    }
1512
    /// Modify this `String` to have the case of each character inverted.
    ///
    /// This function can be used to implement the Ruby method [`String#swapcase`].
    ///
    /// This function is encoding-aware. `String`s with [UTF-8 encoding] are
    /// only [conventionally UTF-8]. Only valid UTF-8 byte sequences are
    /// converted to inverted case. For strings with [ASCII encoding] or
    /// [binary encoding], this function converts each byte to inverted case.
    ///
    /// The returned [`CaseFoldingEffect`] describes the outcome of the
    /// operation, e.g. `CaseFoldingEffect::Modified` in the example below.
    ///
    /// # Compatibility Note
    ///
    /// This function does not yet support Unicode case mapping modes, such as
    /// specially handling the Turkish dotless "i" or the German eszett. This is
    /// a known limitation.
    ///
    /// # Examples
    ///
    /// ```
    /// use spinoso_string::{String, CaseFoldingEffect};
    ///
    /// let mut s = String::utf8(b"Hello, World!".to_vec());
    /// assert_eq!(s.make_swapcase(), CaseFoldingEffect::Modified);
    /// assert_eq!(s, "hELLO, wORLD!");
    /// ```
    ///
    /// [UTF-8 encoding]: crate::Encoding::Utf8
    /// [conventionally UTF-8]: crate::Encoding::Utf8
    /// [ASCII encoding]: crate::Encoding::Ascii
    /// [binary encoding]: crate::Encoding::Binary
    /// [`String#swapcase`]: https://ruby-doc.org/core-3.1.2/String.html#method-i-swapcase
    #[inline]
    pub fn make_swapcase(&mut self) -> CaseFoldingEffect {
        self.inner.make_swapcase()
    }
1546
    /// Compares the bytes of this `String` with `other` using
    /// `focaccia::ascii_casecmp` and returns the resulting [`Ordering`].
    ///
    /// The comparison operates on the raw byte contents of this `String`
    /// (as returned by [`as_slice`]); this `String`'s encoding is not
    /// consulted by this function.
    ///
    /// NOTE(review): per its name, the comparator is expected to ignore ASCII
    /// case differences — confirm against the `focaccia` documentation.
    ///
    /// [`as_slice`]: Self::as_slice
    #[inline]
    #[must_use]
    #[cfg(feature = "casecmp")]
    #[cfg_attr(docsrs, doc(cfg(feature = "casecmp")))]
    pub fn ascii_casecmp(&self, other: &[u8]) -> Ordering {
        focaccia::ascii_casecmp(self.as_slice(), other)
    }
1554
1555 #[inline]
1556 #[must_use]
1557 #[cfg(feature = "casecmp")]
1558 #[cfg_attr(docsrs, doc(cfg(feature = "casecmp")))]
1559 pub fn unicode_casecmp(&self, other: &String, options: CaseFold) -> Option<bool> {
1560 let left = self.as_slice();
1561 let right = other.as_slice();
1562 // If both `String`s are conventionally UTF-8, they must be case
1563 // compared using the given case folding strategy. This requires the
1564 // `String`s be well-formed UTF-8.
1565 if let (Encoding::Utf8, Encoding::Utf8) = (self.encoding(), other.encoding()) {
1566 if let (Ok(left), Ok(right)) = (str::from_utf8(left), str::from_utf8(right)) {
1567 // Both slices are UTF-8, compare with the given Unicode case
1568 // folding scheme.
1569 Some(options.case_eq(left, right))
1570 } else {
1571 // At least one `String` contains invalid UTF-8 bytes.
1572 None
1573 }
1574 } else {
1575 // At least one slice is not conventionally UTF-8, so fallback to
1576 // ASCII comparator.
1577 Some(focaccia::ascii_case_eq(left, right))
1578 }
1579 }
1580
    /// Centers this `String` in width with the given padding.
    ///
    /// This function returns an iterator that yields [`u8`].
    ///
    /// If width is greater than the length of this `String`, the returned
    /// iterator yields a byte sequence of length `width` with the byte content
    /// of this `String` centered and padded with the given padding; otherwise,
    /// yields the original bytes.
    ///
    /// If the given padding is [`None`], the `String` is padded with an ASCII
    /// space.
    ///
    /// # Errors
    ///
    /// If given an empty padding byte string, this function returns an error.
    /// This error is returned regardless of whether the `String` would
    /// actually be padded for the given `width`.
    ///
    /// # Examples
    ///
    /// ```
    /// use spinoso_string::String;
    /// # fn example() -> Result<(), spinoso_string::CenterError> {
    /// let s = String::from("hello");
    ///
    /// assert_eq!(s.center(4, None)?.collect::<Vec<_>>(), b"hello");
    /// assert_eq!(
    ///     s.center(20, None)?.collect::<Vec<_>>(),
    ///     b"       hello        "
    /// );
    /// assert_eq!(
    ///     s.center(20, Some(&b"123"[..]))?.collect::<Vec<_>>(),
    ///     b"1231231hello12312312"
    /// );
    /// # Ok(())
    /// # }
    /// # example().unwrap();
    /// ```
    ///
    /// This iterator is [encoding-aware]. [Conventionally UTF-8] strings are
    /// iterated by UTF-8 byte sequences.
    ///
    /// ```
    /// use spinoso_string::String;
    /// # fn example() -> Result<(), spinoso_string::CenterError> {
    /// let s = String::from("π");
    ///
    /// assert_eq!(s.center(3, None)?.collect::<Vec<_>>(), " π ".as_bytes());
    /// # Ok(())
    /// # }
    /// # example().unwrap();
    /// ```
    ///
    /// [`center`]: crate::String::center
    /// [encoding-aware]: crate::Encoding
    /// [Conventionally UTF-8]: crate::Encoding::Utf8
    #[inline]
    pub fn center<'a, 'b>(&'a self, width: usize, padding: Option<&'b [u8]>) -> Result<Center<'a, 'b>, CenterError> {
        // Resolve the padding first so that an empty padding is rejected
        // before `width` is ever looked at.
        let padding = match padding {
            None => b" ",
            Some([]) => return Err(CenterError::ZeroWidthPadding),
            Some(p) => p,
        };
        // Number of padding characters to distribute around the content;
        // saturates to zero when the content is already at least `width`
        // characters long.
        let padding_width = width.saturating_sub(self.char_len());
        Ok(Center::with_chars_width_and_padding(
            self.chars(),
            padding_width,
            padding,
        ))
    }
1651
1652 /// Modifies this `String` in-place with the given record separator removed
1653 /// from the end of `str` (if given).
1654 ///
1655 /// If `separator` is [`None`] (i.e. `separator` has not been changed from
1656 /// the default Ruby record separator), then `chomp` also removes carriage
1657 /// return characters (that is it will remove `\n`, `\r`, and `\r\n`). If
1658 /// `separator` is an empty string, it will remove all trailing newlines
1659 /// from the string.
1660 ///
1661 /// A [`None`] separator does not mean that `chomp` is passed a `nil`
1662 /// separator. For `str.chomp nil`, MRI returns `str.dup`. For
1663 /// `str.chomp! nil`, MRI makes no changes to the receiver and returns
1664 /// `nil`.
1665 ///
1666 /// This function returns `true` if self is modified, `false` otherwise.
1667 ///
1668 /// # Examples
1669 ///
1670 /// ```
1671 /// use spinoso_string::String;
1672 ///
1673 /// let mut s = String::utf8(b"This is a paragraph.\r\n\n\n".to_vec());
1674 /// let modified = s.chomp(None::<&[u8]>);
1675 /// assert!(modified);
1676 /// assert_eq!(s, "This is a paragraph.\r\n\n");
1677 ///
1678 /// let mut s = String::utf8(b"This is a paragraph.\r\n\n\n".to_vec());
1679 /// let modified = s.chomp(Some(""));
1680 /// assert!(modified);
1681 /// assert_eq!(s, "This is a paragraph.");
1682 ///
1683 /// let mut s = String::utf8(b"hello\r\n\r\r\n".to_vec());
1684 /// let modified = s.chomp(None::<&[u8]>);
1685 /// assert!(modified);
1686 /// assert_eq!(s, "hello\r\n\r");
1687 ///
1688 /// let mut s = String::utf8(b"hello\r\n\r\r\n".to_vec());
1689 /// let modified = s.chomp(Some(""));
1690 /// assert!(modified);
1691 /// assert_eq!(s, "hello\r\n\r");
1692 ///
1693 /// let mut s = String::utf8(b"This is a paragraph.".to_vec());
1694 /// let modified = s.chomp(Some("."));
1695 /// assert!(modified);
1696 /// assert_eq!(s, "This is a paragraph");
1697 ///
1698 /// let mut s = String::utf8(b"This is a paragraph.".to_vec());
1699 /// let modified = s.chomp(Some("abc"));
1700 /// assert!(!modified);
1701 /// assert_eq!(s, "This is a paragraph.");
1702 /// ```
1703 #[inline]
1704 #[must_use]
1705 pub fn chomp<T: AsRef<[u8]>>(&mut self, separator: Option<T>) -> bool {
1706 // convert to a concrete type and delegate to a single `chomp` impl
1707 // to minimize code duplication when monomorphizing.
1708 if let Some(sep) = separator {
1709 chomp(self, Some(sep.as_ref()))
1710 } else {
1711 chomp(self, None)
1712 }
1713 }
1714
1715 /// Modifies this `String` in-place and removes the last character.
1716 ///
1717 /// This method returns a [`bool`] that indicates if this string was modified.
1718 ///
1719 /// If the string ends with `\r\n`, both characters are removed. When
1720 /// applying `chop` to an empty string, the string remains empty.
1721 ///
1722 /// [`String::chomp`] is often a safer alternative, as it leaves the string
1723 /// unchanged if it doesn't end in a record separator.
1724 ///
1725 /// # Examples
1726 ///
1727 /// ```
1728 /// use spinoso_string::String;
1729 ///
1730 /// let mut s = String::utf8(b"This is a paragraph.\r\n".to_vec());
1731 /// let modified = s.chop();
1732 /// assert!(modified);
1733 /// assert_eq!(s, "This is a paragraph.");
1734 ///
1735 /// let mut s = String::utf8(b"This is a paragraph.".to_vec());
1736 /// let modified = s.chop();
1737 /// assert!(modified);
1738 /// assert_eq!(s, "This is a paragraph");
1739 ///
1740 /// let mut s = String::utf8(b"".to_vec());
1741 /// let modified = s.chop();
1742 /// assert!(!modified);
1743 /// assert_eq!(s, "");
1744 ///
1745 /// let mut s = String::utf8(b"x".to_vec());
1746 /// let modified = s.chop();
1747 /// assert!(modified);
1748 /// assert_eq!(s, "");
1749 /// ```
1750 #[inline]
1751 #[must_use]
1752 pub fn chop(&mut self) -> bool {
1753 if self.is_empty() {
1754 return false;
1755 }
1756 let bytes_to_remove = if self.inner.ends_with(b"\r\n") {
1757 2
1758 } else if let Encoding::Utf8 = self.encoding() {
1759 let (ch, size) = bstr::decode_last_utf8(self.as_slice());
1760 if ch.is_some() { size } else { 1 }
1761 } else {
1762 // `buf` is checked to be non-empty above.
1763 1
1764 };
1765 // This subtraction is guaranteed to not panic because we have validated
1766 // that we're removing a subslice of `buf`.
1767 self.truncate(self.len() - bytes_to_remove);
1768 true
1769 }
1770
    /// Returns a one-character string at the beginning of the string.
    ///
    /// # Examples
    ///
    /// [Conventionally UTF-8] `String`s perform a partial UTF-8 decode to
    /// compute the first character.
    ///
    /// ```
    /// use spinoso_string::String;
    ///
    /// let s = String::utf8(b"abcde".to_vec());
    /// assert_eq!(s.chr(), &b"a"[..]);
    ///
    /// let s = String::utf8(b"".to_vec());
    /// assert_eq!(s.chr(), &[]);
    ///
    /// let s = String::utf8("🦀spinoso💎".as_bytes().to_vec());
    /// assert_eq!(s.chr(), &b"\xF0\x9F\xA6\x80"[..]);
    ///
    /// let s = String::utf8(b"\xFFspinoso".to_vec());
    /// assert_eq!(s.chr(), &b"\xFF"[..]);
    /// ```
    ///
    /// For [ASCII] and [binary] `String`s this function returns a slice of the
    /// first byte or the empty slice if the `String` is empty.
    ///
    /// ```
    /// use spinoso_string::String;
    ///
    /// let s = String::binary(b"abcde".to_vec());
    /// assert_eq!(s.chr(), &b"a"[..]);
    ///
    /// let s = String::binary(b"".to_vec());
    /// assert_eq!(s.chr(), &[]);
    ///
    /// let s = String::binary("🦀spinoso💎".as_bytes().to_vec());
    /// assert_eq!(s.chr(), &b"\xF0"[..]);
    ///
    /// let s = String::binary(b"\xFFspinoso".to_vec());
    /// assert_eq!(s.chr(), &b"\xFF"[..]);
    /// ```
    ///
    /// [Conventionally UTF-8]: Encoding::Utf8
    /// [ASCII]: crate::Encoding::Ascii
    /// [binary]: crate::Encoding::Binary
    #[inline]
    #[must_use]
    pub fn chr(&self) -> &[u8] {
        self.inner.chr()
    }
1821
    /// Returns the char-based index of the first occurrence of the given
    /// substring in this `String`.
    ///
    /// Returns [`None`] if not found. If the second parameter is present, it
    /// specifies the character position in the string to begin the search.
    /// When it is [`None`], the search starts at character position `0`.
    ///
    /// This function can be used to implement [`String#index`].
    ///
    /// # Examples
    ///
    /// ```
    /// use spinoso_string::String;
    ///
    /// let s = String::utf8("via 💎 v3.2.0".as_bytes().to_vec());
    /// assert_eq!(s.index("a", None), Some(2));
    /// assert_eq!(s.index("a", Some(2)), Some(2));
    /// assert_eq!(s.index("a", Some(3)), None);
    /// assert_eq!(s.index("💎", None), Some(4));
    /// assert_eq!(s.index(".", None), Some(8));
    /// assert_eq!(s.index("v", Some(6)), Some(6));
    /// assert_eq!(s.index("v", Some(7)), None);
    /// assert_eq!(s.index("X", None), None);
    /// ```
    ///
    /// [`String#index`]: https://ruby-doc.org/core-3.1.2/String.html#method-i-index
    #[inline]
    #[must_use]
    pub fn index<T: AsRef<[u8]>>(&self, needle: T, offset: Option<usize>) -> Option<usize> {
        // A missing offset means "search from the start of the string".
        let offset = offset.unwrap_or(0);
        self.inner.index(needle.as_ref(), offset)
    }
1853
1854 /// Returns the char-based index of the last occurrence of the given
1855 /// substring in this `String`.
1856 ///
1857 /// Returns [`None`] if not found. If the second parameter is present, it
1858 /// specifies the character position in the string to begin the search.
1859 ///
1860 /// This function can be used to implement [`String#rindex`].
1861 ///
1862 /// # Examples
1863 ///
1864 /// ```
1865 /// use spinoso_string::String;
1866 ///
1867 /// let s = String::utf8("via π v3.2.0".as_bytes().to_vec());
1868 /// assert_eq!(s.rindex("v", None), Some(6));
1869 /// assert_eq!(s.rindex("v", Some(5)), Some(0));
1870 /// assert_eq!(s.rindex("v", Some(6)), Some(6));
1871 /// assert_eq!(s.rindex("a", None), Some(2));
1872 /// ```
1873 ///
1874 /// [`String#rindex`]: https://ruby-doc.org/core-3.1.2/String.html#method-i-rindex
1875 #[inline]
1876 #[must_use]
1877 pub fn rindex<T: AsRef<[u8]>>(&self, needle: T, offset: Option<usize>) -> Option<usize> {
1878 let offset = offset.unwrap_or_else(|| self.inner.char_len().checked_sub(1).unwrap_or_default());
1879 self.inner.rindex(needle.as_ref(), offset)
1880 }
1881
    /// Returns the byte-based index of the first occurrence of the given
    /// substring in this `String`.
    ///
    /// Returns [`None`] if not found. If the second parameter is present, it
    /// specifies the byte position in the string to begin the search. An
    /// offset past the end of the string yields [`None`].
    ///
    /// This function can be used to implement [`String#byteindex`].
    ///
    /// # Examples
    ///
    /// ```
    /// use spinoso_string::String;
    ///
    /// let s = String::utf8("via 💎 v3.2.0".as_bytes().to_vec());
    /// assert_eq!(s.byteindex("a", None), Some(2));
    /// assert_eq!(s.byteindex("a", Some(2)), Some(2));
    /// assert_eq!(s.byteindex("a", Some(3)), None);
    /// assert_eq!(s.byteindex("💎", None), Some(4));
    /// assert_eq!(s.byteindex(".", None), Some(11));
    /// assert_eq!(s.byteindex("X", None), None);
    /// ```
    ///
    /// [`String#byteindex`]: https://ruby-doc.org/3.2.0/String.html#method-i-byteindex
    #[inline]
    #[must_use]
    pub fn byteindex<T: AsRef<[u8]>>(&self, needle: T, offset: Option<usize>) -> Option<usize> {
        // Non-generic worker: searches `buf` for `needle`, optionally starting
        // at byte position `offset`, and reports the match position relative
        // to the start of `buf`.
        fn inner(buf: &[u8], needle: &[u8], offset: Option<usize>) -> Option<usize> {
            if let Some(offset) = offset {
                // `get` returns `None` when `offset` is past the end of `buf`.
                let buf = buf.get(offset..)?;
                let index = buf.find(needle)?;
                // This addition cannot overflow: `offset..` was a valid range
                // of the original buffer and `index` is a position within
                // that tail, so `index + offset` is a valid index into the
                // original buffer.
                Some(index + offset)
            } else {
                buf.find(needle)
            }
        }
        // convert to a concrete type and delegate to a single `index` impl
        // to minimize code duplication when monomorphizing.
        let needle = needle.as_ref();
        inner(self.inner.as_slice(), needle, offset)
    }
1927
1928 /// Returns the byte-based index of the last occurrence of the given
1929 /// substring in this `String`.
1930 ///
1931 /// Returns [`None`] if not found. If the second parameter is present, it
1932 /// specifies the byte position in the string to begin the search.
1933 ///
1934 /// This function can be used to implement [`String#rindex`].
1935 ///
1936 /// # Examples
1937 ///
1938 /// ```
1939 /// use spinoso_string::String;
1940 ///
1941 /// let s = String::utf8("via π v3.2.0".as_bytes().to_vec());
1942 /// assert_eq!(s.byterindex("v", None), Some(9));
1943 /// assert_eq!(s.byterindex("a", None), Some(2));
1944 /// ```
1945 ///
1946 /// [`String#byterindex`]: https://ruby-doc.org/3.2.0/String.html#method-i-byterindex
1947 #[inline]
1948 #[must_use]
1949 pub fn byterindex<T: AsRef<[u8]>>(&self, needle: T, offset: Option<usize>) -> Option<usize> {
1950 fn inner(buf: &[u8], needle: &[u8], offset: Option<usize>) -> Option<usize> {
1951 if let Some(offset) = offset {
1952 let end = buf.len().checked_sub(offset).unwrap_or_default();
1953 let buf = buf.get(..end)?;
1954 buf.rfind(needle)
1955 } else {
1956 buf.rfind(needle)
1957 }
1958 }
1959 // convert to a concrete type and delegate to a single `rindex` impl
1960 // to minimize code duplication when monomorphizing.
1961 let needle = needle.as_ref();
1962 inner(self.inner.as_slice(), needle, offset)
1963 }
1964
    /// Returns an iterator that yields a debug representation of the `String`.
    ///
    /// This iterator produces [`char`] sequences like `"spinoso"` and
    /// `"invalid-\xFF-utf8"`.
    ///
    /// This function can be used to implement the Ruby method
    /// [`String#inspect`].
    ///
    /// This iterator is encoding-aware. This iterator may yield different
    /// `char`s for the same underlying byte contents depending on the string's
    /// encoding.
    ///
    /// [`String#inspect`]: https://ruby-doc.org/core-3.1.2/String.html#method-i-inspect
    #[inline]
    pub fn inspect(&self) -> Inspect<'_> {
        // Wrap the inner, encoding-specific iterator in the public
        // `Inspect` type.
        Inspect::new(self.inner.inspect())
    }
1982
1983 /// Returns the Integer ordinal of a one-character string.
1984 ///
1985 /// # Errors
1986 ///
1987 /// If this `String` is empty, an error is returned.
1988 ///
1989 /// If this `String` is [conventionally UTF-8] and the string contents begin
1990 /// with an invalid UTF-8 byte sequence, an error is returned.
1991 ///
1992 /// [conventionally UTF-8]: crate::Encoding::Utf8
    #[inline]
    pub fn ord(&self) -> Result<u32, OrdError> {
        // This wrapper performs no checks of its own; the result of the inner
        // call is returned to the caller unchanged.
        self.inner.ord()
    }
1997}
1998
1999// Encoding-aware APIs.
2000impl String {
2001 /// Returns an iterator over the chars of a `String`.
2002 ///
2003 /// This function is encoding-aware. `String`s with [UTF-8 encoding] are
2004 /// only [conventionally UTF-8]. This iterator yields `&[u8]` byte slices
2005 /// that correspond to either a valid UTF-8 byte sequence or a single
2006 /// invalid UTF-8 byte. For [ASCII encoded] and [binary encoded] strings,
2007 /// this iterator yields slices of single bytes.
2008 ///
2009 /// For UTF-8 encoded strings, the yielded byte slices can be parsed into
2010 /// [`char`]s with [`str::from_utf8`] and [`str::chars`].
2011 ///
2012 /// # Examples
2013 ///
2014 /// Iterating over the characters of a conventionally UTF-8 string:
2015 ///
2016 /// ```
2017 /// use core::str;
2018 ///
2019 /// use spinoso_string::String;
2020 ///
2021 /// let s = String::utf8(b"ab\xF0\x9F\x92\x8E\xFF".to_vec());
2022 /// let mut chars = s.chars();
2023 /// assert_eq!(chars.next(), Some(&b"a"[..]));
2024 /// assert_eq!(chars.next().map(str::from_utf8), Some(Ok("b")));
2025 /// assert_eq!(chars.next(), Some(&[0xF0, 0x9F, 0x92, 0x8E][..]));
2026 /// assert_eq!(chars.next(), Some(&b"\xFF"[..]));
2027 /// assert_eq!(chars.next(), None);
2028 /// ```
2029 ///
2030 /// Iterating over the characters of a binary string:
2031 ///
2032 /// ```
2033 /// use spinoso_string::String;
2034 ///
2035 /// let s = String::binary("π".as_bytes().to_vec());
2036 /// let mut chars = s.chars();
2037 /// assert_eq!(chars.next(), Some(&[0xF0][..]));
2038 /// assert_eq!(chars.next(), Some(&[0x9F][..]));
2039 /// assert_eq!(chars.next(), Some(&[0x92][..]));
2040 /// assert_eq!(chars.next(), Some(&[0x8E][..]));
2041 /// assert_eq!(chars.next(), None);
2042 /// ```
2043 ///
2044 /// [UTF-8 encoding]: crate::Encoding::Utf8
2045 /// [conventionally UTF-8]: crate::Encoding::Utf8
2046 /// [ASCII encoded]: crate::Encoding::Ascii
2047 /// [binary encoded]: crate::Encoding::Binary
2048 /// [`str::from_utf8`]: core::str::from_utf8
2049 #[inline]
2050 #[must_use]
2051 pub fn chars(&self) -> Chars<'_> {
2052 Chars::from(self)
2053 }
2054
2055 /// Returns an iterator over the `u32` codepoints of a `String`.
2056 ///
2057 /// This function is encoding-aware. `String`s with [UTF-8 encoding] are
2058 /// only [conventionally UTF-8]. This function only returns `Ok` for
2059 /// `String`s with UTF-8 encoding if the underlying bytes in the `String`
2060 /// are valid UTF-8. For UTF-8 `String`s, this iterator yields the `u32`
2061 /// values of the [`char`]s in the byte string. For [ASCII encoded] and
2062 /// [binary encoded] strings, this iterator yields slices of single bytes.
2063 ///
2064 /// For UTF-8 encoded strings, the yielded byte slices can be parsed into
2065 /// [`char`]s with `.into()`.
2066 ///
2067 /// # Errors
2068 ///
2069 /// This function requires the `String` contents to be well-formed with
2070 /// respect to its encoding. This function will return an error if the
2071 /// `String` has UTF-8 encoding and contains invalid UTF-8 byte sequences.
2072 ///
2073 /// # Examples
2074 ///
2075 /// Iterating over the codepoints of a conventionally UTF-8 string:
2076 ///
2077 /// ```
2078 /// use spinoso_string::{CodepointsError, String};
2079 ///
2080 /// # fn example() -> Result<(), spinoso_string::CodepointsError> {
2081 /// let s = String::utf8(b"ab\xF0\x9F\x92\x8E\xFF".to_vec());
2082 /// assert!(matches!(
2083 /// s.codepoints(),
2084 /// Err(CodepointsError::InvalidUtf8Codepoint)
2085 /// ));
2086 ///
2087 /// let s = String::utf8("π".as_bytes().to_vec());
2088 /// let mut codepoints = s.codepoints()?;
2089 /// assert_eq!(codepoints.next(), Some(u32::from('π')));
2090 /// assert_eq!(codepoints.next(), None);
2091 /// # Ok(())
2092 /// # }
2093 /// # example().unwrap();
2094 /// ```
2095 ///
2096 /// Iterating over the codepoints of a binary string:
2097 ///
2098 /// ```
2099 /// use spinoso_string::String;
2100 ///
2101 /// # fn example() -> Result<(), spinoso_string::CodepointsError> {
2102 /// let s = String::binary("π".as_bytes().to_vec());
2103 /// let mut codepoints = s.codepoints()?;
2104 /// assert_eq!(codepoints.next(), Some(0xF0));
2105 /// assert_eq!(codepoints.next(), Some(0x9F));
2106 /// assert_eq!(codepoints.next(), Some(0x92));
2107 /// assert_eq!(codepoints.next(), Some(0x8E));
2108 /// assert_eq!(codepoints.next(), None);
2109 /// # Ok(())
2110 /// # }
2111 /// # example().unwrap();
2112 /// ```
2113 ///
2114 /// [UTF-8 encoding]: crate::Encoding::Utf8
2115 /// [conventionally UTF-8]: crate::Encoding::Utf8
2116 /// [ASCII encoded]: crate::Encoding::Ascii
2117 /// [binary encoded]: crate::Encoding::Binary
2118 /// [`str::from_utf8`]: core::str::from_utf8
2119 #[inline]
2120 pub fn codepoints(&self) -> Result<Codepoints<'_>, CodepointsError> {
2121 Codepoints::try_from(self)
2122 }
2123
2124 /// Returns the character length of this `String`.
2125 ///
2126 /// This function is encoding-aware. For `String`s with [UTF-8 encoding],
2127 /// multi-byte Unicode characters are length 1 and invalid UTF-8 bytes are
2128 /// length 1. For `String`s with [ASCII encoding] or [binary encoding],
2129 /// this function is equivalent to [`len`] and [`bytesize`].
2130 ///
2131 /// # Examples
2132 ///
2133 /// ```
2134 /// use spinoso_string::String;
2135 ///
2136 /// let s = String::utf8(b"abc\xF0\x9F\x92\x8E\xFF".to_vec()); // "abcπ\xFF"
2137 /// assert_eq!(s.char_len(), 5);
2138 ///
2139 /// let b = String::binary(b"abc\xF0\x9F\x92\x8E\xFF".to_vec()); // "abcπ\xFF"
2140 /// assert_eq!(b.char_len(), 8);
2141 /// ```
2142 ///
2143 /// [UTF-8 encoding]: crate::Encoding::Utf8
2144 /// [ASCII encoding]: crate::Encoding::Ascii
2145 /// [binary encoding]: crate::Encoding::Binary
2146 /// [`len`]: Self::len
2147 /// [`bytesize`]: Self::bytesize
2148 #[inline]
2149 #[must_use]
2150 pub fn char_len(&self) -> usize {
2151 self.inner.char_len()
2152 }
2153
2154 /// Returns the `index`'th character in the string.
2155 ///
2156 /// This function is encoding-aware. For `String`s with [UTF-8 encoding],
2157 /// multi-byte Unicode characters are length 1 and invalid UTF-8 bytes are
2158 /// length 1. For `String`s with [ASCII encoding] or [binary encoding],
2159 /// this function is equivalent to [`get`] with a range of length 1.
2160 ///
2161 /// # Examples
2162 ///
2163 /// ```
2164 /// use spinoso_string::String;
2165 ///
2166 /// let s = String::utf8(b"abc\xF0\x9F\x92\x8E\xFF".to_vec()); // "abcπ\xFF"
2167 /// assert_eq!(s.get_char(0), Some(&b"a"[..]));
2168 /// assert_eq!(s.get_char(1), Some(&b"b"[..]));
2169 /// assert_eq!(s.get_char(2), Some(&b"c"[..]));
2170 /// assert_eq!(s.get_char(3), Some("π".as_bytes()));
2171 /// assert_eq!(s.get_char(4), Some(&b"\xFF"[..]));
2172 /// assert_eq!(s.get_char(5), None);
2173 ///
2174 /// let b = String::binary(b"abc\xF0\x9F\x92\x8E\xFF".to_vec()); // "abcπ\xFF"
2175 /// assert_eq!(b.get_char(0), Some(&b"a"[..]));
2176 /// assert_eq!(b.get_char(1), Some(&b"b"[..]));
2177 /// assert_eq!(b.get_char(2), Some(&b"c"[..]));
2178 /// assert_eq!(b.get_char(3), Some(&b"\xF0"[..]));
2179 /// assert_eq!(b.get_char(4), Some(&b"\x9F"[..]));
2180 /// assert_eq!(b.get_char(5), Some(&b"\x92"[..]));
2181 /// assert_eq!(b.get_char(6), Some(&b"\x8E"[..]));
2182 /// assert_eq!(b.get_char(7), Some(&b"\xFF"[..]));
2183 /// assert_eq!(b.get_char(8), None);
2184 /// ```
2185 ///
2186 /// [UTF-8 encoding]: crate::Encoding::Utf8
2187 /// [ASCII encoding]: crate::Encoding::Ascii
2188 /// [binary encoding]: crate::Encoding::Binary
2189 /// [`get`]: Self::get
2190 #[inline]
2191 #[must_use]
2192 pub fn get_char(&self, index: usize) -> Option<&'_ [u8]> {
2193 // `Vec` has a max allocation size of `isize::MAX`. For a `Vec<u8>` like
2194 // the one in `String` where the `size_of::<u8>() == 1`, the max length
2195 // is `isize::MAX`. This checked add short circuits with `None` if we
2196 // are given `usize::MAX` as an index, which we could never slice.
2197 index.checked_add(1)?;
2198
2199 self.inner.get_char(index)
2200 }
2201
2202 /// Returns a substring of characters in the string.
2203 ///
2204 /// This function is encoding-aware. For `String`s with [UTF-8 encoding],
2205 /// multi-byte Unicode characters are length 1 and invalid UTF-8 bytes are
2206 /// length 1. For `String`s with [ASCII encoding] or [binary encoding],
2207 /// this function is equivalent to [`get`] with a range.
2208 ///
2209 /// # Examples
2210 ///
2211 /// ```
2212 /// use spinoso_string::String;
2213 ///
2214 /// let s = String::ascii(b"abc".to_vec());
2215 /// assert_eq!(s.get_char_slice(1..3), Some("bc".as_bytes()));
2216 /// assert_eq!(s.get_char_slice(10..15), None);
2217 ///
2218 /// let s = String::utf8(b"abc\xF0\x9F\x92\x8E\xFF".to_vec()); // "abcπ\xFF"
2219 /// assert_eq!(s.get_char_slice(1..4), Some("bcπ".as_bytes()));
2220 /// assert_eq!(s.get_char_slice(4..1), Some("".as_bytes()));
2221 /// ```
2222 ///
2223 /// [UTF-8 encoding]: crate::Encoding::Utf8
2224 /// [ASCII encoding]: crate::Encoding::Ascii
2225 /// [binary encoding]: crate::Encoding::Binary
2226 /// [`get`]: Self::get
2227 #[inline]
2228 #[must_use]
2229 pub fn get_char_slice(&self, range: Range<usize>) -> Option<&'_ [u8]> {
2230 self.inner.get_char_slice(range)
2231 }
2232
2233 /// Returns true for a `String` which is encoded correctly.
2234 ///
2235 /// For this method to return true, `String`s with [conventionally UTF-8]
2236 /// must be well-formed UTF-8; [ASCII]-encoded `String`s must only contain
2237 /// bytes in the range `0..=127`; [binary]-encoded `String`s may contain any
2238 /// byte sequence.
2239 ///
2240 /// This method is suitable for implementing the Ruby method
2241 /// [`String#valid_encoding?`].
2242 ///
2243 /// # Examples
2244 ///
2245 /// ```
2246 /// use spinoso_string::{Encoding, String};
2247 ///
2248 /// let s = String::utf8(b"xyz".to_vec());
2249 /// assert!(s.is_valid_encoding());
2250 /// let s = String::utf8("π".to_string().into_bytes());
2251 /// assert!(s.is_valid_encoding());
2252 /// let s = String::utf8(b"abc\xFF\xFExyz".to_vec());
2253 /// assert!(!s.is_valid_encoding());
2254 ///
2255 /// let s = String::ascii(b"xyz".to_vec());
2256 /// assert!(s.is_valid_encoding());
2257 /// let s = String::ascii("π".to_string().into_bytes());
2258 /// assert!(!s.is_valid_encoding());
2259 /// let s = String::ascii(b"abc\xFF\xFExyz".to_vec());
2260 /// assert!(!s.is_valid_encoding());
2261 ///
2262 /// let s = String::binary(b"xyz".to_vec());
2263 /// assert!(s.is_valid_encoding());
2264 /// let s = String::binary("π".to_string().into_bytes());
2265 /// assert!(s.is_valid_encoding());
2266 /// let s = String::binary(b"abc\xFF\xFExyz".to_vec());
2267 /// assert!(s.is_valid_encoding());
2268 /// ```
2269 ///
2270 /// [conventionally UTF-8]: crate::Encoding::Utf8
2271 /// [ASCII]: crate::Encoding::Ascii
2272 /// [binary]: crate::Encoding::Binary
2273 /// [`String#valid_encoding?`]: https://ruby-doc.org/core-3.1.2/String.html#method-i-valid_encoding-3F
2274 #[inline]
2275 #[must_use]
2276 pub fn is_valid_encoding(&self) -> bool {
2277 self.inner.is_valid_encoding()
2278 }
2279
2280 /// Reverse the characters in the string.
2281 ///
2282 /// This function is encoding-aware. For `String`s with [UTF-8 encoding],
2283 /// multi-byte Unicode characters are reversed treated as one element.
2284 /// For `String`s with [ASCII encoding] or [binary encoding], this
2285 /// function is equivalent to reversing the underlying byte slice.
2286 ///
2287 /// # Examples
2288 ///
2289 /// ```
2290 /// use spinoso_string::String;
2291 ///
2292 /// let mut s = String::utf8("εθ§".as_bytes().to_vec());
2293 /// s.reverse();
2294 /// assert_eq!(s, "θ§ε");
2295 /// ```
2296 ///
2297 /// [UTF-8 encoding]: crate::Encoding::Utf8
2298 /// [ASCII encoding]: crate::Encoding::Ascii
2299 /// [binary encoding]: crate::Encoding::Binary
2300 #[inline]
2301 pub fn reverse(&mut self) {
2302 self.inner.reverse();
2303 }
2304}
2305
2306#[must_use]
2307fn chomp(string: &mut String, separator: Option<&[u8]>) -> bool {
2308 if string.is_empty() {
2309 return false;
2310 }
2311 match separator {
2312 Some([]) => {
2313 let original_len = string.len();
2314 let mut iter = string.bytes().rev().peekable();
2315 while let Some(&b'\n') = iter.peek() {
2316 iter.next();
2317 if let Some(&b'\r') = iter.peek() {
2318 iter.next();
2319 }
2320 }
2321 let truncate_to = iter.count();
2322 string.inner.truncate(truncate_to);
2323 truncate_to != original_len
2324 }
2325 Some(separator) if string.inner.ends_with(separator) => {
2326 let original_len = string.len();
2327 // This subtraction is guaranteed not to panic because
2328 // `separator` is a substring of `buf`.
2329 let truncate_to_len = original_len - separator.len();
2330 string.inner.truncate(truncate_to_len);
2331 // Separator is non-empty and we are always truncating, so this
2332 // branch always modifies the buffer.
2333 true
2334 }
2335 Some(_) => false,
2336 None => {
2337 let original_len = string.len();
2338 let mut iter = string.bytes().rev().peekable();
2339 match iter.peek() {
2340 Some(&b'\n') => {
2341 iter.next();
2342 if let Some(&b'\r') = iter.peek() {
2343 iter.next();
2344 }
2345 }
2346 Some(b'\r') => {
2347 iter.next();
2348 }
2349 Some(_) | None => {}
2350 };
2351 let truncate_to_len = iter.count();
2352 string.inner.truncate(truncate_to_len);
2353 truncate_to_len != original_len
2354 }
2355 }
2356}
2357
// Unit tests for `String` centering, equality across encodings, and byte
// index searches.
#[cfg(test)]
mod tests {
    use alloc::string::ToString;

    use crate::String;
    use crate::center::CenterError;

    // Empty padding is rejected regardless of how the requested width
    // compares to the string length.
    #[test]
    fn center_returns_error_with_empty_padding() {
        let s = String::utf8(b"jumbo".to_vec());
        let center = s.center(10, Some(b""));
        assert!(matches!(center, Err(CenterError::ZeroWidthPadding)));

        let center = s.center(9, Some(b""));
        assert!(matches!(center, Err(CenterError::ZeroWidthPadding)));

        let center = s.center(1, Some(b""));
        assert!(matches!(center, Err(CenterError::ZeroWidthPadding)));

        let center = s.center(5, Some(b""));
        assert!(matches!(center, Err(CenterError::ZeroWidthPadding)));
    }

    #[test]
    fn strings_equality_is_reflexive() {
        // ASCII only, all encodings valid
        let utf8 = String::utf8(b"abc".to_vec());
        let ascii = String::ascii(b"abc".to_vec());
        let binary = String::binary(b"abc".to_vec());
        assert_eq!(&utf8, &utf8);
        assert_eq!(&ascii, &ascii);
        assert_eq!(&binary, &binary);

        // Invalid UTF-8
        let utf8 = String::utf8(b"abc\xFE\xFF".to_vec());
        let ascii = String::ascii(b"abc\xFE\xFF".to_vec());
        let binary = String::binary(b"abc\xFE\xFF".to_vec());
        assert_eq!(&utf8, &utf8);
        assert_eq!(&ascii, &ascii);
        assert_eq!(&binary, &binary);

        // Multibyte UTF-8
        let utf8 = String::utf8("εΎε°".to_string().into_bytes());
        let ascii = String::ascii("εΎε°".to_string().into_bytes());
        let binary = String::binary("εΎε°".to_string().into_bytes());
        assert_eq!(&utf8, &utf8);
        assert_eq!(&ascii, &ascii);
        assert_eq!(&binary, &binary);
    }

    #[test]
    fn strings_compare_equal_only_based_on_byte_content_with_valid_encoding() {
        // ASCII only
        let utf8 = String::utf8(b"abc".to_vec());
        let ascii = String::ascii(b"abc".to_vec());
        let binary = String::binary(b"abc".to_vec());
        assert_eq!(utf8, ascii);
        assert_eq!(ascii, utf8);
        assert_eq!(utf8, binary);
        assert_eq!(binary, utf8);
        assert_eq!(binary, ascii);
        assert_eq!(ascii, binary);
    }

    #[test]
    fn strings_with_multibyte_utf8_content_require_compatible_encoding_to_compare_equal() {
        let utf8 = String::utf8("εΎε°".to_string().into_bytes());
        let ascii = String::ascii("εΎε°".to_string().into_bytes());
        let binary = String::binary("εΎε°".to_string().into_bytes());
        assert_ne!(utf8, ascii);
        assert_ne!(ascii, utf8);
        assert_ne!(utf8, binary);
        assert_ne!(binary, utf8);
        assert_ne!(binary, ascii);
        assert_ne!(ascii, binary);
    }

    #[test]
    fn strings_compare_unequal_with_equal_byte_content_without_valid_encoding() {
        // ```
        // [3.2.2] > utf8 = "abc\xFE\xFF"
        // => "abc\xFE\xFF"
        // [3.2.2] > utf8.encoding
        // => #<Encoding:UTF-8>
        // [3.2.2] > ascii = "abc\xFE\xFF"
        // => "abc\xFE\xFF"
        // [3.2.2] > ascii.force_encoding(Encoding::ASCII)
        // => "abc\xFE\xFF"
        // [3.2.2] > ascii.encoding
        // => #<Encoding:US-ASCII>
        // [3.2.2] > binary = "abc\xFE\xFF".b
        // => "abc\xFE\xFF"
        // [3.2.2] > binary.encoding
        // => #<Encoding:ASCII-8BIT>
        // [3.2.2] > utf8.ascii_only?
        // => false
        // [3.2.2] > ascii.ascii_only?
        // => false
        // [3.2.2] > binary.ascii_only?
        // => false
        // [3.2.2] > utf8 == ascii
        // => false
        // [3.2.2] > ascii == utf8
        // => false
        // [3.2.2] > utf8 == binary
        // => false
        // [3.2.2] > binary == utf8
        // => false
        // [3.2.2] > binary == ascii
        // => false
        // [3.2.2] > ascii == binary
        // => false
        // ```
        let utf8 = String::utf8(b"abc\xFE\xFF".to_vec());
        let ascii = String::ascii(b"abc\xFE\xFF".to_vec());
        let binary = String::binary(b"abc\xFE\xFF".to_vec());
        assert_ne!(utf8, ascii);
        assert_ne!(ascii, utf8);
        assert_ne!(utf8, binary);
        assert_ne!(binary, utf8);
        assert_ne!(binary, ascii);
        assert_ne!(ascii, binary);
    }

    #[test]
    fn byteindex_supports_needle_and_haystack_of_different_encodings() {
        // all encodings for the receiver
        let utf8 = String::utf8("abcππ»".as_bytes().to_vec());
        let ascii = String::ascii(b"abc\xFE\xFF".to_vec());
        let binary = String::binary(b"abc\xFE\xFF".to_vec());

        // Empty string as needle
        assert_eq!(utf8.byteindex([], None), Some(0));
        assert_eq!(ascii.byteindex([], None), Some(0));
        assert_eq!(binary.byteindex([], None), Some(0));

        // ASCII needles
        let ascii_needle = String::ascii(b"b".to_vec());
        assert_eq!(utf8.byteindex(&ascii_needle, None), Some(1));
        assert_eq!(ascii.byteindex(&ascii_needle, None), Some(1));
        assert_eq!(binary.byteindex(&ascii_needle, None), Some(1));

        // Binary needles
        let binary_needle = String::binary(b"b".to_vec());
        assert_eq!(utf8.byteindex(&binary_needle, None), Some(1));
        assert_eq!(ascii.byteindex(&binary_needle, None), Some(1));
        assert_eq!(binary.byteindex(&binary_needle, None), Some(1));

        // UTF-8 needles with multibyte chars
        let utf8_needle = String::utf8("ππ»".as_bytes().to_vec());
        assert_eq!(utf8.byteindex(&utf8_needle, None), Some(3));
        assert_eq!(ascii.byteindex(&utf8_needle, None), None);
        assert_eq!(binary.byteindex(&utf8_needle, None), None);

        // UTF-8 encoded needles whose contents are plain ASCII bytes.
        let utf8_needle = String::utf8([b'b', b'c'].to_vec());
        assert_eq!(utf8.byteindex(&utf8_needle, None), Some(1));
        assert_eq!(ascii.byteindex(&utf8_needle, None), Some(1));
        assert_eq!(binary.byteindex(&utf8_needle, None), Some(1));
    }

    #[test]
    fn byteindex_supports_specifying_byte_position_to_start_search() {
        // Byte layout: 'a' at 0, ' ' at 1, the 4-byte gem at 2..=5, ' ' at 6,
        // 'h' at 7, and the second 'a' at 8.
        let utf8 = String::utf8("a 💎 has 4 bytes".as_bytes().to_vec());

        // Single-byte ASCII needle
        let needle = String::utf8("a".as_bytes().to_vec());
        assert_eq!(utf8.byteindex(&needle, None), Some(0));
        assert_eq!(utf8.byteindex(&needle, Some(0)), Some(0));
        assert_eq!(utf8.byteindex(&needle, Some(1)), Some(8));
        // In the middle of 💎
        assert_eq!(utf8.byteindex(&needle, Some(3)), Some(8));
        assert_eq!(utf8.byteindex(&needle, Some(8)), Some(8));
        assert_eq!(utf8.byteindex(&needle, Some(9)), None);
    }

    #[test]
    fn rindex_support_empty_string_and_empty_needle() {
        // Empty needle
        assert_eq!(String::ascii(b"foo".to_vec()).rindex("", None), Some(3));

        // Empty haystack
        assert_eq!(String::ascii(b"".to_vec()).rindex("foo", None), None);

        // Empty haystack and needle
        assert_eq!(String::ascii(b"".to_vec()).rindex("", None), Some(0));
    }
}