spinoso_string/
codepoints.rs

1use core::fmt::{self, Write};
2use core::iter::FusedIterator;
3
4use crate::String;
5use crate::enc::{self};
6
/// Error returned when a [`Codepoints`] iterator cannot be constructed.
///
/// [`String::codepoints`] is the method that produces this error; refer to
/// its documentation for more detail.
///
/// This error corresponds to the [Ruby `ArgumentError` Exception class].
///
/// When the **std** feature of `spinoso-string` is enabled, this type
/// implements [`std::error::Error`].
///
/// [Ruby `ArgumentError` Exception class]: https://ruby-doc.org/core-3.1.2/ArgumentError.html
/// [`std::error::Error`]: https://doc.rust-lang.org/std/error/trait.Error.html
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum CodepointsError {
    /// [`String::codepoints`] was called on a [`String`] that has
    /// [UTF-8 encoding] but whose contents are not a valid UTF-8 byte string.
    ///
    /// [UTF-8 encoding]: crate::Encoding::Utf8
    InvalidUtf8Codepoint,
}
27
28impl CodepointsError {
29    pub const EXCEPTION_TYPE: &'static str = "ArgumentError";
30
31    /// Create a new invalid UTF-8 codepoint `CodepointsError`.
32    ///
33    /// # Examples
34    ///
35    /// ```
36    /// use spinoso_string::CodepointsError;
37    ///
38    /// const ERR: CodepointsError = CodepointsError::invalid_utf8_codepoint();
39    /// assert_eq!(ERR.message(), "invalid byte sequence in UTF-8");
40    /// ```
41    #[inline]
42    #[must_use]
43    pub const fn invalid_utf8_codepoint() -> Self {
44        Self::InvalidUtf8Codepoint
45    }
46
47    /// Retrieve the exception message associated with this center error.
48    ///
49    /// # Examples
50    ///
51    /// ```
52    /// # use spinoso_string::CodepointsError;
53    /// let err = CodepointsError::invalid_utf8_codepoint();
54    /// assert_eq!(err.message(), "invalid byte sequence in UTF-8");
55    /// ```
56    #[inline]
57    #[must_use]
58    pub const fn message(self) -> &'static str {
59        "invalid byte sequence in UTF-8"
60    }
61}
62
63impl fmt::Display for CodepointsError {
64    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
65        let CodepointsError::InvalidUtf8Codepoint = self;
66        f.write_str(self.message())
67    }
68}
69
// Compiled only when the `std` feature is enabled. The impl body is empty, so
// every `std::error::Error` method uses the trait's default implementation.
#[cfg(feature = "std")]
impl std::error::Error for CodepointsError {}
72
/// Private payload of [`InvalidCodepointError`]: which check rejected the
/// codepoint, together with the offending value.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
enum CodePointRangeError {
    /// A `u32` codepoint reported as "invalid codepoint ... in UTF-8".
    InvalidUtf8Codepoint(u32),
    /// An `i64` value reported as "... out of char range".
    OutOfRange(i64),
}
78
79#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)]
80pub struct InvalidCodepointError(CodePointRangeError);
81
82impl InvalidCodepointError {
83    pub const EXCEPTION_TYPE: &'static str = "RangeError";
84
85    #[inline]
86    #[must_use]
87    pub const fn invalid_utf8_codepoint(codepoint: u32) -> Self {
88        Self(CodePointRangeError::InvalidUtf8Codepoint(codepoint))
89    }
90
91    #[inline]
92    #[must_use]
93    pub const fn codepoint_out_of_range(codepoint: i64) -> Self {
94        Self(CodePointRangeError::OutOfRange(codepoint))
95    }
96
97    #[inline]
98    #[must_use]
99    pub const fn is_invalid_utf8(self) -> bool {
100        matches!(self.0, CodePointRangeError::InvalidUtf8Codepoint(_))
101    }
102
103    #[inline]
104    #[must_use]
105    pub const fn is_out_of_range(self) -> bool {
106        matches!(self.0, CodePointRangeError::OutOfRange(_))
107    }
108
109    #[inline]
110    #[must_use]
111    pub fn message(self) -> alloc::string::String {
112        // The longest error message is 27 bytes + a hex-encoded codepoint
113        // formatted as `0x...`.
114        const MESSAGE_MAX_LENGTH: usize = 27 + 2 + size_of::<u32>() * 2;
115        let mut s = alloc::string::String::with_capacity(MESSAGE_MAX_LENGTH);
116        // In practice, the errors from `write!` below are safe to ignore
117        // because the `core::fmt::Write` impl for `String` will never panic
118        // and these `String`s will never approach `isize::MAX` bytes.
119        //
120        // See the `core::fmt::Display` impl for `InvalidCodepointError`.
121        let _ignored = write!(s, "{self}");
122        s
123    }
124}
125
126impl fmt::Display for InvalidCodepointError {
127    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
128        match self.0 {
129            CodePointRangeError::InvalidUtf8Codepoint(codepoint) => {
130                write!(f, "invalid codepoint {codepoint:X} in UTF-8")
131            }
132            CodePointRangeError::OutOfRange(codepoint) => write!(f, "{codepoint} out of char range"),
133        }
134    }
135}
136
// Compiled only when the `std` feature is enabled. The impl body is empty, so
// every `std::error::Error` method uses the trait's default implementation.
#[cfg(feature = "std")]
impl std::error::Error for InvalidCodepointError {}
139
140/// An iterator that yields a `u32` codepoints from a [`String`].
141///
142/// This struct is created by the [`codepoints`] method on a Spinoso [`String`].
143/// See its documentation for more.
144///
145/// # Examples
146///
147/// ```
148/// use spinoso_string::{CodepointsError, String};
149/// # fn example() -> Result<(), CodepointsError> {
150/// let s = String::from("hello");
151///
152/// assert_eq!(
153///     s.codepoints()?.collect::<Vec<_>>(),
154///     [104, 101, 108, 108, 111]
155/// );
156///
157/// let s = String::utf8(b"abc\xFFxyz".to_vec());
158/// assert!(matches!(
159///     s.codepoints(),
160///     Err(CodepointsError::InvalidUtf8Codepoint)
161/// ));
162///
163/// let s = String::binary(b"abc\xFFxyz".to_vec());
164/// assert_eq!(
165///     s.codepoints()?.collect::<Vec<_>>(),
166///     [97, 98, 99, 255, 120, 121, 122]
167/// );
168/// # Ok(())
169/// # }
170/// # example().unwrap();
171/// ```
172///
173/// This iterator is [encoding-aware]. [Conventionally UTF-8] strings are
174/// iterated by UTF-8 byte sequences.
175///
176/// ```
177/// use spinoso_string::String;
178/// # fn example() -> Result<(), spinoso_string::CodepointsError> {
179/// let s = String::from("💎");
180///
181/// assert_eq!(s.codepoints()?.collect::<Vec<_>>(), [u32::from('💎')]);
182/// # Ok(())
183/// # }
184/// # example().unwrap();
185/// ```
186///
187/// [`codepoints`]: crate::String::codepoints
188/// [encoding-aware]: crate::Encoding
189/// [Conventionally UTF-8]: crate::Encoding::Utf8
190#[derive(Debug, Default, Clone)]
191pub struct Codepoints<'a>(enc::Codepoints<'a>);
192
193impl<'a> TryFrom<&'a String> for Codepoints<'a> {
194    type Error = CodepointsError;
195
196    #[inline]
197    fn try_from(s: &'a String) -> Result<Self, Self::Error> {
198        s.inner.codepoints().map(Self)
199    }
200}
201
202impl Iterator for Codepoints<'_> {
203    type Item = u32;
204
205    #[inline]
206    fn next(&mut self) -> Option<Self::Item> {
207        self.0.next()
208    }
209}
210
// Marker impl: promises that once `next` has returned `None` it keeps
// returning `None`. `next` only forwards to the wrapped `enc::Codepoints`, so
// this relies on that iterator behaving the same way.
// NOTE(review): `enc::Codepoints` is not visible here — confirm it is fused.
impl FusedIterator for Codepoints<'_> {}