spinoso_string/codepoints.rs
1use core::fmt::{self, Write};
2use core::iter::FusedIterator;
3
4use crate::String;
5use crate::enc::{self};
6
/// Error returned when a [`Codepoints`] iterator cannot be constructed.
///
/// [`String::codepoints`] is the source of this error; consult that method's
/// documentation for the details.
///
/// The matching Ruby exception is the [Ruby `ArgumentError` Exception class].
///
/// With the **std** feature of `spinoso-string` turned on, this enum
/// implements [`std::error::Error`].
///
/// [Ruby `ArgumentError` Exception class]: https://ruby-doc.org/core-3.1.2/ArgumentError.html
/// [`std::error::Error`]: https://doc.rust-lang.org/std/error/trait.Error.html
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub enum CodepointsError {
    /// [`String::codepoints`] was called on a [`String`] that has
    /// [UTF-8 encoding] but whose contents are not a valid UTF-8 byte string.
    ///
    /// [UTF-8 encoding]: crate::Encoding::Utf8
    InvalidUtf8Codepoint,
}
27
28impl CodepointsError {
29 pub const EXCEPTION_TYPE: &'static str = "ArgumentError";
30
31 /// Create a new invalid UTF-8 codepoint `CodepointsError`.
32 ///
33 /// # Examples
34 ///
35 /// ```
36 /// use spinoso_string::CodepointsError;
37 ///
38 /// const ERR: CodepointsError = CodepointsError::invalid_utf8_codepoint();
39 /// assert_eq!(ERR.message(), "invalid byte sequence in UTF-8");
40 /// ```
41 #[inline]
42 #[must_use]
43 pub const fn invalid_utf8_codepoint() -> Self {
44 Self::InvalidUtf8Codepoint
45 }
46
47 /// Retrieve the exception message associated with this center error.
48 ///
49 /// # Examples
50 ///
51 /// ```
52 /// # use spinoso_string::CodepointsError;
53 /// let err = CodepointsError::invalid_utf8_codepoint();
54 /// assert_eq!(err.message(), "invalid byte sequence in UTF-8");
55 /// ```
56 #[inline]
57 #[must_use]
58 pub const fn message(self) -> &'static str {
59 "invalid byte sequence in UTF-8"
60 }
61}
62
63impl fmt::Display for CodepointsError {
64 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
65 let CodepointsError::InvalidUtf8Codepoint = self;
66 f.write_str(self.message())
67 }
68}
69
// Only available with the `std` feature, since the impl names the
// `std::error::Error` trait; all provided methods are used as-is.
#[cfg(feature = "std")]
impl std::error::Error for CodepointsError {}
72
/// Private payload of [`InvalidCodepointError`] recording which kind of
/// invalid codepoint was encountered, along with the offending value.
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)]
enum CodePointRangeError {
    /// A `u32` codepoint that is reported as invalid in UTF-8.
    InvalidUtf8Codepoint(u32),
    /// An `i64` codepoint that is reported as out of char range.
    OutOfRange(i64),
}
78
79#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)]
80pub struct InvalidCodepointError(CodePointRangeError);
81
82impl InvalidCodepointError {
83 pub const EXCEPTION_TYPE: &'static str = "RangeError";
84
85 #[inline]
86 #[must_use]
87 pub const fn invalid_utf8_codepoint(codepoint: u32) -> Self {
88 Self(CodePointRangeError::InvalidUtf8Codepoint(codepoint))
89 }
90
91 #[inline]
92 #[must_use]
93 pub const fn codepoint_out_of_range(codepoint: i64) -> Self {
94 Self(CodePointRangeError::OutOfRange(codepoint))
95 }
96
97 #[inline]
98 #[must_use]
99 pub const fn is_invalid_utf8(self) -> bool {
100 matches!(self.0, CodePointRangeError::InvalidUtf8Codepoint(_))
101 }
102
103 #[inline]
104 #[must_use]
105 pub const fn is_out_of_range(self) -> bool {
106 matches!(self.0, CodePointRangeError::OutOfRange(_))
107 }
108
109 #[inline]
110 #[must_use]
111 pub fn message(self) -> alloc::string::String {
112 // The longest error message is 27 bytes + a hex-encoded codepoint
113 // formatted as `0x...`.
114 const MESSAGE_MAX_LENGTH: usize = 27 + 2 + size_of::<u32>() * 2;
115 let mut s = alloc::string::String::with_capacity(MESSAGE_MAX_LENGTH);
116 // In practice, the errors from `write!` below are safe to ignore
117 // because the `core::fmt::Write` impl for `String` will never panic
118 // and these `String`s will never approach `isize::MAX` bytes.
119 //
120 // See the `core::fmt::Display` impl for `InvalidCodepointError`.
121 let _ignored = write!(s, "{self}");
122 s
123 }
124}
125
126impl fmt::Display for InvalidCodepointError {
127 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
128 match self.0 {
129 CodePointRangeError::InvalidUtf8Codepoint(codepoint) => {
130 write!(f, "invalid codepoint {codepoint:X} in UTF-8")
131 }
132 CodePointRangeError::OutOfRange(codepoint) => write!(f, "{codepoint} out of char range"),
133 }
134 }
135}
136
// Only available with the `std` feature, since the impl names the
// `std::error::Error` trait; all provided methods are used as-is.
#[cfg(feature = "std")]
impl std::error::Error for InvalidCodepointError {}
139
140/// An iterator that yields a `u32` codepoints from a [`String`].
141///
142/// This struct is created by the [`codepoints`] method on a Spinoso [`String`].
143/// See its documentation for more.
144///
145/// # Examples
146///
147/// ```
148/// use spinoso_string::{CodepointsError, String};
149/// # fn example() -> Result<(), CodepointsError> {
150/// let s = String::from("hello");
151///
152/// assert_eq!(
153/// s.codepoints()?.collect::<Vec<_>>(),
154/// [104, 101, 108, 108, 111]
155/// );
156///
157/// let s = String::utf8(b"abc\xFFxyz".to_vec());
158/// assert!(matches!(
159/// s.codepoints(),
160/// Err(CodepointsError::InvalidUtf8Codepoint)
161/// ));
162///
163/// let s = String::binary(b"abc\xFFxyz".to_vec());
164/// assert_eq!(
165/// s.codepoints()?.collect::<Vec<_>>(),
166/// [97, 98, 99, 255, 120, 121, 122]
167/// );
168/// # Ok(())
169/// # }
170/// # example().unwrap();
171/// ```
172///
173/// This iterator is [encoding-aware]. [Conventionally UTF-8] strings are
174/// iterated by UTF-8 byte sequences.
175///
176/// ```
177/// use spinoso_string::String;
178/// # fn example() -> Result<(), spinoso_string::CodepointsError> {
179/// let s = String::from("💎");
180///
181/// assert_eq!(s.codepoints()?.collect::<Vec<_>>(), [u32::from('💎')]);
182/// # Ok(())
183/// # }
184/// # example().unwrap();
185/// ```
186///
187/// [`codepoints`]: crate::String::codepoints
188/// [encoding-aware]: crate::Encoding
189/// [Conventionally UTF-8]: crate::Encoding::Utf8
190#[derive(Debug, Default, Clone)]
191pub struct Codepoints<'a>(enc::Codepoints<'a>);
192
193impl<'a> TryFrom<&'a String> for Codepoints<'a> {
194 type Error = CodepointsError;
195
196 #[inline]
197 fn try_from(s: &'a String) -> Result<Self, Self::Error> {
198 s.inner.codepoints().map(Self)
199 }
200}
201
202impl Iterator for Codepoints<'_> {
203 type Item = u32;
204
205 #[inline]
206 fn next(&mut self) -> Option<Self::Item> {
207 self.0.next()
208 }
209}
210
211impl FusedIterator for Codepoints<'_> {}