spinoso_string/codepoints.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212
use core::fmt::{self, Write};
use core::iter::FusedIterator;
use core::mem;
use crate::enc::{self};
use crate::String;
/// Error returned when a [`Codepoints`] iterator cannot be constructed.
///
/// [`String::codepoints`] returns this error; see that method's documentation
/// for more detail.
///
/// This error corresponds to the [Ruby `ArgumentError` Exception class].
///
/// When the **std** feature of `spinoso-string` is enabled, this enum
/// implements [`std::error::Error`].
///
/// [Ruby `ArgumentError` Exception class]: https://ruby-doc.org/core-3.1.2/ArgumentError.html
/// [`std::error::Error`]: https://doc.rust-lang.org/std/error/trait.Error.html
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub enum CodepointsError {
    /// The [`String`] has [UTF-8 encoding] but does not hold a valid UTF-8
    /// byte string, so [`String::codepoints`] cannot iterate it.
    ///
    /// [UTF-8 encoding]: crate::Encoding::Utf8
    InvalidUtf8Codepoint,
}

impl CodepointsError {
    /// Name of the Ruby exception class this error corresponds to.
    pub const EXCEPTION_TYPE: &'static str = "ArgumentError";

    /// Construct the `CodepointsError` for an invalid UTF-8 codepoint.
    ///
    /// # Examples
    ///
    /// ```
    /// use spinoso_string::CodepointsError;
    ///
    /// const ERR: CodepointsError = CodepointsError::invalid_utf8_codepoint();
    /// assert_eq!(ERR.message(), "invalid byte sequence in UTF-8");
    /// ```
    #[inline]
    #[must_use]
    pub const fn invalid_utf8_codepoint() -> Self {
        Self::InvalidUtf8Codepoint
    }

    /// Retrieve the exception message associated with this codepoints error.
    ///
    /// # Examples
    ///
    /// ```
    /// # use spinoso_string::CodepointsError;
    /// let err = CodepointsError::invalid_utf8_codepoint();
    /// assert_eq!(err.message(), "invalid byte sequence in UTF-8");
    /// ```
    #[inline]
    #[must_use]
    pub const fn message(self) -> &'static str {
        match self {
            Self::InvalidUtf8Codepoint => "invalid byte sequence in UTF-8",
        }
    }
}

impl fmt::Display for CodepointsError {
    /// Writes the same text as [`CodepointsError::message`].
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Matching exhaustively means a newly added variant will not compile
        // here until it is given a message.
        match self {
            Self::InvalidUtf8Codepoint => f.write_str(self.message()),
        }
    }
}

#[cfg(feature = "std")]
impl std::error::Error for CodepointsError {}
/// The reason a codepoint was rejected.
///
/// This is the private payload of [`InvalidCodepointError`]; values are
/// created through the constructors on that type.
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)]
enum CodePointRangeError {
    /// The given `u32` is reported as an invalid codepoint in UTF-8.
    InvalidUtf8Codepoint(u32),
    /// The given `i64` is reported as out of char range.
    OutOfRange(i64),
}

/// Error describing a codepoint that was rejected, either because it is
/// invalid in UTF-8 or because it is out of char range.
///
/// This error corresponds to the Ruby `RangeError` exception class (see
/// [`InvalidCodepointError::EXCEPTION_TYPE`]).
///
/// When the **std** feature of `spinoso-string` is enabled, this struct
/// implements [`std::error::Error`].
///
/// [`std::error::Error`]: https://doc.rust-lang.org/std/error/trait.Error.html
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct InvalidCodepointError(CodePointRangeError);

impl InvalidCodepointError {
    /// Name of the Ruby exception class this error corresponds to.
    pub const EXCEPTION_TYPE: &'static str = "RangeError";

    /// Construct an error for `codepoint` being an invalid codepoint in UTF-8.
    ///
    /// The resulting message is `invalid codepoint 0x<HEX> in UTF-8`, with the
    /// codepoint rendered as uppercase hexadecimal.
    #[inline]
    #[must_use]
    pub const fn invalid_utf8_codepoint(codepoint: u32) -> Self {
        Self(CodePointRangeError::InvalidUtf8Codepoint(codepoint))
    }

    /// Construct an error for `codepoint` being out of char range.
    ///
    /// The resulting message is `<codepoint> out of char range`, with the
    /// codepoint rendered in decimal.
    #[inline]
    #[must_use]
    pub const fn codepoint_out_of_range(codepoint: i64) -> Self {
        Self(CodePointRangeError::OutOfRange(codepoint))
    }

    /// Returns `true` if this error was created by
    /// [`InvalidCodepointError::invalid_utf8_codepoint`].
    #[inline]
    #[must_use]
    pub const fn is_invalid_utf8(self) -> bool {
        matches!(self.0, CodePointRangeError::InvalidUtf8Codepoint(_))
    }

    /// Returns `true` if this error was created by
    /// [`InvalidCodepointError::codepoint_out_of_range`].
    #[inline]
    #[must_use]
    pub const fn is_out_of_range(self) -> bool {
        matches!(self.0, CodePointRangeError::OutOfRange(_))
    }

    /// Retrieve the exception message associated with this error as an owned
    /// string.
    ///
    /// The text is identical to what the `Display` impl writes.
    #[inline]
    #[must_use]
    pub fn message(self) -> alloc::string::String {
        // Upper bounds on the rendered length of each variant (see the
        // `core::fmt::Display` impl for `InvalidCodepointError`):
        //
        // - invalid UTF-8: 27 bytes of fixed text + `0x` + up to 8 hex digits
        //   for a `u32`.
        // - out of range: up to 20 bytes for a decimal `i64` (sign included)
        //   + 18 bytes of fixed text.
        const INVALID_UTF8_MAX_LENGTH: usize = 27 + 2 + mem::size_of::<u32>() * 2;
        const OUT_OF_RANGE_MAX_LENGTH: usize = 20 + 18;
        const MESSAGE_MAX_LENGTH: usize = if INVALID_UTF8_MAX_LENGTH > OUT_OF_RANGE_MAX_LENGTH {
            INVALID_UTF8_MAX_LENGTH
        } else {
            OUT_OF_RANGE_MAX_LENGTH
        };

        let mut s = alloc::string::String::with_capacity(MESSAGE_MAX_LENGTH);
        // The result of `write!` is safe to ignore: the `core::fmt::Write`
        // impl for `String` never reports an error, and the `Display` impl
        // only forwards errors coming from the writer. These strings also
        // never approach `isize::MAX` bytes.
        let _ignored = write!(s, "{self}");
        s
    }
}

impl fmt::Display for InvalidCodepointError {
    /// Writes the Ruby-style `RangeError` message for this error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self.0 {
            CodePointRangeError::InvalidUtf8Codepoint(codepoint) => {
                // The `0x` prefix is part of the message: without it a value
                // such as `0x10` would read as the decimal number 10.
                write!(f, "invalid codepoint 0x{codepoint:X} in UTF-8")
            }
            CodePointRangeError::OutOfRange(codepoint) => write!(f, "{codepoint} out of char range"),
        }
    }
}

#[cfg(feature = "std")]
impl std::error::Error for InvalidCodepointError {}
/// An iterator that yields `u32` codepoints from a [`String`].
///
/// This struct is created by the [`codepoints`] method on a Spinoso [`String`].
/// See its documentation for more.
///
/// # Examples
///
/// ```
/// use spinoso_string::{CodepointsError, String};
/// # fn example() -> Result<(), CodepointsError> {
/// let s = String::from("hello");
///
/// assert_eq!(
///     s.codepoints()?.collect::<Vec<_>>(),
///     [104, 101, 108, 108, 111]
/// );
///
/// let s = String::utf8(b"abc\xFFxyz".to_vec());
/// assert!(matches!(
///     s.codepoints(),
///     Err(CodepointsError::InvalidUtf8Codepoint)
/// ));
///
/// let s = String::binary(b"abc\xFFxyz".to_vec());
/// assert_eq!(
///     s.codepoints()?.collect::<Vec<_>>(),
///     [97, 98, 99, 255, 120, 121, 122]
/// );
/// # Ok(())
/// # }
/// # example().unwrap();
/// ```
///
/// This iterator is [encoding-aware]. [Conventionally UTF-8] strings are
/// iterated by UTF-8 byte sequences.
///
/// ```
/// use spinoso_string::String;
/// # fn example() -> Result<(), spinoso_string::CodepointsError> {
/// let s = String::from("💎");
///
/// assert_eq!(s.codepoints()?.collect::<Vec<_>>(), [u32::from('💎')]);
/// # Ok(())
/// # }
/// # example().unwrap();
/// ```
///
/// [`codepoints`]: crate::String::codepoints
/// [encoding-aware]: crate::Encoding
/// [Conventionally UTF-8]: crate::Encoding::Utf8
// Newtype over the crate-internal `enc::Codepoints`; iteration is delegated
// to the wrapped value.
#[derive(Debug, Default, Clone)]
pub struct Codepoints<'a>(enc::Codepoints<'a>);
impl<'a> TryFrom<&'a String> for Codepoints<'a> {
    type Error = CodepointsError;

    /// Build a [`Codepoints`] iterator borrowing from `s`.
    ///
    /// # Errors
    ///
    /// Returns, unchanged, whatever [`CodepointsError`] the call to
    /// `codepoints()` on the string's inner representation produces.
    #[inline]
    fn try_from(s: &'a String) -> Result<Self, Self::Error> {
        let inner = s.inner.codepoints()?;
        Ok(Self(inner))
    }
}
// Iteration is pure delegation: every item comes straight from the wrapped
// `enc::Codepoints` value.
impl Iterator for Codepoints<'_> {
    type Item = u32;

    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        self.0.next()
    }
}

// Marker impl: promises that once `next` has returned `None` it keeps
// returning `None`. NOTE(review): this relies on the wrapped `enc::Codepoints`
// behaving that way, which is not visible from this file — confirm in `enc`.
impl FusedIterator for Codepoints<'_> {}