spinoso_string/encoding.rs
1use core::fmt;
2
// Name and alias reported for the UTF-8 encoding, primary name first.
//
// Reference output from Ruby 2.6.3:
//
// ```
// [2.6.3] > Encoding::UTF_8.names
// => ["UTF-8", "CP65001", "locale", "external", "filesystem"]
// ```
//
// Only the first two entries of that output are listed here.
// NOTE(review): presumably "locale", "external" and "filesystem" are omitted
// because they depend on the running process' environment -- confirm.
const UTF8_NAMES: &[&str] = &["UTF-8", "CP65001"];
// Name and aliases reported for the ASCII encoding, primary name first.
//
// Reference output from Ruby 2.6.3:
//
// ```
// [2.6.3] > Encoding::ASCII.names
// => ["US-ASCII", "ASCII", "ANSI_X3.4-1968", "646"]
// ```
const ASCII_NAMES: &[&str] = &["US-ASCII", "ASCII", "ANSI_X3.4-1968", "646"];
// Name and alias reported for the binary encoding, primary name first.
//
// Reference output from Ruby 2.6.3:
//
// ```
// [2.6.3] > Encoding::BINARY.names
// => ["ASCII-8BIT", "BINARY"]
// ```
const BINARY_NAMES: &[&str] = &["ASCII-8BIT", "BINARY"];
18
/// Error produced when a bitflag cannot be decoded into an [`Encoding`].
///
/// [`Encoding::try_from_flag`] returns this error; consult that function's
/// documentation for the conditions under which decoding fails.
///
/// This struct implements [`std::error::Error`] when the **std** feature of
/// `spinoso-string` is enabled.
///
/// # Examples
///
/// ```
/// # use spinoso_string::{Encoding, InvalidEncodingError};
/// assert_eq!(
///     Encoding::try_from_flag(255),
///     Err(InvalidEncodingError::new())
/// );
/// assert_eq!(Encoding::try_from(255), Err(InvalidEncodingError::new()));
/// ```
///
/// [`std::error::Error`]: https://doc.rust-lang.org/std/error/trait.Error.html
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct InvalidEncodingError {
    // Private unit field: code outside this module cannot build the struct
    // with a struct literal and has to go through a constructor instead.
    _private: (),
}
43
44impl InvalidEncodingError {
45 /// Construct a new `InvalidEncodingError`.
46 ///
47 /// # Examples
48 ///
49 /// ```
50 /// # use spinoso_string::InvalidEncodingError;
51 /// const ERR: InvalidEncodingError = InvalidEncodingError::new();
52 /// ```
53 #[must_use]
54 pub const fn new() -> Self {
55 Self { _private: () }
56 }
57}
58
59impl fmt::Display for InvalidEncodingError {
60 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
61 f.write_str("Could not parse encoding")
62 }
63}
64
// `std::error::Error` is only implemented when the `std` feature is enabled.
// The impl body is empty, so every trait method keeps its default
// implementation.
#[cfg(feature = "std")]
impl std::error::Error for InvalidEncodingError {}
67
/// A character encoding that can be associated with a Ruby `String`.
///
/// Three `String` encodings are supported by `spinoso-string`:
///
/// - [UTF-8](Self::Utf8)
/// - [ASCII](Self::Ascii)
/// - [Binary](Self::Binary)
///
/// The `Encoding` attached to a [`String`] asserts nothing about the bytes
/// held in the `String`'s internal buffer. Instead, it changes how
/// character-oriented APIs such as [`String::char_len`] behave. A `String`
/// with a [UTF-8] encoding is only [conventionally UTF-8] and may contain
/// invalid UTF-8 byte sequences.
///
/// Ruby's [`String#encode`] API can transcode the bytes of a `String` to
/// another encoding. Calling `String#encode` on any of the encodings defined
/// in this enum is a no-op.
///
/// [`String`]: crate::String
/// [`String::char_len`]: crate::String::char_len
/// [UTF-8]: Self::Utf8
/// [conventionally UTF-8]: https://docs.rs/bstr/0.2.*/bstr/#differences-with-standard-strings
/// [`String#encode`]: https://ruby-doc.org/core-3.1.2/String.html#method-i-encode
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Encoding {
    /// UTF-8 by convention.
    Utf8,
    /// ASCII by convention.
    Ascii,
    /// ASCII-8BIT: binary data made of arbitrary bytes.
    Binary,
}
100
101impl Default for Encoding {
102 #[inline]
103 fn default() -> Self {
104 Self::Utf8
105 }
106}
107
108impl fmt::Debug for Encoding {
109 /// Outputs the value of `Encoding#inspect`.
110 ///
111 /// Returns a string which represents the encoding for programmers. See
112 /// [`Encoding::inspect`].
113 #[inline]
114 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
115 f.write_str(self.inspect())
116 }
117}
118
119impl fmt::Display for Encoding {
120 /// Outputs the value of `Encoding#to_s`.
121 ///
122 /// Returns the name of the encoding. See [`Encoding::name`].
123 #[inline]
124 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
125 f.write_str(self.name())
126 }
127}
128
129impl TryFrom<u8> for Encoding {
130 type Error = InvalidEncodingError;
131
132 /// Try to deserialize an `Encoding` from a bitflag.
133 ///
134 /// See [`Encoding::try_from_flag`].
135 #[inline]
136 fn try_from(flag: u8) -> Result<Self, InvalidEncodingError> {
137 Self::try_from_flag(flag)
138 }
139}
140
141impl From<Encoding> for u8 {
142 /// Serialize an `Encoding` to a bitflag.
143 ///
144 /// See [`Encoding::to_flag`].
145 #[inline]
146 fn from(encoding: Encoding) -> Self {
147 encoding.to_flag()
148 }
149}
150
151impl Encoding {
152 /// The total number of supported encodings.
153 ///
154 /// `spinoso-string` supports three encodings:
155 ///
156 /// - [UTF-8](Self::Utf8)
157 /// - [ASCII](Self::Ascii)
158 /// - [Binary](Self::Binary)
159 pub const NUM_SUPPORTED_ENCODINGS: usize = 3;
160
161 /// Serialize the encoding to a bitflag.
162 ///
163 /// See [`try_from_flag`] for how to deserialize an encoding.
164 ///
165 /// This function is used to implement [`From<Encoding>`] for [`u8`].
166 ///
167 /// # Examples
168 ///
169 /// ```
170 /// # use spinoso_string::Encoding;
171 /// assert_eq!(Encoding::Utf8.to_flag(), 2);
172 /// assert_eq!(Encoding::Ascii.to_flag(), 4);
173 /// assert_eq!(Encoding::Binary.to_flag(), 8);
174 /// ```
175 ///
176 /// [`try_from_flag`]: Self::try_from_flag
177 /// [`From<Encoding>`]: From
178 #[inline]
179 #[must_use]
180 pub const fn to_flag(self) -> u8 {
181 match self {
182 Self::Utf8 => 1 << 1,
183 Self::Ascii => 1 << 2,
184 Self::Binary => 1 << 3,
185 }
186 }
187
188 /// Deserialize an encoding from a bitflag.
189 ///
190 /// See [`to_flag`] for how to serialize an encoding.
191 ///
192 /// This function is used to implement [`TryFrom<u8>`] for `Encoding`.
193 ///
194 /// # Errors
195 ///
196 /// If the given flag does not map to any [`Encoding`], an error is
197 /// returned.
198 ///
199 /// # Examples
200 ///
201 /// ```
202 /// # use spinoso_string::{Encoding, InvalidEncodingError};
203 /// assert_eq!(Encoding::try_from_flag(2), Ok(Encoding::Utf8));
204 /// assert_eq!(Encoding::try_from_flag(4), Ok(Encoding::Ascii));
205 /// assert_eq!(Encoding::try_from_flag(8), Ok(Encoding::Binary));
206 /// assert_eq!(
207 /// Encoding::try_from_flag(2 | 4),
208 /// Err(InvalidEncodingError::new())
209 /// );
210 /// assert_eq!(
211 /// Encoding::try_from_flag(255),
212 /// Err(InvalidEncodingError::new())
213 /// );
214 /// ```
215 ///
216 /// [`to_flag`]: Self::to_flag
217 /// [`TryFrom<u8>`]: TryFrom
218 #[inline]
219 pub const fn try_from_flag(flag: u8) -> Result<Self, InvalidEncodingError> {
220 match flag {
221 x if x == Self::Utf8.to_flag() => Ok(Self::Utf8),
222 x if x == Self::Ascii.to_flag() => Ok(Self::Ascii),
223 x if x == Self::Binary.to_flag() => Ok(Self::Binary),
224 _ => Err(InvalidEncodingError::new()),
225 }
226 }
227
228 /// Returns a string which represents the encoding for programmers.
229 ///
230 /// # Examples
231 ///
232 /// ```
233 /// # use spinoso_string::Encoding;
234 /// assert_eq!(Encoding::Utf8.inspect(), "#<Encoding:UTF-8>");
235 /// assert_eq!(Encoding::Ascii.inspect(), "#<Encoding:US-ASCII>");
236 /// assert_eq!(Encoding::Binary.inspect(), "#<Encoding:ASCII-8BIT>");
237 /// ```
238 ///
239 /// # Ruby Examples
240 ///
241 /// ```ruby
242 /// Encoding::UTF_8.inspect #=> "#<Encoding:UTF-8>"
243 /// Encoding::ISO_2022_JP.inspect #=> "#<Encoding:ISO-2022-JP (dummy)>"
244 /// ```
245 #[must_use]
246 pub const fn inspect(self) -> &'static str {
247 match self {
248 // ```
249 // [2.6.3] > Encoding::UTF_8.inspect
250 // => "#<Encoding:UTF-8>"
251 // ```
252 Self::Utf8 => "#<Encoding:UTF-8>",
253 // ```
254 // [2.6.3] > Encoding::ASCII.inspect
255 // => "#<Encoding:US-ASCII>"
256 // ```
257 Self::Ascii => "#<Encoding:US-ASCII>",
258 // ```
259 // [2.6.3] > Encoding::BINARY.inspect
260 // => "#<Encoding:ASCII-8BIT>"
261 // ```
262 Self::Binary => "#<Encoding:ASCII-8BIT>",
263 }
264 }
265
266 /// Returns the name of the encoding.
267 ///
268 /// This function is used to implement [`fmt::Display`] for `Encoding`.
269 ///
270 /// This function can be used to implement the Ruby functions
271 /// `Encoding#name` and `Encoding#to_s`.
272 ///
273 /// # Examples
274 ///
275 /// ```
276 /// # use spinoso_string::Encoding;
277 /// assert_eq!(Encoding::Utf8.name(), "UTF-8");
278 /// assert_eq!(Encoding::Ascii.name(), "US-ASCII");
279 /// assert_eq!(Encoding::Binary.name(), "ASCII-8BIT");
280 /// ```
281 ///
282 /// # Ruby Examples
283 ///
284 /// ```ruby
285 /// Encoding::UTF_8.name #=> "UTF-8"
286 /// ```
287 #[inline]
288 #[must_use]
289 pub const fn name(self) -> &'static str {
290 match self {
291 // ```
292 // [2.6.3] > Encoding::UTF_8.name
293 // => "UTF-8"
294 // ```
295 Self::Utf8 => "UTF-8",
296 // ```
297 // [2.6.3] > Encoding::ASCII.name
298 // => "US-ASCII"
299 // ```
300 Self::Ascii => "US-ASCII",
301 // ```
302 // [2.6.3] > Encoding::BINARY.name
303 // => "ASCII-8BIT"
304 // ```
305 Self::Binary => "ASCII-8BIT",
306 }
307 }
308
309 /// Returns the list of name and aliases of the encoding.
310 ///
311 /// This function can be used to implement the Ruby function
312 /// `Encoding#names`.
313 ///
314 /// # Examples
315 ///
316 /// ```
317 /// # use spinoso_string::Encoding;
318 /// assert_eq!(Encoding::Utf8.names(), ["UTF-8", "CP65001"]);
319 /// assert_eq!(
320 /// Encoding::Ascii.names(),
321 /// ["US-ASCII", "ASCII", "ANSI_X3.4-1968", "646"]
322 /// );
323 /// assert_eq!(Encoding::Binary.names(), ["ASCII-8BIT", "BINARY"]);
324 /// ```
325 ///
326 /// # Ruby Examples
327 ///
328 /// ```ruby
329 /// Encoding::WINDOWS_31J.names #=> ["Windows-31J", "CP932", "csWindows31J"]
330 /// ```
331 #[inline]
332 #[must_use]
333 pub const fn names(self) -> &'static [&'static str] {
334 match self {
335 Self::Utf8 => UTF8_NAMES,
336 Self::Ascii => ASCII_NAMES,
337 Self::Binary => BINARY_NAMES,
338 }
339 }
340
341 /// Returns whether ASCII-compatible or not.
342 ///
343 /// This function can be used to implement the Ruby function
344 /// `Encoding#ascii_compatible?`.
345 ///
346 /// # Examples
347 ///
348 /// ```
349 /// # use spinoso_string::Encoding;
350 /// assert!(Encoding::Utf8.is_ascii_compatible());
351 /// assert!(Encoding::Ascii.is_ascii_compatible());
352 /// assert!(Encoding::Binary.is_ascii_compatible());
353 /// ```
354 ///
355 /// # Ruby Examples
356 ///
357 /// ```ruby
358 /// Encoding::UTF_8.ascii_compatible? #=> true
359 /// Encoding::UTF_16BE.ascii_compatible? #=> false
360 /// ```
361 #[inline]
362 #[must_use]
363 pub const fn is_ascii_compatible(self) -> bool {
364 matches!(self, Self::Utf8 | Self::Ascii | Self::Binary)
365 }
366
367 /// Returns true for dummy encodings.
368 ///
369 /// A dummy encoding is an encoding for which character handling is not
370 /// properly implemented. It is used for stateful encodings.
371 ///
372 /// This function can be used to implement the Ruby function
373 /// `Encoding#dummy?`.
374 ///
375 /// # Examples
376 ///
377 /// ```
378 /// # use spinoso_string::Encoding;
379 /// assert!(!Encoding::Utf8.is_dummy());
380 /// assert!(!Encoding::Ascii.is_dummy());
381 /// assert!(!Encoding::Binary.is_dummy());
382 /// ```
383 ///
384 /// # Ruby Examples
385 ///
386 /// ```ruby
387 /// Encoding::ISO_2022_JP.dummy? #=> true
388 /// Encoding::UTF_8.dummy? #=> false
389 /// ```
390 #[inline]
391 #[must_use]
392 pub const fn is_dummy(self) -> bool {
393 !matches!(self, Self::Utf8 | Self::Ascii | Self::Binary)
394 }
395}