spinoso_string/
encoding.rs

1use core::fmt;
2
// Name tables returned by `Encoding::names`. In each table the first entry is
// the value returned by `Encoding::name`; the remaining entries are aliases.
// The transcripts below show the corresponding MRI output for reference.

// ```
// [2.6.3] > Encoding::UTF_8.names
// => ["UTF-8", "CP65001", "locale", "external", "filesystem"]
// ```
//
// NOTE: the trailing "locale", "external" and "filesystem" entries of the MRI
// output are not part of this table.
const UTF8_NAMES: &[&str] = &["UTF-8", "CP65001"];

// ```
// [2.6.3] > Encoding::ASCII.names
// => ["US-ASCII", "ASCII", "ANSI_X3.4-1968", "646"]
// ```
const ASCII_NAMES: &[&str] = &["US-ASCII", "ASCII", "ANSI_X3.4-1968", "646"];

// ```
// [2.6.3] > Encoding::BINARY.names
// => ["ASCII-8BIT", "BINARY"]
// ```
const BINARY_NAMES: &[&str] = &["ASCII-8BIT", "BINARY"];
18
/// The error produced when a flag cannot be turned into an [`Encoding`].
///
/// [`Encoding::try_from_flag`] hands back this error; refer to that function's
/// documentation for the conditions under which it occurs.
///
/// With the **std** feature of `spinoso-string` turned on, this type also
/// implements [`std::error::Error`].
///
/// # Examples
///
/// ```
/// # use spinoso_string::{Encoding, InvalidEncodingError};
/// assert_eq!(
///     Encoding::try_from_flag(255),
///     Err(InvalidEncodingError::new())
/// );
/// assert_eq!(Encoding::try_from(255), Err(InvalidEncodingError::new()));
/// ```
///
/// [`std::error::Error`]: https://doc.rust-lang.org/std/error/trait.Error.html
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct InvalidEncodingError {
    // Zero-sized private field: the struct cannot be built with a struct
    // literal from outside this module, only through its constructors.
    _private: (),
}
43
44impl InvalidEncodingError {
45    /// Construct a new `InvalidEncodingError`.
46    ///
47    /// # Examples
48    ///
49    /// ```
50    /// # use spinoso_string::InvalidEncodingError;
51    /// const ERR: InvalidEncodingError = InvalidEncodingError::new();
52    /// ```
53    #[must_use]
54    pub const fn new() -> Self {
55        Self { _private: () }
56    }
57}
58
59impl fmt::Display for InvalidEncodingError {
60    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
61        f.write_str("Could not parse encoding")
62    }
63}
64
// `std::error::Error` is only implemented when the `std` feature is enabled.
#[cfg(feature = "std")]
impl std::error::Error for InvalidEncodingError {
    // No overrides: every method uses the trait's provided implementation.
}
67
/// A character encoding that a Ruby `String` can be tagged with.
///
/// Three `String` encodings are supported by `spinoso-string`:
///
/// - [UTF-8]
/// - [ASCII](Self::Ascii)
/// - [Binary](Self::Binary)
///
/// The encoding does not constrain the bytes held in a `String`'s internal
/// buffer. Instead, the `Encoding` attached to a [`String`] changes how
/// character-oriented APIs such as [`String::char_len`] behave. In
/// particular, a `String` with a UTF-8 encoding is only
/// [conventionally UTF-8] and is allowed to hold invalid UTF-8 byte
/// sequences.
///
/// Ruby's [`String#encode`] API transcodes the bytes of a `String` into
/// another encoding. For every encoding defined in this enum, calling
/// `String#encode` is a no-op.
///
/// [`String`]: crate::String
/// [`String::char_len`]: crate::String::char_len
/// [UTF-8]: Self::Utf8
/// [conventionally UTF-8]: https://docs.rs/bstr/0.2.*/bstr/#differences-with-standard-strings
/// [`String#encode`]: https://ruby-doc.org/core-3.1.2/String.html#method-i-encode
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Encoding {
    /// Bytes are treated as UTF-8 by convention.
    Utf8,
    /// Bytes are treated as ASCII by convention.
    Ascii,
    /// ASCII-8BIT: binary data, arbitrary bytes.
    Binary,
}
100
101impl Default for Encoding {
102    #[inline]
103    fn default() -> Self {
104        Self::Utf8
105    }
106}
107
108impl fmt::Debug for Encoding {
109    /// Outputs the value of `Encoding#inspect`.
110    ///
111    /// Returns a string which represents the encoding for programmers. See
112    /// [`Encoding::inspect`].
113    #[inline]
114    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
115        f.write_str(self.inspect())
116    }
117}
118
119impl fmt::Display for Encoding {
120    /// Outputs the value of `Encoding#to_s`.
121    ///
122    /// Returns the name of the encoding. See [`Encoding::name`].
123    #[inline]
124    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
125        f.write_str(self.name())
126    }
127}
128
129impl TryFrom<u8> for Encoding {
130    type Error = InvalidEncodingError;
131
132    /// Try to deserialize an `Encoding` from a bitflag.
133    ///
134    /// See [`Encoding::try_from_flag`].
135    #[inline]
136    fn try_from(flag: u8) -> Result<Self, InvalidEncodingError> {
137        Self::try_from_flag(flag)
138    }
139}
140
141impl From<Encoding> for u8 {
142    /// Serialize an `Encoding` to a bitflag.
143    ///
144    /// See [`Encoding::to_flag`].
145    #[inline]
146    fn from(encoding: Encoding) -> Self {
147        encoding.to_flag()
148    }
149}
150
151impl Encoding {
152    /// The total number of supported encodings.
153    ///
154    /// `spinoso-string` supports three encodings:
155    ///
156    /// - [UTF-8](Self::Utf8)
157    /// - [ASCII](Self::Ascii)
158    /// - [Binary](Self::Binary)
159    pub const NUM_SUPPORTED_ENCODINGS: usize = 3;
160
161    /// Serialize the encoding to a bitflag.
162    ///
163    /// See [`try_from_flag`] for how to deserialize an encoding.
164    ///
165    /// This function is used to implement [`From<Encoding>`] for [`u8`].
166    ///
167    /// # Examples
168    ///
169    /// ```
170    /// # use spinoso_string::Encoding;
171    /// assert_eq!(Encoding::Utf8.to_flag(), 2);
172    /// assert_eq!(Encoding::Ascii.to_flag(), 4);
173    /// assert_eq!(Encoding::Binary.to_flag(), 8);
174    /// ```
175    ///
176    /// [`try_from_flag`]: Self::try_from_flag
177    /// [`From<Encoding>`]: From
178    #[inline]
179    #[must_use]
180    pub const fn to_flag(self) -> u8 {
181        match self {
182            Self::Utf8 => 1 << 1,
183            Self::Ascii => 1 << 2,
184            Self::Binary => 1 << 3,
185        }
186    }
187
188    /// Deserialize an encoding from a bitflag.
189    ///
190    /// See [`to_flag`] for how to serialize an encoding.
191    ///
192    /// This function is used to implement [`TryFrom<u8>`] for `Encoding`.
193    ///
194    /// # Errors
195    ///
196    /// If the given flag does not map to any [`Encoding`], an error is
197    /// returned.
198    ///
199    /// # Examples
200    ///
201    /// ```
202    /// # use spinoso_string::{Encoding, InvalidEncodingError};
203    /// assert_eq!(Encoding::try_from_flag(2), Ok(Encoding::Utf8));
204    /// assert_eq!(Encoding::try_from_flag(4), Ok(Encoding::Ascii));
205    /// assert_eq!(Encoding::try_from_flag(8), Ok(Encoding::Binary));
206    /// assert_eq!(
207    ///     Encoding::try_from_flag(2 | 4),
208    ///     Err(InvalidEncodingError::new())
209    /// );
210    /// assert_eq!(
211    ///     Encoding::try_from_flag(255),
212    ///     Err(InvalidEncodingError::new())
213    /// );
214    /// ```
215    ///
216    /// [`to_flag`]: Self::to_flag
217    /// [`TryFrom<u8>`]: TryFrom
218    #[inline]
219    pub const fn try_from_flag(flag: u8) -> Result<Self, InvalidEncodingError> {
220        match flag {
221            x if x == Self::Utf8.to_flag() => Ok(Self::Utf8),
222            x if x == Self::Ascii.to_flag() => Ok(Self::Ascii),
223            x if x == Self::Binary.to_flag() => Ok(Self::Binary),
224            _ => Err(InvalidEncodingError::new()),
225        }
226    }
227
228    /// Returns a string which represents the encoding for programmers.
229    ///
230    /// # Examples
231    ///
232    /// ```
233    /// # use spinoso_string::Encoding;
234    /// assert_eq!(Encoding::Utf8.inspect(), "#<Encoding:UTF-8>");
235    /// assert_eq!(Encoding::Ascii.inspect(), "#<Encoding:US-ASCII>");
236    /// assert_eq!(Encoding::Binary.inspect(), "#<Encoding:ASCII-8BIT>");
237    /// ```
238    ///
239    /// # Ruby Examples
240    ///
241    /// ```ruby
242    /// Encoding::UTF_8.inspect       #=> "#<Encoding:UTF-8>"
243    /// Encoding::ISO_2022_JP.inspect #=> "#<Encoding:ISO-2022-JP (dummy)>"
244    /// ```
245    #[must_use]
246    pub const fn inspect(self) -> &'static str {
247        match self {
248            // ```
249            // [2.6.3] > Encoding::UTF_8.inspect
250            // => "#<Encoding:UTF-8>"
251            // ```
252            Self::Utf8 => "#<Encoding:UTF-8>",
253            // ```
254            // [2.6.3] > Encoding::ASCII.inspect
255            // => "#<Encoding:US-ASCII>"
256            // ```
257            Self::Ascii => "#<Encoding:US-ASCII>",
258            // ```
259            // [2.6.3] > Encoding::BINARY.inspect
260            // => "#<Encoding:ASCII-8BIT>"
261            // ```
262            Self::Binary => "#<Encoding:ASCII-8BIT>",
263        }
264    }
265
266    /// Returns the name of the encoding.
267    ///
268    /// This function is used to implement [`fmt::Display`] for `Encoding`.
269    ///
270    /// This function can be used to implement the Ruby functions
271    /// `Encoding#name` and `Encoding#to_s`.
272    ///
273    /// # Examples
274    ///
275    /// ```
276    /// # use spinoso_string::Encoding;
277    /// assert_eq!(Encoding::Utf8.name(), "UTF-8");
278    /// assert_eq!(Encoding::Ascii.name(), "US-ASCII");
279    /// assert_eq!(Encoding::Binary.name(), "ASCII-8BIT");
280    /// ```
281    ///
282    /// # Ruby Examples
283    ///
284    /// ```ruby
285    /// Encoding::UTF_8.name      #=> "UTF-8"
286    /// ```
287    #[inline]
288    #[must_use]
289    pub const fn name(self) -> &'static str {
290        match self {
291            // ```
292            // [2.6.3] > Encoding::UTF_8.name
293            // => "UTF-8"
294            // ```
295            Self::Utf8 => "UTF-8",
296            // ```
297            // [2.6.3] > Encoding::ASCII.name
298            // => "US-ASCII"
299            // ```
300            Self::Ascii => "US-ASCII",
301            // ```
302            // [2.6.3] > Encoding::BINARY.name
303            // => "ASCII-8BIT"
304            // ```
305            Self::Binary => "ASCII-8BIT",
306        }
307    }
308
309    /// Returns the list of name and aliases of the encoding.
310    ///
311    /// This function can be used to implement the Ruby function
312    /// `Encoding#names`.
313    ///
314    /// # Examples
315    ///
316    /// ```
317    /// # use spinoso_string::Encoding;
318    /// assert_eq!(Encoding::Utf8.names(), ["UTF-8", "CP65001"]);
319    /// assert_eq!(
320    ///     Encoding::Ascii.names(),
321    ///     ["US-ASCII", "ASCII", "ANSI_X3.4-1968", "646"]
322    /// );
323    /// assert_eq!(Encoding::Binary.names(), ["ASCII-8BIT", "BINARY"]);
324    /// ```
325    ///
326    /// # Ruby Examples
327    ///
328    /// ```ruby
329    /// Encoding::WINDOWS_31J.names  #=> ["Windows-31J", "CP932", "csWindows31J"]
330    /// ```
331    #[inline]
332    #[must_use]
333    pub const fn names(self) -> &'static [&'static str] {
334        match self {
335            Self::Utf8 => UTF8_NAMES,
336            Self::Ascii => ASCII_NAMES,
337            Self::Binary => BINARY_NAMES,
338        }
339    }
340
341    /// Returns whether ASCII-compatible or not.
342    ///
343    /// This function can be used to implement the Ruby function
344    /// `Encoding#ascii_compatible?`.
345    ///
346    /// # Examples
347    ///
348    /// ```
349    /// # use spinoso_string::Encoding;
350    /// assert!(Encoding::Utf8.is_ascii_compatible());
351    /// assert!(Encoding::Ascii.is_ascii_compatible());
352    /// assert!(Encoding::Binary.is_ascii_compatible());
353    /// ```
354    ///
355    /// # Ruby Examples
356    ///
357    /// ```ruby
358    /// Encoding::UTF_8.ascii_compatible?     #=> true
359    /// Encoding::UTF_16BE.ascii_compatible?  #=> false
360    /// ```
361    #[inline]
362    #[must_use]
363    pub const fn is_ascii_compatible(self) -> bool {
364        matches!(self, Self::Utf8 | Self::Ascii | Self::Binary)
365    }
366
367    /// Returns true for dummy encodings.
368    ///
369    /// A dummy encoding is an encoding for which character handling is not
370    /// properly implemented. It is used for stateful encodings.
371    ///
372    /// This function can be used to implement the Ruby function
373    /// `Encoding#dummy?`.
374    ///
375    /// # Examples
376    ///
377    /// ```
378    /// # use spinoso_string::Encoding;
379    /// assert!(!Encoding::Utf8.is_dummy());
380    /// assert!(!Encoding::Ascii.is_dummy());
381    /// assert!(!Encoding::Binary.is_dummy());
382    /// ```
383    ///
384    /// # Ruby Examples
385    ///
386    /// ```ruby
387    /// Encoding::ISO_2022_JP.dummy?       #=> true
388    /// Encoding::UTF_8.dummy?             #=> false
389    /// ```
390    #[inline]
391    #[must_use]
392    pub const fn is_dummy(self) -> bool {
393        !matches!(self, Self::Utf8 | Self::Ascii | Self::Binary)
394    }
395}