spinoso_regexp/
encoding.rs

1//! Parse encoding parameter to `Regexp#initialize` and `Regexp::compile`.
2
3use core::fmt;
4use core::hash::{Hash, Hasher};
5use core::mem;
6use std::error;
7
8use bstr::ByteSlice;
9
10use crate::Flags;
11
/// Error returned when a value cannot be interpreted as a `Regexp` encoding.
///
/// The error carries no payload. The private unit field prevents construction
/// with a struct literal outside of this module; use
/// [`InvalidEncodingError::new`] or [`Default::default`] instead.
#[derive(Default, Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct InvalidEncodingError {
    _private: (),
}

impl InvalidEncodingError {
    /// Message rendered by the [`fmt::Display`] implementation.
    const MESSAGE: &'static str = "Invalid Regexp encoding";

    /// Creates an `InvalidEncodingError`.
    ///
    /// All instances are identical, so this is equivalent to
    /// [`Default::default`] but usable in `const` contexts.
    #[must_use]
    pub const fn new() -> Self {
        Self { _private: () }
    }
}

impl fmt::Display for InvalidEncodingError {
    /// Writes the fixed message `Invalid Regexp encoding`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(Self::MESSAGE)
    }
}

/// Uses the default `Error` methods; no underlying source error is reported.
impl error::Error for InvalidEncodingError {}
32
/// The encoding of a Regexp literal.
///
/// Regexps are assumed to use the source encoding but literals may override
/// the encoding with a Regexp modifier.
///
/// See [`Regexp` encoding][regexp-encoding].
///
/// # Equality, ordering, and hashing
///
/// [`No`] and [`None`] compare equal to each other, while [`Fixed`] is only
/// equal to itself. The [`Hash`], [`PartialOrd`], and [`Ord`] implementations
/// agree with this equivalence: `No` and `None` hash identically and compare
/// as `Equal`, and `Fixed` sorts before both of them.
///
/// [regexp-encoding]: https://ruby-doc.org/core-3.1.2/Regexp.html#class-Regexp-label-Encoding
/// [`Fixed`]: Self::Fixed
/// [`No`]: Self::No
/// [`None`]: Self::None
#[derive(Debug, Clone, Copy)]
pub enum Encoding {
    /// The encoding is fixed by a modifier (`u`, `s`, or `e`).
    ///
    /// Corresponds to `Flags::FIXEDENCODING`.
    Fixed,
    /// The `n` modifier was given.
    ///
    /// Corresponds to `Flags::NOENCODING`.
    No,
    /// No encoding modifier was given.
    ///
    /// Corresponds to an empty set of flags. This is the default.
    None,
}

impl Default for Encoding {
    /// Returns [`Encoding::None`].
    fn default() -> Self {
        Self::None
    }
}

impl Hash for Encoding {
    /// Hashes the encoding consistently with [`PartialEq`].
    ///
    /// `No` and `None` compare equal, so both are collapsed to the same
    /// canonical variant before hashing. Hashing the raw discriminant would
    /// give equal values different hashes and break `HashMap`/`HashSet`
    /// lookups.
    fn hash<H: Hasher>(&self, state: &mut H) {
        let canonical = match *self {
            Self::Fixed => Self::Fixed,
            Self::No | Self::None => Self::None,
        };
        mem::discriminant(&canonical).hash(state);
    }
}

impl PartialEq for Encoding {
    /// `No` and `None` are interchangeable; `Fixed` only equals `Fixed`.
    fn eq(&self, other: &Self) -> bool {
        use Encoding::{Fixed, No, None};

        matches!((self, other), (No | None, No | None) | (Fixed, Fixed))
    }
}

impl Eq for Encoding {}

impl PartialOrd for Encoding {
    /// Delegates to the total order defined by [`Ord`].
    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for Encoding {
    /// Orders `Fixed` before `No` and `None`, which compare as equal.
    ///
    /// This keeps the declaration order (`Fixed` first) while agreeing with
    /// [`PartialEq`], which a derived ordering would not: it would report
    /// `No < None` even though `No == None`.
    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
        match (self, other) {
            (Self::Fixed, Self::Fixed) | (Self::No | Self::None, Self::No | Self::None) => {
                core::cmp::Ordering::Equal
            }
            (Self::Fixed, Self::No | Self::None) => core::cmp::Ordering::Less,
            (Self::No | Self::None, Self::Fixed) => core::cmp::Ordering::Greater,
        }
    }
}
70
71impl TryFrom<Flags> for Encoding {
72    type Error = InvalidEncodingError;
73
74    fn try_from(mut flags: Flags) -> Result<Self, Self::Error> {
75        flags.set(Flags::ALL_REGEXP_OPTS, false);
76        if flags.intersects(Flags::FIXEDENCODING) {
77            Ok(Self::Fixed)
78        } else if flags.intersects(Flags::NOENCODING) {
79            Ok(Encoding::No)
80        } else if flags.is_empty() {
81            Ok(Encoding::new())
82        } else {
83            Err(InvalidEncodingError::new())
84        }
85    }
86}
87
88impl TryFrom<u8> for Encoding {
89    type Error = InvalidEncodingError;
90
91    fn try_from(flags: u8) -> Result<Self, Self::Error> {
92        let flags = Flags::from_bits(flags).ok_or_else(InvalidEncodingError::new)?;
93        Self::try_from(flags)
94    }
95}
96
97impl TryFrom<i64> for Encoding {
98    type Error = InvalidEncodingError;
99
100    fn try_from(flags: i64) -> Result<Self, Self::Error> {
101        let [byte, ..] = flags.to_le_bytes();
102        Self::try_from(byte)
103    }
104}
105
106impl TryFrom<&str> for Encoding {
107    type Error = InvalidEncodingError;
108
109    fn try_from(encoding: &str) -> Result<Self, Self::Error> {
110        encoding.as_bytes().try_into()
111    }
112}
113
114impl TryFrom<&[u8]> for Encoding {
115    type Error = InvalidEncodingError;
116
117    fn try_from(encoding: &[u8]) -> Result<Self, Self::Error> {
118        if encoding.find_byte(b'u').is_some() && encoding.find_byte(b'n').is_some() {
119            return Err(InvalidEncodingError::new());
120        }
121        let mut enc = None;
122        for &flag in encoding {
123            match flag {
124                b'u' | b's' | b'e' if enc.is_none() => enc = Some(Encoding::Fixed),
125                b'n' if enc.is_none() => enc = Some(Encoding::No),
126                b'i' | b'm' | b'x' | b'o' | b'l' => {}
127                _ => return Err(InvalidEncodingError::new()),
128            }
129        }
130        Ok(enc.unwrap_or_default())
131    }
132}
133
134impl TryFrom<String> for Encoding {
135    type Error = InvalidEncodingError;
136
137    fn try_from(encoding: String) -> Result<Self, Self::Error> {
138        Self::try_from(encoding.as_str())
139    }
140}
141
142impl TryFrom<Vec<u8>> for Encoding {
143    type Error = InvalidEncodingError;
144
145    fn try_from(encoding: Vec<u8>) -> Result<Self, Self::Error> {
146        Self::try_from(encoding.as_slice())
147    }
148}
149
150impl From<Encoding> for Flags {
151    /// Convert an `Encoding` to its bit flag representation.
152    fn from(encoding: Encoding) -> Self {
153        encoding.flags()
154    }
155}
156
157impl From<&Encoding> for Flags {
158    /// Convert an `Encoding` to its bit flag representation.
159    fn from(encoding: &Encoding) -> Self {
160        encoding.flags()
161    }
162}
163
164impl From<Encoding> for u8 {
165    /// Convert an `Encoding` to its bit representation.
166    fn from(encoding: Encoding) -> Self {
167        encoding.into_bits()
168    }
169}
170
171impl From<&Encoding> for u8 {
172    /// Convert an `Encoding` to its bit representation.
173    fn from(encoding: &Encoding) -> Self {
174        encoding.into_bits()
175    }
176}
177
178impl From<Encoding> for i64 {
179    /// Convert an `Encoding` to its widened bit representation.
180    fn from(encoding: Encoding) -> Self {
181        encoding.into_bits().into()
182    }
183}
184
185impl From<&Encoding> for i64 {
186    /// Convert an `Encoding` to its widened bit representation.
187    fn from(encoding: &Encoding) -> Self {
188        encoding.into_bits().into()
189    }
190}
191
192impl fmt::Display for Encoding {
193    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
194        f.write_str(self.as_modifier_str())
195    }
196}
197
198impl Encoding {
199    /// Construct a new [`None`] encoding.
200    ///
201    /// [`None`]: Self::None
202    #[must_use]
203    pub const fn new() -> Self {
204        Self::None
205    }
206
207    /// Convert an `Encoding` to its bit flag representation.
208    ///
209    /// Alias for the corresponding `Into<Flags>` implementation.
210    #[must_use]
211    pub const fn flags(self) -> Flags {
212        match self {
213            Self::Fixed => Flags::FIXEDENCODING,
214            Self::No => Flags::NOENCODING,
215            Self::None => Flags::empty(),
216        }
217    }
218
219    /// Convert an `Encoding` to its bit representation.
220    ///
221    /// Alias for the corresponding `Into<u8>` implementation.
222    #[must_use]
223    pub const fn into_bits(self) -> u8 {
224        self.flags().bits()
225    }
226
227    /// Serialize the encoding flags to a string suitable for a `Regexp` display
228    /// or debug implementation.
229    ///
230    /// See also [`Regexp#inspect`][regexp-inspect].
231    ///
232    /// [regexp-inspect]: https://ruby-doc.org/core-3.1.2/Regexp.html#method-i-inspect
233    #[must_use]
234    pub const fn as_modifier_str(self) -> &'static str {
235        match self {
236            Self::Fixed | Self::None => "",
237            Self::No => "n",
238        }
239    }
240}