spinoso_regexp/
encoding.rs

1//! Parse encoding parameter to `Regexp#initialize` and `Regexp::compile`.
2
3use core::fmt;
4use core::hash::{Hash, Hasher};
5use core::mem;
6use std::error;
7
8use bstr::ByteSlice;
9
10use crate::Flags;
11
/// Error returned when a set of `Regexp` encoding flags or modifiers cannot
/// be converted into an [`Encoding`].
///
/// The error carries no payload; its `Display` output is the fixed message
/// `Invalid Regexp encoding`.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct InvalidEncodingError {
    // Private unit field so the struct cannot be built with a literal outside
    // this module; use `new` or `Default` instead.
    _private: (),
}

impl InvalidEncodingError {
    /// Creates an `InvalidEncodingError`.
    ///
    /// Equivalent to `InvalidEncodingError::default()`, but usable in `const`
    /// contexts.
    #[must_use]
    pub const fn new() -> Self {
        let _private = ();
        Self { _private }
    }
}

impl fmt::Display for InvalidEncodingError {
    /// Writes the fixed, human-readable error message to `f`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        const MESSAGE: &str = "Invalid Regexp encoding";
        f.write_str(MESSAGE)
    }
}

// No underlying cause is attached, so the default `Error` methods suffice.
impl error::Error for InvalidEncodingError {}
32
/// The encoding of a Regexp literal.
///
/// Regexps are assumed to use the source encoding but literals may override
/// the encoding with a Regexp modifier.
///
/// `No` and `None` compare equal under [`PartialEq`]; only `Fixed` is
/// distinguished. The [`Hash`] implementation is consistent with that
/// equivalence.
///
/// See [`Regexp` encoding][regexp-encoding].
///
/// [regexp-encoding]: https://ruby-doc.org/core-3.1.2/Regexp.html#class-Regexp-label-Encoding
// NOTE(review): the derived `PartialOrd`/`Ord` still order `No` before `None`
// even though the two compare equal. Confirm whether ordering should be
// canonicalized the same way as hashing.
#[derive(Debug, Clone, Copy, PartialOrd, Ord)]
pub enum Encoding {
    /// A fixed encoding; selected by the `u`, `s`, or `e` modifiers or the
    /// `FIXEDENCODING` flag.
    Fixed,
    /// Explicit "no encoding"; selected by the `n` modifier or the
    /// `NOENCODING` flag.
    No,
    /// No encoding modifier was given. This is the default.
    None,
}

impl Default for Encoding {
    /// Returns [`Encoding::None`].
    fn default() -> Self {
        Self::None
    }
}

impl Hash for Encoding {
    /// Feeds a canonical discriminant into `state`.
    ///
    /// `No` and `None` are equal according to `PartialEq`, so they must
    /// produce the same hash; otherwise hash-based collections would treat
    /// equal keys as distinct. Both are collapsed to `None` before hashing.
    fn hash<H: Hasher>(&self, state: &mut H) {
        let canonical = match self {
            Self::Fixed => Self::Fixed,
            Self::No | Self::None => Self::None,
        };
        mem::discriminant(&canonical).hash(state);
    }
}

impl PartialEq for Encoding {
    /// Two encodings are equal when both are `Fixed` or when neither is
    /// `Fixed` (`No` and `None` are interchangeable).
    fn eq(&self, other: &Self) -> bool {
        matches!(
            (self, other),
            (Self::No | Self::None, Self::No | Self::None) | (Self::Fixed, Self::Fixed)
        )
    }
}

impl Eq for Encoding {}
70
71impl TryFrom<Flags> for Encoding {
72    type Error = InvalidEncodingError;
73
74    fn try_from(mut flags: Flags) -> Result<Self, Self::Error> {
75        flags.set(Flags::ALL_REGEXP_OPTS, false);
76        if flags.intersects(Flags::FIXEDENCODING) {
77            Ok(Self::Fixed)
78        } else if flags.intersects(Flags::NOENCODING) {
79            Ok(Encoding::No)
80        } else if flags.is_empty() {
81            Ok(Encoding::new())
82        } else {
83            Err(InvalidEncodingError::new())
84        }
85    }
86}
87
88impl TryFrom<u8> for Encoding {
89    type Error = InvalidEncodingError;
90
91    fn try_from(flags: u8) -> Result<Self, Self::Error> {
92        let flags = Flags::from_bits(flags).ok_or_else(InvalidEncodingError::new)?;
93        Self::try_from(flags)
94    }
95}
96
97impl TryFrom<i64> for Encoding {
98    type Error = InvalidEncodingError;
99
100    fn try_from(flags: i64) -> Result<Self, Self::Error> {
101        let [byte, ..] = flags.to_le_bytes();
102        Self::try_from(byte)
103    }
104}
105
106impl TryFrom<&str> for Encoding {
107    type Error = InvalidEncodingError;
108
109    fn try_from(encoding: &str) -> Result<Self, Self::Error> {
110        if encoding.contains('u') && encoding.contains('n') {
111            return Err(InvalidEncodingError::new());
112        }
113        let mut enc = None;
114        for flag in encoding.bytes() {
115            match flag {
116                b'u' | b's' | b'e' if enc.is_none() => enc = Some(Encoding::Fixed),
117                b'n' if enc.is_none() => enc = Some(Encoding::No),
118                b'i' | b'm' | b'x' | b'o' => continue,
119                _ => return Err(InvalidEncodingError::new()),
120            }
121        }
122        Ok(enc.unwrap_or_default())
123    }
124}
125
126impl TryFrom<&[u8]> for Encoding {
127    type Error = InvalidEncodingError;
128
129    fn try_from(encoding: &[u8]) -> Result<Self, Self::Error> {
130        if encoding.find_byte(b'u').is_some() && encoding.find_byte(b'n').is_some() {
131            return Err(InvalidEncodingError::new());
132        }
133        let mut enc = None;
134        for &flag in encoding {
135            match flag {
136                b'u' | b's' | b'e' if enc.is_none() => enc = Some(Encoding::Fixed),
137                b'n' if enc.is_none() => enc = Some(Encoding::No),
138                b'i' | b'm' | b'x' | b'o' | b'l' => continue,
139                _ => return Err(InvalidEncodingError::new()),
140            }
141        }
142        Ok(enc.unwrap_or_default())
143    }
144}
145
146impl TryFrom<String> for Encoding {
147    type Error = InvalidEncodingError;
148
149    fn try_from(encoding: String) -> Result<Self, Self::Error> {
150        Self::try_from(encoding.as_str())
151    }
152}
153
154impl TryFrom<Vec<u8>> for Encoding {
155    type Error = InvalidEncodingError;
156
157    fn try_from(encoding: Vec<u8>) -> Result<Self, Self::Error> {
158        Self::try_from(encoding.as_slice())
159    }
160}
161
162impl From<Encoding> for Flags {
163    /// Convert an `Encoding` to its bit flag representation.
164    fn from(encoding: Encoding) -> Self {
165        encoding.flags()
166    }
167}
168
169impl From<&Encoding> for Flags {
170    /// Convert an `Encoding` to its bit flag representation.
171    fn from(encoding: &Encoding) -> Self {
172        encoding.flags()
173    }
174}
175
176impl From<Encoding> for u8 {
177    /// Convert an `Encoding` to its bit representation.
178    fn from(encoding: Encoding) -> Self {
179        encoding.into_bits()
180    }
181}
182
183impl From<&Encoding> for u8 {
184    /// Convert an `Encoding` to its bit representation.
185    fn from(encoding: &Encoding) -> Self {
186        encoding.into_bits()
187    }
188}
189
190impl From<Encoding> for i64 {
191    /// Convert an `Encoding` to its widened bit representation.
192    fn from(encoding: Encoding) -> Self {
193        encoding.into_bits().into()
194    }
195}
196
197impl From<&Encoding> for i64 {
198    /// Convert an `Encoding` to its widened bit representation.
199    fn from(encoding: &Encoding) -> Self {
200        encoding.into_bits().into()
201    }
202}
203
204impl fmt::Display for Encoding {
205    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
206        f.write_str(self.as_modifier_str())
207    }
208}
209
210impl Encoding {
211    /// Construct a new [`None`] encoding.
212    ///
213    /// [`None`]: Self::None
214    #[must_use]
215    pub const fn new() -> Self {
216        Self::None
217    }
218
219    /// Convert an `Encoding` to its bit flag representation.
220    ///
221    /// Alias for the corresponding `Into<Flags>` implementation.
222    #[must_use]
223    pub const fn flags(self) -> Flags {
224        match self {
225            Self::Fixed => Flags::FIXEDENCODING,
226            Self::No => Flags::NOENCODING,
227            Self::None => Flags::empty(),
228        }
229    }
230
231    /// Convert an `Encoding` to its bit representation.
232    ///
233    /// Alias for the corresponding `Into<u8>` implementation.
234    #[must_use]
235    pub const fn into_bits(self) -> u8 {
236        self.flags().bits()
237    }
238
239    /// Serialize the encoding flags to a string suitable for a `Regexp` display
240    /// or debug implementation.
241    ///
242    /// See also [`Regexp#inspect`][regexp-inspect].
243    ///
244    /// [regexp-inspect]: https://ruby-doc.org/core-3.1.2/Regexp.html#method-i-inspect
245    #[must_use]
246    pub const fn as_modifier_str(self) -> &'static str {
247        match self {
248            Self::Fixed | Self::None => "",
249            Self::No => "n",
250        }
251    }
252}