spinoso_regexp/
options.rs

1//! Parse options parameter to `Regexp#initialize` and `Regexp::compile`.
2
3use core::fmt;
4
5use bstr::ByteSlice;
6
7use crate::Flags;
8
/// The state of a Regexp engine flag in [`Options`].
///
/// A flag is either switched off ([`RegexpOption::Disabled`], the default) or
/// switched on ([`RegexpOption::Enabled`]).
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub enum RegexpOption {
    /// Engine feature is disabled.
    ///
    /// Features are disabled by default.
    Disabled,
    /// Engine feature is enabled.
    Enabled,
}
19
20impl RegexpOption {
21    /// Construct a new, disabled `RegexpOption`.
22    #[must_use]
23    pub const fn new() -> Self {
24        Self::Disabled
25    }
26
27    /// Return whether this option is enabled.
28    ///
29    /// An option is enabled if it is equal to [`RegexpOption::Enabled`].
30    #[must_use]
31    pub const fn is_enabled(self) -> bool {
32        matches!(self, Self::Enabled)
33    }
34}
35
36impl Default for RegexpOption {
37    /// Create a disabled `RegexpOption`.
38    fn default() -> Self {
39        Self::Disabled
40    }
41}
42
43impl From<bool> for RegexpOption {
44    /// Convert from `bool` to its `RegexpOption` representation.
45    ///
46    /// `true` creates a [`RegexpOption::Enabled`]. `false` creates a
47    /// [`RegexpOption::Disabled`].
48    fn from(value: bool) -> Self {
49        if value { Self::Enabled } else { Self::Disabled }
50    }
51}
52
53impl From<RegexpOption> for bool {
54    /// Convert from `RegexpOption` to its Boolean representation.
55    ///
56    /// See also [`is_enabled`].
57    ///
58    /// [`is_enabled`]: RegexpOption::is_enabled
59    fn from(value: RegexpOption) -> Self {
60        matches!(value, RegexpOption::Enabled)
61    }
62}
63
64/// Configuration options for Ruby Regexps.
65///
66/// Options can be supplied either as an `Integer` object to `Regexp::new` or
67/// inline in Regexp literals like `/artichoke/i`.
68#[derive(Default, Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)]
69pub struct Options {
70    flags: Flags,
71}
72
73impl From<Options> for Flags {
74    /// Convert an `Options` to its bit flag representation.
75    fn from(opts: Options) -> Self {
76        opts.flags
77    }
78}
79
80impl From<Options> for u8 {
81    /// Convert an `Options` to its bit representation.
82    fn from(opts: Options) -> Self {
83        opts.flags.bits()
84    }
85}
86
87impl From<Options> for i64 {
88    /// Convert an `Options` to its widened bit representation.
89    fn from(opts: Options) -> Self {
90        opts.flags.bits().into()
91    }
92}
93
94impl From<Flags> for Options {
95    fn from(mut flags: Flags) -> Self {
96        flags.remove(Flags::FIXEDENCODING | Flags::NOENCODING);
97        Self { flags }
98    }
99}
100
101impl From<u8> for Options {
102    fn from(flags: u8) -> Self {
103        let flags = Flags::from_bits_truncate(flags);
104        Self::from(flags)
105    }
106}
107
108impl From<i64> for Options {
109    /// Truncate the given `i64` to one byte and generate flags.
110    ///
111    /// See `From<u8>`. For a conversion that fails if the given `i64` is
112    /// larger than [`u8::MAX`], see [`try_from_int`].
113    ///
114    /// [`try_from_int`]: Self::try_from_int
115    fn from(flags: i64) -> Self {
116        let [byte, ..] = flags.to_le_bytes();
117        Self::from(byte)
118    }
119}
120
121impl From<Option<bool>> for Options {
122    fn from(options: Option<bool>) -> Self {
123        match options {
124            Some(false) | None => Self::new(),
125            Some(true) => Self::with_ignore_case(),
126        }
127    }
128}
129
130impl From<&str> for Options {
131    fn from(options: &str) -> Self {
132        Self::from(options.as_bytes())
133    }
134}
135
136impl From<&[u8]> for Options {
137    fn from(options: &[u8]) -> Self {
138        let mut flags = Flags::empty();
139        flags.set(Flags::MULTILINE, options.find_byte(b'm').is_some());
140        flags.set(Flags::IGNORECASE, options.find_byte(b'i').is_some());
141        flags.set(Flags::EXTENDED, options.find_byte(b'x').is_some());
142        flags.set(Flags::LITERAL, options.find_byte(b'l').is_some());
143        Self { flags }
144    }
145}
146
147impl From<String> for Options {
148    fn from(options: String) -> Self {
149        Self::from(options.as_str())
150    }
151}
152
153impl From<Vec<u8>> for Options {
154    fn from(options: Vec<u8>) -> Self {
155        Self::from(options.as_slice())
156    }
157}
158
159impl fmt::Display for Options {
160    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
161        f.write_str(self.as_display_modifier())
162    }
163}
164
165impl Options {
166    /// Constructs a new, default `Options`.
167    #[must_use]
168    pub const fn new() -> Self {
169        Self { flags: Flags::empty() }
170    }
171
172    /// An options instance that has only case insensitive mode enabled.
173    #[must_use]
174    pub const fn with_ignore_case() -> Self {
175        Self {
176            flags: Flags::IGNORECASE,
177        }
178    }
179
180    /// Try to parse an `Options` from a full-width `i64`.
181    ///
182    /// If `options` cannot be converted losslessly to a `u8`, this function
183    /// returns [`None`]. See `From<u8>`.
184    ///
185    /// For a conversion from `i64` that truncates the given `options` to `u8`,
186    /// see `From<i64>`.
187    #[must_use]
188    pub fn try_from_int(options: i64) -> Option<Self> {
189        let options = u8::try_from(options).ok()?;
190        Some(Self::from(options))
191    }
192
193    /// Convert an `Options` to its bit flag representation.
194    ///
195    /// Alias for the corresponding `Into<Flags>` implementation.
196    #[must_use]
197    pub fn flags(self) -> Flags {
198        let mut flags = self.flags;
199        flags.remove(Flags::LITERAL);
200        flags
201    }
202
203    /// Convert an `Options` to its bit representation.
204    ///
205    /// Alias for the corresponding `Into<u8>` implementation.
206    #[must_use]
207    pub const fn into_bits(self) -> u8 {
208        self.flags.bits()
209    }
210
211    /// Whether these `Options` are configured for multiline mode.
212    #[must_use]
213    pub const fn multiline(self) -> RegexpOption {
214        if self.flags.intersects(Flags::MULTILINE) {
215            RegexpOption::Enabled
216        } else {
217            RegexpOption::Disabled
218        }
219    }
220
221    /// Whether these `Options` are configured for case-insensitive mode.
222    #[must_use]
223    pub const fn ignore_case(self) -> RegexpOption {
224        if self.flags.intersects(Flags::IGNORECASE) {
225            RegexpOption::Enabled
226        } else {
227            RegexpOption::Disabled
228        }
229    }
230
231    /// Whether these `Options` are configured for extended mode with
232    /// insignificant whitespace.
233    #[must_use]
234    pub const fn extended(self) -> RegexpOption {
235        if self.flags.intersects(Flags::EXTENDED) {
236            RegexpOption::Enabled
237        } else {
238            RegexpOption::Disabled
239        }
240    }
241
242    /// Whether the Regexp was parsed as a literal, e.g. `'/artichoke/i`.
243    ///
244    /// This enables Ruby parsers to inject whether a Regexp is a literal to the
245    /// core library. Literal Regexps have some special behavior regarding
246    /// capturing groups and report parse failures differently.
247    #[must_use]
248    pub const fn is_literal(self) -> bool {
249        self.flags.intersects(Flags::LITERAL)
250    }
251
252    /// Serialize the option flags to a string suitable for a `Regexp` display
253    /// or debug implementation.
254    ///
255    /// See also [`Regexp#inspect`][regexp-inspect].
256    ///
257    /// [regexp-inspect]: https://ruby-doc.org/core-3.1.2/Regexp.html#method-i-inspect
258    #[must_use]
259    pub const fn as_display_modifier(self) -> &'static str {
260        use RegexpOption::{Disabled, Enabled};
261
262        match (self.multiline(), self.ignore_case(), self.extended()) {
263            (Enabled, Enabled, Enabled) => "mix",
264            (Enabled, Enabled, Disabled) => "mi",
265            (Enabled, Disabled, Enabled) => "mx",
266            (Enabled, Disabled, Disabled) => "m",
267            (Disabled, Enabled, Enabled) => "ix",
268            (Disabled, Enabled, Disabled) => "i",
269            (Disabled, Disabled, Enabled) => "x",
270            (Disabled, Disabled, Disabled) => "",
271        }
272    }
273
274    /// Serialize the option flags to a string suitable for including in a raw
275    /// pattern for configuring an underlying `Regexp`.
276    #[must_use]
277    pub const fn as_inline_modifier(self) -> &'static str {
278        use RegexpOption::{Disabled, Enabled};
279
280        match (self.multiline(), self.ignore_case(), self.extended()) {
281            (Enabled, Enabled, Enabled) => "mix",
282            (Enabled, Enabled, Disabled) => "mi-x",
283            (Enabled, Disabled, Enabled) => "mx-i",
284            (Enabled, Disabled, Disabled) => "m-ix",
285            (Disabled, Enabled, Enabled) => "ix-m",
286            (Disabled, Enabled, Disabled) => "i-mx",
287            (Disabled, Disabled, Enabled) => "x-mi",
288            (Disabled, Disabled, Disabled) => "-mix",
289        }
290    }
291
292    /// Inserts or removes the specified flags depending on the passed value.
293    pub fn set(&mut self, other: Flags, value: bool) {
294        self.flags.set(other, value);
295    }
296}
297
#[cfg(test)]
mod tests {
    use super::{Options, RegexpOption};
    use crate::Flags;

    /// The three user-facing mode flags, in the order the tests probe them.
    const SINGLE_MODES: [Flags; 3] = [Flags::EXTENDED, Flags::IGNORECASE, Flags::MULTILINE];

    /// Widen the bits of `flags` to an `i64`.
    fn wide(flags: Flags) -> i64 {
        i64::from(flags.bits())
    }

    /// Build an `Options` by OR-ing `flags` straight into a fresh instance,
    /// without going through any of the `From` conversions under test.
    fn raw(flags: Flags) -> Options {
        let mut opts = Options::new();
        opts.flags |= flags;
        opts
    }

    /// Assert the state reported by each of the three mode accessors.
    fn assert_modes(
        opts: Options,
        ignore_case: RegexpOption,
        extended: RegexpOption,
        multiline: RegexpOption,
    ) {
        assert_eq!(opts.ignore_case(), ignore_case);
        assert_eq!(opts.extended(), extended);
        assert_eq!(opts.multiline(), multiline);
    }

    /// Shared checks for an `Options` with exactly one mode flag set.
    ///
    /// `others` are the two remaining mode flags; combining either of them
    /// with `mode` must produce a different `Options`.
    fn check_single_mode(mode: Flags, others: [Flags; 2]) -> Options {
        let opts = raw(mode);
        assert_eq!(Options::from(mode), opts);
        assert_eq!(Options::try_from_int(wide(mode) | i64::MAX), None);
        assert_eq!(Options::try_from_int(wide(mode) | 4096), None);
        assert_eq!(Options::from(mode.bits() | 64), opts);
        for other in others {
            assert_ne!(Options::from(mode | other), opts);
        }
        assert_ne!(
            Options::from(Flags::EXTENDED | Flags::IGNORECASE | Flags::MULTILINE),
            opts
        );
        opts
    }

    /// Shared checks for an `Options` that has all three mode flags set.
    fn check_all_modes(opts: Options) {
        for mode in SINGLE_MODES {
            assert_ne!(Options::from(mode), opts);
        }
        for mode in SINGLE_MODES {
            assert_eq!(Options::try_from_int(wide(mode) | i64::MAX), None);
        }
        for mode in SINGLE_MODES {
            assert_eq!(Options::try_from_int(wide(mode) | 4096), None);
        }
        assert_eq!(
            Options::try_from_int(wide(Flags::EXTENDED) | wide(Flags::MULTILINE) | 4096),
            None
        );
        for mode in SINGLE_MODES {
            assert_ne!(Options::from(mode.bits() | 64), opts);
        }
        assert_ne!(
            Options::from(Flags::EXTENDED.bits() | Flags::MULTILINE.bits() | 64),
            opts
        );
        assert_ne!(Options::from(Flags::EXTENDED | Flags::IGNORECASE), opts);
        assert_ne!(Options::from(Flags::MULTILINE | Flags::IGNORECASE), opts);
    }

    #[test]
    fn new_is_empty_flags() {
        let from_empty = Options::from(Flags::empty());
        assert_eq!(Options::new(), from_empty);
    }

    #[test]
    fn from_all_flags_ignores_encoding_and_literal() {
        let from_all = Options::from(Flags::all());
        let expected = Options::from(Flags::ALL_REGEXP_OPTS | Flags::LITERAL);
        assert_eq!(from_all, expected);
    }

    // When options is an `Integer`, it is expected to be one or more of the
    // constants `Regexp::EXTENDED`, `Regexp::IGNORECASE`, and
    // `Regexp::MULTILINE`, or-ed together. Otherwise, any options value other
    // than `nil` or `false` makes the regexp case insensitive.

    #[test]
    fn parse_options_from_option_bool() {
        assert_eq!(Options::from(None), Options::new());
        assert_eq!(Options::from(Some(false)), Options::new());
        assert_eq!(Options::from(Some(true)), Options::with_ignore_case());
    }

    #[test]
    fn new_is_ignore_case() {
        let opts = raw(Flags::IGNORECASE);
        assert_eq!(Options::with_ignore_case(), opts);
    }

    #[test]
    fn make_options_extended() {
        let opts = check_single_mode(Flags::EXTENDED, [Flags::IGNORECASE, Flags::MULTILINE]);
        assert_modes(
            opts,
            RegexpOption::Disabled,
            RegexpOption::Enabled,
            RegexpOption::Disabled,
        );
    }

    #[test]
    fn make_options_ignore_case() {
        let opts = check_single_mode(Flags::IGNORECASE, [Flags::EXTENDED, Flags::MULTILINE]);
        assert_modes(
            opts,
            RegexpOption::Enabled,
            RegexpOption::Disabled,
            RegexpOption::Disabled,
        );
    }

    #[test]
    fn make_options_multiline() {
        let opts = check_single_mode(Flags::MULTILINE, [Flags::IGNORECASE, Flags::EXTENDED]);
        assert_modes(
            opts,
            RegexpOption::Disabled,
            RegexpOption::Disabled,
            RegexpOption::Enabled,
        );
    }

    #[test]
    fn make_options_extended_ignore_case() {
        let opts = raw(Flags::EXTENDED | Flags::IGNORECASE);
        assert_ne!(Options::from(Flags::EXTENDED), opts);
        assert_ne!(Options::from(Flags::IGNORECASE), opts);

        let too_wide = [
            wide(Flags::EXTENDED) | i64::MAX,
            wide(Flags::IGNORECASE) | i64::MAX,
            wide(Flags::EXTENDED) | wide(Flags::IGNORECASE) | i64::MAX,
            wide(Flags::EXTENDED) | 4096,
            wide(Flags::MULTILINE) | 4096,
            wide(Flags::EXTENDED) | wide(Flags::MULTILINE) | 4096,
        ];
        for value in too_wide {
            assert_eq!(Options::try_from_int(value), None);
        }

        assert_eq!(
            Options::from(Flags::EXTENDED.bits() | Flags::IGNORECASE.bits() | 64),
            opts
        );
        assert_eq!(Options::from(Flags::EXTENDED | Flags::IGNORECASE), opts);
        assert_ne!(
            Options::from(Flags::EXTENDED | Flags::IGNORECASE | Flags::MULTILINE),
            opts
        );
        assert_modes(
            opts,
            RegexpOption::Enabled,
            RegexpOption::Enabled,
            RegexpOption::Disabled,
        );
    }

    #[test]
    fn make_options_extended_ignore_case_multiline() {
        let opts = raw(Flags::EXTENDED | Flags::IGNORECASE | Flags::MULTILINE);
        check_all_modes(opts);
        assert_eq!(
            Options::from(Flags::EXTENDED | Flags::IGNORECASE | Flags::MULTILINE),
            opts
        );
        assert_eq!(Options::from(Flags::ALL_REGEXP_OPTS), opts);
        assert_modes(
            opts,
            RegexpOption::Enabled,
            RegexpOption::Enabled,
            RegexpOption::Enabled,
        );
    }

    #[test]
    fn make_options_all_opts() {
        // `ALL_REGEXP_OPTS` is equivalent to `EXTENDED | IGNORECASE | MULTILINE` flags.
        let opts = raw(Flags::ALL_REGEXP_OPTS);
        check_all_modes(opts);
        assert_eq!(
            Options::from(Flags::EXTENDED | Flags::IGNORECASE | Flags::MULTILINE),
            opts
        );
        assert_eq!(Options::from(Flags::ALL_REGEXP_OPTS), opts);
        assert_modes(
            opts,
            RegexpOption::Enabled,
            RegexpOption::Enabled,
            RegexpOption::Enabled,
        );
    }

    #[test]
    fn make_options_flags_all() {
        // Ignore encoding and literal flags.
        let opts = Options::from(Flags::all());
        check_all_modes(opts);
        assert_eq!(
            Options::from(Flags::EXTENDED | Flags::IGNORECASE | Flags::MULTILINE | Flags::LITERAL),
            opts
        );

        // With the literal flag cleared, only the three mode flags remain.
        let mut without_literal = opts.flags;
        without_literal.remove(Flags::LITERAL);
        assert_eq!(Options::from(Flags::ALL_REGEXP_OPTS).flags, without_literal);
        assert_modes(
            opts,
            RegexpOption::Enabled,
            RegexpOption::Enabled,
            RegexpOption::Enabled,
        );
    }
}