1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
#[macro_use]
extern crate bitflags;

use bstr::ByteSlice;
use core::fmt;
use core::num::NonZeroUsize;
use std::borrow::Cow;

mod debug;
mod encoding;
mod error;
mod options;
mod regexp;

pub use debug::Debug;
pub use encoding::{Encoding, InvalidEncodingError};
pub use error::{ArgumentError, Error, RegexpError, SyntaxError};
pub use options::{Options, RegexpOption};

bitflags! {
    /// Bit set of Regexp-related flags packed into a single `u8`.
    ///
    /// The low three bits are the Regexp option bits; the remaining named bits
    /// carry encoding and literal markers.
    ///
    /// NOTE(review): the flag names appear to mirror the option constants on
    /// Ruby's `Regexp` class (`IGNORECASE`, `EXTENDED`, `MULTILINE`,
    /// `FIXEDENCODING`, `NOENCODING`) -- confirm against the consumers of this
    /// type before relying on the exact bit values.
    #[derive(Default)]
    pub struct Flags: u8 {
        /// Bit 0: ignore-case option.
        const IGNORECASE      = 0b00000001;
        /// Bit 1: extended option.
        const EXTENDED        = 0b00000010;
        /// Bit 2: multiline option.
        const MULTILINE       = 0b00000100;
        /// Union of `IGNORECASE`, `EXTENDED` and `MULTILINE`.
        const ALL_REGEXP_OPTS = Self::IGNORECASE.bits | Self::EXTENDED.bits | Self::MULTILINE.bits;

        /// Bit 4: fixed-encoding marker.
        const FIXEDENCODING   = 0b00010000;
        /// Bit 5: no-encoding marker.
        const NOENCODING      = 0b00100000;

        /// Bit 7: literal marker.
        ///
        /// NOTE(review): presumably set when the Regexp came from a literal in
        /// source code -- confirm against the parser integration.
        const LITERAL         = 0b10000000;
    }
}

/// Name of the Ruby special global variable `$&`, which holds the string
/// matched by the last successful match.
pub const LAST_MATCHED_STRING: &[u8] = b"$&";

/// Name of the Ruby special global variable `` $` ``, which holds the string to
/// the left of the last successful match.
pub const STRING_LEFT_OF_MATCH: &[u8] = b"$`";

/// Name of the Ruby special global variable `$'`, which holds the string to
/// the right of the last successful match.
pub const STRING_RIGHT_OF_MATCH: &[u8] = b"$'";

/// Name of the Ruby special global variable `$+`, which holds the highest
/// group matched by the last successful match.
// TODO: implement this.
pub const HIGHEST_MATCH_GROUP: &[u8] = b"$+";

/// Name of the Ruby special global variable `$~`, which holds the information
/// about the last match in the current scope.
pub const LAST_MATCH: &[u8] = b"$~";

/// A Regexp pattern, stored as raw bytes, paired with the [`Options`] it was
/// given.
///
/// `Source` is convertible to and from [`Config`], which stores the same two
/// fields.
#[derive(Default, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct Source {
    // Raw pattern bytes; not required to be valid UTF-8.
    pattern: Vec<u8>,
    // Options associated with `pattern`.
    options: Options,
}

impl fmt::Debug for Source {
    /// Format as `Source { pattern: .., options: .. }`.
    ///
    /// The pattern is rendered through its `bstr` view so the bytes print as a
    /// string instead of a list of integers.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { pattern, options } = self;
        let mut builder = f.debug_struct("Source");
        builder.field("pattern", &pattern.as_bstr());
        builder.field("options", options);
        builder.finish()
    }
}

impl From<Config> for Source {
    fn from(config: Config) -> Self {
        Self::with_pattern_and_options(config.pattern.clone(), config.options)
    }
}

impl From<&Config> for Source {
    /// Build a `Source` from a borrowed [`Config`].
    ///
    /// The pattern bytes are copied into a new buffer because `config` is only
    /// borrowed; the options are copied by value.
    fn from(config: &Config) -> Self {
        let pattern = config.pattern.clone();
        let options = config.options;
        Self { pattern, options }
    }
}

impl Source {
    /// Create a `Source` with an empty pattern and the [`Options`] produced by
    /// `Options::new`.
    pub const fn new() -> Self {
        Self::with_pattern_and_options(Vec::new(), Options::new())
    }

    /// Create a `Source` from the given pattern bytes and [`Options`].
    pub const fn with_pattern_and_options(pattern: Vec<u8>, options: Options) -> Self {
        Self { pattern, options }
    }

    /// Whether the ignore-case option is enabled for this source.
    pub const fn is_casefold(&self) -> bool {
        let ignore_case = self.options.ignore_case();
        ignore_case.is_enabled()
    }

    /// Whether the Regexp was parsed as a literal, e.g. `/artichoke/i`.
    ///
    /// This lets Ruby parsers tell the core library that a Regexp is a
    /// literal. Literal Regexps have some special behavior regarding capturing
    /// groups and report parse failures differently.
    pub const fn is_literal(&self) -> bool {
        self.options.is_literal()
    }

    /// Borrow the entire pattern as a byte slice.
    pub fn pattern(&self) -> &[u8] {
        &self.pattern
    }

    /// Return a copy of the [`Options`] stored in this source.
    pub const fn options(&self) -> Options {
        self.options
    }
}

/// A Regexp pattern, stored as raw bytes, paired with the [`Options`] it was
/// given.
///
/// `Config` is convertible to and from [`Source`], which stores the same two
/// fields.
#[derive(Default, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct Config {
    // Raw pattern bytes; not required to be valid UTF-8.
    pattern: Vec<u8>,
    // Options associated with `pattern`.
    options: Options,
}

impl fmt::Debug for Config {
    /// Format as `Config { pattern: .., options: .. }`.
    ///
    /// The pattern is rendered through its `bstr` view so the bytes print as a
    /// string instead of a list of integers.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Use this type's own name so debug output is not mistaken for a
        // `Source`.
        f.debug_struct("Config")
            .field("pattern", &self.pattern.as_bstr())
            .field("options", &self.options)
            .finish()
    }
}

impl From<Source> for Config {
    fn from(source: Source) -> Self {
        Self::with_pattern_and_options(source.pattern.clone(), source.options)
    }
}

impl From<&Source> for Config {
    /// Build a `Config` from a borrowed [`Source`].
    ///
    /// The pattern bytes are copied into a new buffer because `source` is only
    /// borrowed; the options are copied by value.
    fn from(source: &Source) -> Self {
        let pattern = source.pattern.clone();
        let options = source.options;
        Self { pattern, options }
    }
}

impl Config {
    /// Create a `Config` with an empty pattern and the [`Options`] produced by
    /// `Options::new`.
    pub const fn new() -> Self {
        Self::with_pattern_and_options(Vec::new(), Options::new())
    }

    /// Create a `Config` from the given pattern bytes and [`Options`].
    pub const fn with_pattern_and_options(pattern: Vec<u8>, options: Options) -> Self {
        Self { pattern, options }
    }

    /// Borrow the entire pattern as a byte slice.
    pub fn pattern(&self) -> &[u8] {
        &self.pattern
    }

    /// Return a copy of the [`Options`] stored in this config.
    pub const fn options(&self) -> Options {
        self.options
    }
}

/// Global variable name for the nth capture group from a `Regexp` match.
///
/// The name is `$` followed by the decimal group number, e.g. `$3`. Names for
/// groups 1 through 20 are returned as borrowed static byte strings; larger
/// group numbers are formatted into a freshly allocated buffer.
#[inline]
#[must_use]
pub fn nth_match_group(group: NonZeroUsize) -> Cow<'static, [u8]> {
    // Pre-built names for the most common groups, indexed by `group - 1`.
    static SMALL_GROUPS: [&[u8]; 20] = [
        b"$1", b"$2", b"$3", b"$4", b"$5", b"$6", b"$7", b"$8", b"$9", b"$10", b"$11", b"$12",
        b"$13", b"$14", b"$15", b"$16", b"$17", b"$18", b"$19", b"$20",
    ];

    let num = group.get();
    // `group` is non-zero, so `num - 1` cannot underflow.
    if let Some(&name) = SMALL_GROUPS.get(num - 1) {
        return Cow::Borrowed(name);
    }

    let mut name = String::from("$");
    // Suppress fmt errors because this function is infallible.
    //
    // In practice `itoa::fmt` will never error because the `fmt::Write` impl
    // for `String` never returns an error.
    let _ = itoa::fmt(&mut name, num);
    Cow::Owned(name.into_bytes())
}

#[cfg(test)]
mod tests {
    use core::num::NonZeroUsize;
    use std::borrow::Cow;

    use super::nth_match_group;

    // Every group name must be `$` followed by the decimal group number.
    #[test]
    fn match_group_symbol() {
        for n in 1..=1024_usize {
            let group = NonZeroUsize::new(n).unwrap();
            let sym = nth_match_group(group);
            let digits = group.to_string();
            assert!(sym.len() > 1);
            let (sigil, rest) = sym.split_at(1);
            assert_eq!(*sigil, *b"$");
            assert_eq!(*rest, *digits.as_bytes());
        }
    }

    // Groups 1 through 20 are served from static data; anything larger is
    // allocated on demand.
    #[test]
    fn some_globals_are_static_slices() {
        let name_for = |n: usize| nth_match_group(NonZeroUsize::new(n).unwrap());

        for n in 1..=20 {
            assert!(matches!(name_for(n), Cow::Borrowed(_)));
        }
        for n in 21..=1024 {
            assert!(matches!(name_for(n), Cow::Owned(_)));
        }
    }
}