artichoke_backend/extn/core/regexp/
pattern.rs1use core::iter;
4
5use bstr::ByteSlice;
6
7use super::{Flags, Options, RegexpOption};
8
9#[derive(Default, Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
11pub struct Pattern {
12 pattern: Vec<u8>,
13 options: Options,
14}
15
16impl Pattern {
17 #[must_use]
19 pub fn into_pattern(self) -> Vec<u8> {
20 self.pattern
21 }
22
23 #[must_use]
25 pub const fn options(&self) -> Options {
26 self.options
27 }
28}
29
30#[inline]
31#[must_use]
32fn build_pattern<T>(pattern: T, options: Options) -> Pattern
33where
34 T: IntoIterator<Item = u8>,
35{
36 let iter = pattern.into_iter();
37 let hint = iter.size_hint();
38 let modifiers = options.as_inline_modifier();
39 let mut parsed = Vec::with_capacity(2 + modifiers.len() + 2 + hint.1.unwrap_or(hint.0));
40 parsed.extend_from_slice(b"(?");
41 parsed.extend_from_slice(modifiers.as_bytes());
42 parsed.push(b':');
43 parsed.extend(iter);
44 parsed.push(b')');
45 Pattern {
46 pattern: parsed,
47 options,
48 }
49}
50
51#[must_use]
52pub fn parse<T: AsRef<[u8]>>(pattern: T, options: Options) -> Pattern {
53 let pattern = pattern.as_ref();
54 let mut chars = pattern.bytes().enumerate();
55
56 match chars.next() {
57 Some((_, b'(')) => {}
58 Some(_) => return build_pattern(pattern.bytes(), options),
59 None => return build_pattern(iter::empty(), options),
60 }
61 match chars.next() {
62 Some((_, b'?')) => {}
63 Some(_) => return build_pattern(pattern.bytes(), options),
64 None => return build_pattern(iter::once(b'('), options),
65 }
66
67 let orignal_options = options;
68 let mut options = options;
69 let mut enable_literal_option = RegexpOption::Enabled;
70
71 for (_, token) in &mut chars {
72 match token {
73 b'-' => enable_literal_option = RegexpOption::Disabled,
74 b'i' => {
75 options.set(Flags::IGNORECASE, enable_literal_option.into());
76 }
77 b'm' => {
78 options.set(Flags::MULTILINE, enable_literal_option.into());
79 }
80 b'x' => {
81 options.set(Flags::EXTENDED, enable_literal_option.into());
82 }
83 b':' => break,
84 _ => return build_pattern(pattern.bytes(), options),
85 }
86 }
87
88 let mut chars = chars.peekable();
89 let cursor = if let Some((idx, _)) = chars.peek() {
90 *idx
91 } else {
92 pattern.len()
93 };
94
95 let mut nest = 1;
96 while let Some((_, token)) = chars.next() {
97 if token == b'(' {
98 nest += 1;
99 } else if token == b')' {
100 nest -= 1;
101 if nest == 0 && chars.next().is_some() {
102 return build_pattern(pattern.bytes(), orignal_options);
103 }
104 break;
105 }
106 }
107
108 let slice = pattern.get(cursor..).unwrap_or_default();
109 let modifiers = options.as_inline_modifier();
110 let mut parsed = Vec::with_capacity(2 + modifiers.len() + 1 + slice.len());
111 parsed.extend(b"(?");
112 parsed.extend(modifiers.as_bytes());
113 parsed.push(b':');
114 parsed.extend_from_slice(slice);
115 Pattern {
116 pattern: parsed,
117 options,
118 }
119}
120
#[cfg(test)]
mod tests {
    use bstr::BString;

    use crate::extn::core::regexp::{Flags, Options};

    /// Parse `source` with `opts` and assert that the expanded pattern bytes
    /// equal `expected`. Both sides are compared as `BString`s so that a
    /// failure prints readable text.
    #[track_caller]
    fn assert_expands(opts: Options, source: &str, expected: &str) {
        let parsed = super::parse(source, opts);
        assert_eq!(BString::from(expected), BString::from(parsed.into_pattern()));
    }

    #[test]
    fn parse_literal_string_pattern() {
        assert_expands(Options::new(), "foo", "(?-mix:foo)");
    }

    #[test]
    fn parse_options_if_included_and_expand() {
        assert_expands(Options::from(Flags::ALL_REGEXP_OPTS), "abc", "(?mix:abc)");
    }

    #[test]
    fn parse_non_included_options_and_embed_expanded_modifiers_prefixed_by_a_minus_sign() {
        assert_expands(Options::from(Flags::IGNORECASE), "abc", "(?i-mx:abc)");
    }

    #[test]
    fn parse_patterns_with_no_enabled_options_and_expand_with_all_modifiers_excluded() {
        assert_expands(Options::new(), "abc", "(?-mix:abc)");
    }

    #[test]
    fn embeds_the_pattern_after_the_options_after_parsing() {
        assert_expands(Options::from(Flags::ALL_REGEXP_OPTS), "ab+c", "(?mix:ab+c)");
        assert_expands(Options::new(), "xyz", "(?-mix:xyz)");
    }

    #[test]
    fn parse_groups_with_options() {
        assert_expands(
            Options::new(),
            "(?ix:foo)(?m:bar)",
            "(?-mix:(?ix:foo)(?m:bar))",
        );
        assert_expands(
            Options::from(Flags::MULTILINE),
            "(?ix:foo)bar",
            "(?m-ix:(?ix:foo)bar)",
        );
    }

    #[test]
    fn parse_a_single_group_with_options_as_the_main_regexp() {
        assert_expands(
            Options::new(),
            "(?i:nothing outside this group)",
            "(?i-mx:nothing outside this group)",
        );
    }

    #[test]
    fn parse_uncaptured_groups() {
        assert_expands(
            Options::from(Flags::IGNORECASE | Flags::EXTENDED),
            "whatever(?:0d)",
            "(?ix-m:whatever(?:0d))",
        );
    }

    #[test]
    fn parse_lookahead_groups() {
        assert_expands(Options::new(), "(?=5)", "(?-mix:(?=5))");
        assert_expands(Options::new(), "(?!5)", "(?-mix:(?!5))");
    }

    #[test]
    fn parse_to_fully_expanded_options_inline() {
        assert_expands(
            Options::from(Flags::IGNORECASE | Flags::EXTENDED),
            "ab+c",
            "(?ix-m:ab+c)",
        );
        assert_expands(Options::new(), "(?i:.)", "(?i-mx:.)");
        assert_expands(Options::new(), "(?:.)", "(?-mix:.)");
    }

    #[test]
    fn parse_abusive_options_literals() {
        assert_expands(Options::new(), "(?mmmmix-miiiix:)", "(?-mix:)");
    }
}