artichoke_backend/extn/core/regexp/
syntax.rs

1// This module is forked from `regex-syntax` crate @ `26f7318e`.
2//
3// https://github.com/rust-lang/regex/blob/26f7318e2895eae56e95a260e81e2d48b90e7c25/regex-syntax/src/lib.rs
4//
5// MIT License
6// Copyright (c) 2014 The Rust Project Developers
7
8//! Helpers for parsing Regexp patterns.
9
10/// Escapes all regular expression meta characters in `text`.
11///
12/// The string returned may be safely used as a literal in a regular expression.
13#[must_use]
14pub fn escape(text: &str) -> String {
15    let mut quoted = String::new();
16    escape_into(text, &mut quoted);
17    quoted
18}
19
20/// Escapes all meta characters in `text` and writes the result into `buf`.
21///
22/// This will append escape characters into the given buffer. The characters
23/// that are appended are safe to use as a literal in a regular expression.
24pub fn escape_into(text: &str, buf: &mut String) {
25    buf.reserve(text.len());
26    for c in text.chars() {
27        match c {
28            c if is_meta_character(c) => {
29                buf.push('\\');
30                buf.push(c);
31            }
32            c if is_non_printable_character(c) => {
33                if let Some(escape) = is_non_supported_non_printable_character(c) {
34                    buf.push_str(escape);
35                } else {
36                    for part in c.escape_default() {
37                        buf.push(part);
38                    }
39                }
40            }
41            c => buf.push(c),
42        }
43    }
44}
45
/// Reports whether `c` has special meaning in a regex and therefore has to be
/// backslash-escaped to be matched literally.
///
/// The set is based on the one used by `regex-syntax`, extended with `'/'` and
/// `' '` for compatibility with Ruby.
#[must_use]
#[allow(
    clippy::match_like_matches_macro,
    clippy::match_same_arms,
    reason = "match is more readable"
)]
pub fn is_meta_character(c: char) -> bool {
    match c {
        // Ruby-specific, not part of `regex-syntax`: `/` marks `Regexp`
        // literals in Ruby source code.
        '/' => true,
        // Ruby-specific, not part of `regex-syntax`: Ruby always escapes ' '
        // (an ASCII space character) in calls to `Regexp::escape`.
        ' ' => true,
        // The escape character itself.
        '\\' => true,
        // Wildcard, repetition operators and alternation.
        '.' | '+' | '*' | '?' | '|' => true,
        // Group, character class and counted repetition delimiters.
        '(' | ')' | '[' | ']' | '{' | '}' => true,
        // Anchors.
        '^' | '$' => true,
        // Remaining punctuation with significance in a pattern.
        '#' | '-' => true,
        _ => false,
    }
}
65
/// Reports whether `c` is a non-printable character that needs to be quoted.
///
/// The characters recognized are tab, line feed, carriage return and form
/// feed.
#[must_use]
pub const fn is_non_printable_character(c: char) -> bool {
    // Form feed aka `"\f"`, which has no dedicated character literal in Rust.
    const FORM_FEED: char = '\u{C}';

    matches!(c, '\t' | '\n' | '\r' | FORM_FEED)
}
76
/// Looks up a hand-written escape sequence for a non-printable character whose
/// escape sequence Rust does not support.
///
/// Returns `Some(_)` holding the escape sequence when `c` is such a character
/// (currently only form feed, which maps to `\f`), and `None` for every other
/// character.
#[must_use]
pub const fn is_non_supported_non_printable_character(c: char) -> Option<&'static str> {
    // Form feed aka `"\f"`.
    const FORM_FEED: char = '\u{C}';

    if c == FORM_FEED {
        Some(r"\f")
    } else {
        None
    }
}
87
#[cfg(test)]
mod tests {
    use super::*;

    // Every character inherited from the `regex-syntax` meta character set
    // must be prefixed with a backslash.
    #[test]
    fn escape_meta_characters() {
        assert_eq!(escape(r"\.+*?()|[]{}^$#-"), r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#\-");
    }

    // `/` and ` ` are escaped in addition to the `regex-syntax` set.
    #[test]
    fn escape_ruby_specific_meta_characters() {
        assert_eq!(escape("a b/c"), r"a\ b\/c");
    }

    // Non-printable characters are rendered as textual escape sequences,
    // including form feed, which Rust has no escape sequence for.
    #[test]
    fn escape_non_printable_characters() {
        assert_eq!(escape("\n\r\t\u{C}"), r"\n\r\t\f");
    }

    // Characters without significance in a pattern pass through unchanged.
    #[test]
    fn keep_normal_characters() {
        assert_eq!(escape(r"abc&~"), r"abc&~");
    }

    #[test]
    fn escape_empty_input() {
        assert_eq!(escape(""), "");
    }

    // `escape_into` must append to the buffer rather than overwrite it.
    #[test]
    fn escape_into_appends_to_existing_buffer() {
        let mut buf = String::from("prefix:");
        escape_into("a.b", &mut buf);
        assert_eq!(buf, r"prefix:a\.b");
    }
}