artichoke_backend/extn/core/regexp/
syntax.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
// This module is forked from `regex-syntax` crate @ `26f7318e`.
//
// https://github.com/rust-lang/regex/blob/26f7318e2895eae56e95a260e81e2d48b90e7c25/regex-syntax/src/lib.rs
//
// MIT License
// Copyright (c) 2014 The Rust Project Developers

#![allow(clippy::match_same_arms)]

//! Helpers for parsing Regexp patterns.

/// Returns a copy of `text` in which every regular expression meta character
/// has been escaped.
///
/// The result may be safely used as a literal inside a regular expression.
/// This is a convenience wrapper around [`escape_into`] that writes into a
/// freshly allocated `String`.
#[must_use]
pub fn escape(text: &str) -> String {
    // The escaped form is never shorter than the input, so start from there.
    let mut pattern = String::with_capacity(text.len());
    escape_into(text, &mut pattern);
    pattern
}

/// Appends `text` to `buf`, escaping every character that is significant in a
/// regular expression.
///
/// Anything already in `buf` is left untouched. Each character of `text` is
/// handled in one of three ways:
///
/// - meta characters (see [`is_meta_character`]) are written with a leading
///   backslash;
/// - non-printable characters (see [`is_non_printable_character`]) are written
///   as an escape sequence: the one returned by
///   [`is_non_supported_non_printable_character`] when there is one, otherwise
///   the one produced by [`char::escape_default`];
/// - every other character is copied verbatim.
pub fn escape_into(text: &str, buf: &mut String) {
    // The output is at least as long as the input.
    buf.reserve(text.len());
    for ch in text.chars() {
        if is_meta_character(ch) {
            buf.push('\\');
            buf.push(ch);
        } else if is_non_printable_character(ch) {
            match is_non_supported_non_printable_character(ch) {
                Some(sequence) => buf.push_str(sequence),
                None => buf.extend(ch.escape_default()),
            }
        } else {
            buf.push(ch);
        }
    }
}

/// Reports whether `c` carries special meaning in a regex and therefore has to
/// be backslash-escaped to be matched literally.
///
/// The set is the one used by `regex-syntax`, extended with `'/'` and `' '`.
#[must_use]
pub fn is_meta_character(c: char) -> bool {
    matches!(
        c,
        '\\' | '.' | '+' | '*' | '?' | '(' | ')' | '|' | '[' | ']' | '{' | '}' | '^' | '$'
            | '#' | '-'
            // Not in `regex-syntax`: Ruby uses '/' to mark `Regexp` literals
            // in source code.
            | '/'
            // Not in `regex-syntax`: Ruby always escapes ' ' (an ASCII space
            // character) in calls to `Regexp::escape`.
            | ' '
    )
}

/// Returns true if the given character is non-printable and needs to be quoted.
///
/// The recognized characters are line feed, carriage return, horizontal tab,
/// vertical tab and form feed. Ruby's `Regexp::escape` rewrites each of these
/// as a backslash escape sequence.
#[must_use]
pub const fn is_non_printable_character(c: char) -> bool {
    matches!(
        c,
        '\n' | '\r' | '\t' |
        // vertical tab aka "\v"
        '\u{B}' |
        // form feed aka "\f"
        '\u{C}'
    )
}

/// Returns `Some(_)` if the given character is non-printable and Rust does not
/// support the escape sequence.
///
/// Rust has no `\v` or `\f` character escapes, so `char::escape_default` would
/// render these characters as `\u{b}` and `\u{c}`. The returned string is the
/// conventional two-character escape to emit instead. `None` is returned for
/// every other character.
#[must_use]
pub const fn is_non_supported_non_printable_character(c: char) -> Option<&'static str> {
    match c {
        // vertical tab aka "\v"
        '\u{B}' => Some(r"\v"),
        // form feed aka "\f"
        '\u{C}' => Some(r"\f"),
        _ => None,
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    // Every meta character inherited from `regex-syntax` gains a leading
    // backslash.
    #[test]
    fn escape_meta_characters() {
        assert_eq!(escape(r"\.+*?()|[]{}^$#-"), r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#\-");
    }

    // '/' and ' ' are treated as meta characters here, unlike in
    // `regex-syntax`.
    #[test]
    fn escape_slash_and_space() {
        assert_eq!(escape("a/b c"), r"a\/b\ c");
    }

    // Non-printable characters are written as escape sequences; form feed has
    // no Rust escape and is emitted as `\f`.
    #[test]
    fn escape_non_printable_characters() {
        assert_eq!(escape("\n\r\t\u{C}"), r"\n\r\t\f");
    }

    #[test]
    fn keep_normal_characters() {
        assert_eq!(escape("abc&~"), "abc&~");
    }

    #[test]
    fn escape_empty_string() {
        assert_eq!(escape(""), "");
    }
}