onig/
buffers.rs

1//! Encoded Buffers Implementation
2//!
3//! This module contains a trait used for converting byte buffers or
4//! Rust strings into oniguruma char buffers to search and compile
5//! with.
6
7/// Encoded String Buffer
8///
9/// Represents a buffer of characters with encoding information
10/// attached.
11pub trait EncodedChars {
12    /// Pointer to the start of the pattern
13    ///
14    /// This should point to the first character in the buffer,
15    /// encoded as an `onig_sys` character.
16    fn start_ptr(&self) -> *const onig_sys::OnigUChar;
17
18    /// Pointer to the limit of the pattern buffer
19    ///
20    /// This should point just past the final character in the buffer,
21    /// encoded as an `onig_sys` character.
22    fn limit_ptr(&self) -> *const onig_sys::OnigUChar;
23
24    /// The encoding of the contents of the buffer
25    fn encoding(&self) -> onig_sys::OnigEncoding {
26        unsafe { &mut onig_sys::OnigEncodingUTF8 }
27    }
28
29    /// The length of this buffer
30    fn len(&self) -> usize;
31
32    /// Is the buffer empty?
33    fn is_empty(&self) -> bool {
34        self.len() == 0
35    }
36}
37
38/// Encoded Charters from a `str` Reference
39impl<T> EncodedChars for T
40where
41    T: AsRef<str>,
42{
43    fn start_ptr(&self) -> *const onig_sys::OnigUChar {
44        self.as_ref().as_bytes().as_ptr()
45    }
46
47    fn limit_ptr(&self) -> *const onig_sys::OnigUChar {
48        let bytes = self.as_ref().as_bytes();
49        bytes[bytes.len()..].as_ptr()
50    }
51
52    fn len(&self) -> usize {
53        self.as_ref().len()
54    }
55}
56
57/// Byte Buffer
58///
59/// Represents a buffer of bytes, with an encoding.
60pub struct EncodedBytes<'a> {
61    bytes: &'a [u8],
62    enc: onig_sys::OnigEncoding,
63}
64
65impl<'a> EncodedBytes<'a> {
66    /// New Buffer from Parts
67    ///
68    /// # Arguments
69    ///
70    ///  * `bytes` - The contents of the buffer
71    ///  * `enc` - The encoding this buffer is in
72    ///
73    /// # Returns
74    ///
75    /// A new buffer instance
76    pub fn from_parts(bytes: &'a [u8], enc: onig_sys::OnigEncoding) -> EncodedBytes<'a> {
77        EncodedBytes { bytes, enc }
78    }
79
80    /// New ASCII Buffer
81    ///
82    /// # Arguments
83    ///
84    ///  * `bytes` - The ASCII encoded string
85    ///
86    /// # Returns
87    ///
88    /// A new buffer instance
89    pub fn ascii(bytes: &'a [u8]) -> EncodedBytes<'a> {
90        EncodedBytes {
91            bytes,
92            enc: unsafe { &mut onig_sys::OnigEncodingASCII },
93        }
94    }
95}
96
97impl<'a> EncodedChars for EncodedBytes<'a> {
98    fn start_ptr(&self) -> *const onig_sys::OnigUChar {
99        self.bytes.as_ptr()
100    }
101
102    fn limit_ptr(&self) -> *const onig_sys::OnigUChar {
103        self.bytes[self.bytes.len()..].as_ptr()
104    }
105
106    fn encoding(&self) -> onig_sys::OnigEncoding {
107        self.enc
108    }
109
110    fn len(&self) -> usize {
111        self.bytes.len()
112    }
113}
114
#[cfg(test)]
pub mod tests {
    use super::*;
    use std::ptr::addr_of_mut;

    /// Address of `onig_sys::OnigEncodingUTF8`, the value string buffers
    /// are expected to report.
    // Compilers from Rust 1.82 on treat taking the raw address of a static
    // as safe; the block is kept so older toolchains keep compiling.
    #[allow(unused_unsafe)]
    fn utf8_encoding() -> onig_sys::OnigEncoding {
        // SAFETY: only the address of the mutable static is computed. No
        // reference is created and the static is neither read nor written.
        unsafe { addr_of_mut!(onig_sys::OnigEncodingUTF8) }
    }

    /// Address of `onig_sys::OnigEncodingASCII`, the value ASCII byte
    /// buffers are expected to report.
    // See `utf8_encoding` for why `unused_unsafe` is allowed.
    #[allow(unused_unsafe)]
    fn ascii_encoding() -> onig_sys::OnigEncoding {
        // SAFETY: only the address of the mutable static is computed. No
        // reference is created and the static is neither read nor written.
        unsafe { addr_of_mut!(onig_sys::OnigEncodingASCII) }
    }

    /// Distance, in bytes, between the start and limit pointers a buffer
    /// reports.
    fn byte_span<B: EncodedChars>(buffer: &B) -> usize {
        buffer.limit_ptr() as usize - buffer.start_ptr() as usize
    }

    /// Both `&str` and `String` report the UTF-8 encoding.
    #[test]
    pub fn rust_string_encoding_is_utf8() {
        let foo = "foo";
        assert_eq!(utf8_encoding(), foo.encoding());

        let bar = String::from(".*");
        assert_eq!(utf8_encoding(), bar.encoding());
    }

    /// `EncodedBytes::ascii` reports the ASCII encoding.
    #[test]
    pub fn rust_bytes_encoding_is_ascii() {
        let fizz = b"fizz";
        let buff = EncodedBytes::ascii(fizz);
        assert_eq!(ascii_encoding(), buff.encoding());
    }

    /// For a string, the limit pointer sits `len()` bytes past the start.
    #[test]
    pub fn rust_string_ptr_offsets_are_valid() {
        let test_string = "hello world";
        assert_eq!(byte_span(&test_string), test_string.len());
    }

    /// For a byte buffer, the limit pointer sits `len()` bytes past the
    /// start.
    #[test]
    pub fn rust_bytes_ptr_offsets_are_valid() {
        let fozz = b"foo.*bar";
        let buff = EncodedBytes::ascii(fozz);
        assert_eq!(byte_span(&buff), fozz.len());
    }

    /// `from_parts` keeps both the encoding and the bytes it was given.
    #[test]
    pub fn byte_buffer_create() {
        let buff = b"hello world";
        let enc_buffer = EncodedBytes::from_parts(buff, ascii_encoding());
        assert_eq!(ascii_encoding(), enc_buffer.encoding());
        assert_eq!(byte_span(&enc_buffer), buff.len());
    }
}