// onig/buffers.rs
//! Encoded Buffers Implementation
//!
//! This module contains a trait used for converting byte buffers or
//! Rust strings into oniguruma char buffers to search and compile
//! with.
/// Encoded String Buffer
///
/// Represents a buffer of characters with encoding information
/// attached. The buffer is exposed as a pair of raw pointers (start
/// and limit) together with the encoding its contents are in.
pub trait EncodedChars {
    /// Pointer to the start of the pattern
    ///
    /// This should point to the first character in the buffer,
    /// encoded as an `onig_sys` character.
    fn start_ptr(&self) -> *const onig_sys::OnigUChar;

    /// Pointer to the limit of the pattern buffer
    ///
    /// This should point just past the final character in the buffer,
    /// encoded as an `onig_sys` character.
    fn limit_ptr(&self) -> *const onig_sys::OnigUChar;

    /// The encoding of the contents of the buffer
    ///
    /// Unless an implementor overrides this method the buffer is
    /// reported as UTF-8.
    fn encoding(&self) -> onig_sys::OnigEncoding {
        // SAFETY: only the address of the foreign static is taken. It is
        // neither read nor written here, and no Rust reference to the
        // `static mut` is ever materialised.
        unsafe { std::ptr::addr_of_mut!(onig_sys::OnigEncodingUTF8) }
    }

    /// The length of this buffer
    fn len(&self) -> usize;

    /// Is the buffer empty?
    ///
    /// The default implementation reports `true` exactly when
    /// [`len`](EncodedChars::len) is zero.
    fn is_empty(&self) -> bool {
        self.len() == 0
    }
}
/// Encoded Characters from a `str` Reference
///
/// Anything that can be viewed as a `str` is usable as a buffer. The
/// `encoding` method is not overridden here, so the trait's default
/// encoding applies.
impl<T> EncodedChars for T
where
    T: AsRef<str>,
{
    /// Address of the first byte of the string data.
    fn start_ptr(&self) -> *const onig_sys::OnigUChar {
        let text: &str = self.as_ref();
        text.as_bytes().as_ptr()
    }

    /// Address one past the last byte of the string data.
    fn limit_ptr(&self) -> *const onig_sys::OnigUChar {
        let text: &str = self.as_ref();
        text.as_bytes().as_ptr_range().end
    }

    /// Length of the string data in bytes.
    fn len(&self) -> usize {
        let text: &str = self.as_ref();
        text.len()
    }
}
/// Byte Buffer
///
/// Represents a buffer of bytes, with an encoding.
pub struct EncodedBytes<'a> {
    /// Borrowed contents of the buffer.
    bytes: &'a [u8],
    /// Encoding the contents are declared to be in.
    enc: onig_sys::OnigEncoding,
}

impl<'a> EncodedBytes<'a> {
    /// New Buffer from Parts
    ///
    /// # Arguments
    ///
    ///  * `bytes` - The contents of the buffer
    ///  * `enc` - The encoding this buffer is in
    ///
    /// # Returns
    ///
    /// A new buffer instance
    pub fn from_parts(bytes: &'a [u8], enc: onig_sys::OnigEncoding) -> EncodedBytes<'a> {
        EncodedBytes { bytes, enc }
    }

    /// New ASCII Buffer
    ///
    /// # Arguments
    ///
    ///  * `bytes` - The ASCII encoded string
    ///
    /// # Returns
    ///
    /// A new buffer instance
    pub fn ascii(bytes: &'a [u8]) -> EncodedBytes<'a> {
        // SAFETY: only the address of the foreign static is taken. It is
        // neither read nor written here, and no Rust reference to the
        // `static mut` is ever materialised.
        let enc = unsafe { std::ptr::addr_of_mut!(onig_sys::OnigEncodingASCII) };
        Self::from_parts(bytes, enc)
    }
}
/// Exposes an `EncodedBytes` buffer through the `EncodedChars` interface,
/// reporting the encoding the buffer was constructed with.
impl EncodedChars for EncodedBytes<'_> {
    /// Address of the first byte of the wrapped slice.
    fn start_ptr(&self) -> *const onig_sys::OnigUChar {
        self.bytes.as_ptr_range().start
    }

    /// Address one past the last byte of the wrapped slice.
    fn limit_ptr(&self) -> *const onig_sys::OnigUChar {
        self.bytes.as_ptr_range().end
    }

    /// The encoding stored alongside the bytes.
    fn encoding(&self) -> onig_sys::OnigEncoding {
        self.enc
    }

    /// Number of bytes in the wrapped slice.
    fn len(&self) -> usize {
        let contents: &[u8] = self.bytes;
        contents.len()
    }
}
#[cfg(test)]
pub mod tests {
    use super::*;

    /// Address of the UTF-8 encoding exported by `onig_sys`.
    fn utf8_encoding() -> onig_sys::OnigEncoding {
        // SAFETY: only the address of the foreign static is taken; no
        // reference to the `static mut` is created.
        unsafe { std::ptr::addr_of_mut!(onig_sys::OnigEncodingUTF8) }
    }

    /// Address of the ASCII encoding exported by `onig_sys`.
    fn ascii_encoding() -> onig_sys::OnigEncoding {
        // SAFETY: only the address of the foreign static is taken; no
        // reference to the `static mut` is created.
        unsafe { std::ptr::addr_of_mut!(onig_sys::OnigEncodingASCII) }
    }

    /// Distance between the start and limit pointers of `buffer`.
    fn ptr_span<B: EncodedChars>(buffer: &B) -> usize {
        buffer.limit_ptr() as usize - buffer.start_ptr() as usize
    }

    #[test]
    pub fn rust_string_encoding_is_utf8() {
        let foo = "foo";
        assert_eq!(utf8_encoding(), foo.encoding());

        let bar = String::from(".*");
        assert_eq!(utf8_encoding(), bar.encoding());
    }

    #[test]
    pub fn rust_bytes_encoding_is_ascii() {
        let fizz = b"fizz";
        let buff = EncodedBytes::ascii(fizz);
        assert_eq!(ascii_encoding(), buff.encoding());
    }

    #[test]
    pub fn rust_string_ptr_offsets_are_valid() {
        let test_string = "hello world";
        assert_eq!(ptr_span(&test_string), test_string.len());
    }

    #[test]
    pub fn rust_bytes_ptr_offsets_are_valid() {
        let fozz = b"foo.*bar";
        let buff = EncodedBytes::ascii(fozz);
        assert_eq!(ptr_span(&buff), fozz.len());
    }

    #[test]
    pub fn byte_buffer_create() {
        let buff = b"hello world";
        let enc_buffer = EncodedBytes::from_parts(buff, ascii_encoding());
        assert_eq!(ascii_encoding(), enc_buffer.encoding());
        assert_eq!(ptr_span(&enc_buffer), buff.len());
    }

    // Edge case: zero-length buffers must report an empty span and
    // `is_empty`, for both the string and the byte-slice implementations.
    #[test]
    pub fn empty_buffers_are_empty() {
        let empty_str = "";
        assert_eq!(ptr_span(&empty_str), 0);
        // Fully qualified so the trait method is exercised rather than
        // the inherent `str::is_empty`.
        assert!(EncodedChars::is_empty(&empty_str));

        let empty_bytes = EncodedBytes::ascii(b"");
        assert_eq!(ptr_span(&empty_bytes), 0);
        assert!(empty_bytes.is_empty());
    }
}