/// Generates a symmetric pair of byte-wise [`PartialEq`] impls for two types.
///
/// `$lhs` must expose an `as_bytes()` method and `$rhs` must be viewable as
/// `&[u8]` through `as_ref()`. The expansion provides both `$lhs == $rhs` and
/// `$rhs == $lhs`; each one compares the two byte views for equality.
///
/// Every generated impl declares the lifetimes `'a` and `'b`, presumably so
/// that the type arguments can name them (for example `&'a [u8]`).
macro_rules! impl_partial_eq {
    ($lhs:ty, $rhs:ty) => {
        impl<'a, 'b> PartialEq<$rhs> for $lhs {
            #[inline]
            fn eq(&self, other: &$rhs) -> bool {
                // The annotation selects the `[u8]` view of `other`.
                let rhs_bytes: &[u8] = other.as_ref();
                self.as_bytes() == rhs_bytes
            }
        }

        impl<'a, 'b> PartialEq<$lhs> for $rhs {
            #[inline]
            fn eq(&self, other: &$lhs) -> bool {
                // The annotation selects the `[u8]` view of `self`.
                let lhs_bytes: &[u8] = self.as_ref();
                lhs_bytes == other.as_bytes()
            }
        }
    };
}
/// Generates a symmetric pair of byte-wise [`PartialEq`] impls where one side
/// is parameterised by an array length.
///
/// Each generated impl declares `const N: usize`, so `$lhs` or `$rhs` is
/// expected to mention `N` (for example `[u8; N]` or `&'a [u8; N]`). `$lhs`
/// must expose `as_bytes()` and `$rhs` must be viewable as `&[u8]` through
/// `as_ref()`; both directions compare the two byte views for equality.
macro_rules! impl_partial_eq_array {
    ($lhs:ty, $rhs:ty) => {
        impl<'a, 'b, const N: usize> PartialEq<$rhs> for $lhs {
            #[inline]
            fn eq(&self, other: &$rhs) -> bool {
                // The annotation selects the `[u8]` view of `other`.
                let rhs_bytes: &[u8] = other.as_ref();
                self.as_bytes() == rhs_bytes
            }
        }

        impl<'a, 'b, const N: usize> PartialEq<$lhs> for $rhs {
            #[inline]
            fn eq(&self, other: &$lhs) -> bool {
                // The annotation selects the `[u8]` view of `self`.
                let lhs_bytes: &[u8] = self.as_ref();
                lhs_bytes == other.as_bytes()
            }
        }
    };
}
mod borrowed;
mod inspect;
mod owned;
pub use borrowed::Codepoints;
pub use borrowed::Utf8Str;
pub use inspect::Inspect;
pub use owned::Utf8String;
#[cfg(test)]
#[allow(clippy::invisible_characters)]
mod tests {
use alloc::string::String;
use alloc::vec::Vec;
use core::str;
use quickcheck::quickcheck;
use super::{Utf8Str, Utf8String};
// UTF-8 encoding of U+FFFD REPLACEMENT CHARACTER: the bytes `EF BF BD`.
const REPLACEMENT_CHARACTER_BYTES: [u8; 3] = [239, 191, 189];
quickcheck! {
    // For valid UTF-8 input the character length equals the number of `char`s.
    fn fuzz_char_len_utf8_contents_utf8_string(contents: String) -> bool {
        let want = contents.chars().count();
        Utf8String::from(contents).char_len() == want
    }

    // For valid UTF-8 input the byte length is preserved by the conversion.
    fn fuzz_len_utf8_contents_utf8_string(contents: String) -> bool {
        let want = contents.len();
        Utf8String::from(contents).len() == want
    }

    // For arbitrary bytes: an exact `char` count when the bytes happen to be
    // valid UTF-8, otherwise never more characters than there are bytes.
    fn fuzz_char_len_binary_contents_utf8_string(contents: Vec<u8>) -> bool {
        match str::from_utf8(&contents).map(|valid| valid.chars().count()) {
            Ok(want) => Utf8String::from(contents).char_len() == want,
            Err(_) => {
                let upper_bound = contents.len();
                Utf8String::from(contents).char_len() <= upper_bound
            }
        }
    }

    // For arbitrary bytes the byte length is preserved by the conversion.
    fn fuzz_len_binary_contents_utf8_string(contents: Vec<u8>) -> bool {
        let want = contents.len();
        Utf8String::from(contents).len() == want
    }
}
#[test]
fn constructs_empty_buffer() {
    // Converting an empty byte vector yields a string of length zero.
    let empty = Utf8String::from(Vec::new());
    assert_eq!(0, empty.len());
}
#[test]
fn char_len_empty() {
    // The empty string contains no characters.
    let empty = Utf8String::from("");
    assert_eq!(empty.char_len(), 0);
}
#[test]
fn char_len_ascii() {
    // Pure ASCII: one character per byte.
    let ascii = Utf8String::from("Artichoke Ruby");
    assert_eq!(ascii.char_len(), 14);
}
#[test]
fn char_len_emoji() {
    // Each emoji below is a single code point that takes four UTF-8 bytes.
    // They are spelled as `\u{...}` escapes so the literals keep their exact
    // code points regardless of how this file is re-encoded.
    //
    // U+1F48E = gem stone, U+1F980 = crab, U+1F389 = party popper.
    let s = Utf8String::from("\u{1F48E}");
    assert_eq!(s.char_len(), 1);
    let s = Utf8String::from("\u{1F48E}\u{1F980}\u{1F389}");
    assert_eq!(s.char_len(), 3);
    let s = Utf8String::from("a\u{1F48E}b\u{1F980}c\u{1F389}d");
    assert_eq!(s.char_len(), 7);
    // 'a', the gem stone bytes, one invalid `0xFF` byte, then "abc".
    let s = Utf8String::from(b"a\xF0\x9F\x92\x8E\xFFabc");
    assert_eq!(s.char_len(), 6);
}
#[test]
fn char_len_unicode_replacement_character() {
    // U+FFFD REPLACEMENT CHARACTER is an ordinary, valid code point and must
    // count as exactly one character. Escapes are used so the literals keep
    // their exact code points regardless of how this file is re-encoded.
    let s = Utf8String::from("\u{FFFD}");
    assert_eq!(s.char_len(), 1);
    let s = Utf8String::from("\u{FFFD}\u{FFFD}\u{FFFD}");
    assert_eq!(s.char_len(), 3);
    let s = Utf8String::from("a\u{FFFD}b\u{FFFD}c\u{FFFD}d");
    assert_eq!(s.char_len(), 7);
    // Replacement characters mixed with four-byte emoji and ASCII.
    let s = Utf8String::from("\u{FFFD}\u{1F48E}b\u{1F980}c\u{1F389}\u{FFFD}");
    assert_eq!(s.char_len(), 7);
    // U+FFFD, a four-byte emoji, an invalid `0xFF` byte, U+FFFD, then "ab".
    let s = Utf8String::from(b"\xEF\xBF\xBD\xF0\x9F\x92\x8E\xFF\xEF\xBF\xBDab");
    assert_eq!(s.char_len(), 6);
    let s = Utf8String::from(REPLACEMENT_CHARACTER_BYTES);
    assert_eq!(s.char_len(), 1);
}
#[test]
fn char_len_nul_byte() {
    // A NUL byte counts as one character wherever it appears.
    let only_nul = Utf8String::from(b"\x00");
    assert_eq!(only_nul.char_len(), 1);

    let trailing_nul = Utf8String::from(b"abc\x00");
    assert_eq!(trailing_nul.char_len(), 4);

    let interior_nul = Utf8String::from(b"abc\x00xyz");
    assert_eq!(interior_nul.char_len(), 7);
}
#[test]
fn char_len_invalid_utf8_byte_sequences() {
    // In these inputs every byte, valid or not, counts as one character.
    let with_stray_lead_byte = Utf8String::from(b"\x00\x00\xD8\x00");
    assert_eq!(with_stray_lead_byte.char_len(), 4);

    let never_valid_bytes = Utf8String::from(b"\xFF\xFE");
    assert_eq!(never_valid_bytes.char_len(), 2);
}
#[test]
fn char_len_binary() {
    // Two 32-byte binary blobs; each is expected to count as 32 characters.
    let blob = &[
        0xB3, 0x7E, 0x39, 0x70, 0x8E, 0xFD, 0xBB, 0x75, 0x62, 0x77, 0xE7, 0xDF, 0x6F, 0xF2, 0x76, 0x27, 0x81,
        0x9A, 0x3A, 0x9D, 0xED, 0x6B, 0x4F, 0xAE, 0xC4, 0xE7, 0xA1, 0x66, 0x11, 0xF1, 0x08, 0x1C,
    ];
    assert_eq!(Utf8String::from(blob).char_len(), 32);

    // Same tail as above, but the first five bytes are printable ASCII.
    let blob_with_ascii_prefix = &[
        b'?', b'!', b'a', b'b', b'c', 0xFD, 0xBB, 0x75, 0x62, 0x77, 0xE7, 0xDF, 0x6F, 0xF2, 0x76, 0x27, 0x81,
        0x9A, 0x3A, 0x9D, 0xED, 0x6B, 0x4F, 0xAE, 0xC4, 0xE7, 0xA1, 0x66, 0x11, 0xF1, 0x08, 0x1C,
    ];
    assert_eq!(Utf8String::from(blob_with_ascii_prefix).char_len(), 32);
}
#[test]
fn char_len_mixed_ascii_emoji_invalid_bytes() {
    // Four-byte emoji, "abc", another four-byte emoji, then an invalid `0xFF`
    // byte: 1 + 3 + 1 + 1 characters.
    let mixed = Utf8String::from(b"\xF0\x9F\xA6\x80\x61\x62\x63\xF0\x9F\x92\x8E\xFF");
    assert_eq!(mixed.char_len(), 6);
}
#[test]
fn char_len_utf8() {
    // Every literal is built from single-code-point symbols (two or three
    // UTF-8 bytes each), so the expected character count is simply the number
    // of symbols in the literal.
    let s = Utf8String::from("Ω≈ç√∫˜µ≤≥÷");
    assert_eq!(s.char_len(), 10);
    let s = Utf8String::from("åß∂ƒ©˙∆˚¬…æ");
    assert_eq!(s.char_len(), 11);
    let s = Utf8String::from("œ∑´®†¥¨ˆøπ“‘");
    assert_eq!(s.char_len(), 12);
    let s = Utf8String::from("¡™£¢∞§¶•ªº–≠");
    assert_eq!(s.char_len(), 12);
    let s = Utf8String::from("¸˛Ç◊ı˜Â¯˘¿");
    assert_eq!(s.char_len(), 10);
    // U+F8FF is a private-use code point; it is escaped because it has no
    // portable glyph.
    let s = Utf8String::from("ÅÍÎÏ˝ÓÔ\u{F8FF}ÒÚÆ☃");
    assert_eq!(s.char_len(), 12);
    let s = Utf8String::from("Œ„´‰ˇÁ¨ˆØ∏”’");
    assert_eq!(s.char_len(), 12);
    // U+FB01 and U+FB02 are the "fi" and "fl" ligatures; they are escaped so
    // they cannot be silently expanded into two letters each.
    let s = Utf8String::from("`⁄€‹›\u{FB01}\u{FB02}‡°·‚—±");
    assert_eq!(s.char_len(), 13);
    let s = Utf8String::from("⅛⅜⅝⅞");
    assert_eq!(s.char_len(), 4);
    // Cyrillic: U+0401..=U+040F (15), U+0410..=U+042F (32), U+0430..=U+044F (32).
    let s = Utf8String::from("ЁЂЃЄЅІЇЈЉЊЋЌЍЎЏАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюя");
    assert_eq!(s.char_len(), 79);
}
#[test]
fn char_len_vmware_super_string() {
    // A mix of one-, two-, three- and four-byte code points. The final code
    // point, U+20000, lies outside the Basic Multilingual Plane and is
    // escaped to keep it unambiguous.
    let s = Utf8String::from("表ポあA鷗ŒéＢ逍Üßªąñ丂㐀\u{20000}");
    assert_eq!(s.char_len(), 17);
}
#[test]
fn char_len_two_byte_chars() {
    // CJK and Hangul text: every symbol is one code point, so the expected
    // count is the number of symbols (spaces included).
    let s = Utf8String::from("田中さんにあげて下さい");
    assert_eq!(s.char_len(), 11);
    let s = Utf8String::from("パーティーへ行かないか");
    assert_eq!(s.char_len(), 11);
    let s = Utf8String::from("和製漢語");
    assert_eq!(s.char_len(), 4);
    let s = Utf8String::from("部落格");
    assert_eq!(s.char_len(), 3);
    let s = Utf8String::from("사회과학원 어학연구소");
    assert_eq!(s.char_len(), 11);
    let s = Utf8String::from("찦차를 타고 온 펲시맨과 쑛다리 똠방각하");
    assert_eq!(s.char_len(), 22);
    let s = Utf8String::from("社會科學院語學研究所");
    assert_eq!(s.char_len(), 10);
    let s = Utf8String::from("울란바토르");
    assert_eq!(s.char_len(), 5);
    // Seven ideographs from U+20000..U+2FFFF (four UTF-8 bytes each), escaped
    // so the exact code points are visible.
    let s = Utf8String::from("\u{2070E}\u{20731}\u{20779}\u{20C53}\u{20C78}\u{20C96}\u{20CCF}");
    assert_eq!(s.char_len(), 7);
}
#[test]
fn char_len_space_chars() {
    // 25 whitespace-like code points, each of which must count as exactly one
    // character. They are written as escapes because most of them are
    // invisible and easily damaged by editors and encoding round-trips:
    //
    // - U+0009 (HT), U+000B (VT), U+000C (FF), U+0020 (SPACE)
    // - U+0085 (NEL), U+00A0 (NO-BREAK SPACE)
    // - U+1680, U+180E
    // - U+2000..=U+200B
    // - U+2028, U+2029, U+202F, U+205F, U+3000
    let bytes = "\t\u{B}\u{C} \u{85}\u{A0}\u{1680}\u{180E}\
                 \u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\
                 \u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{200B}\
                 \u{2028}\u{2029}\u{202F}\u{205F}\u{3000}";
    let s = Utf8String::from(bytes);
    assert_eq!(s.char_len(), 25);
}
#[test]
fn casing_utf8_string_empty() {
    // Every in-place case conversion must leave an empty string empty.
    let mut empty = Utf8String::from(b"");

    empty.make_capitalized();
    assert_eq!(empty, "");

    empty.make_lowercase();
    assert_eq!(empty, "");

    empty.make_uppercase();
    assert_eq!(empty, "");
}
#[test]
fn casing_utf8_string_ascii() {
    // Each helper applies one in-place case conversion to a copy of its
    // argument and returns the converted copy.
    fn capitalize(value: &Utf8String) -> Utf8String {
        let mut copy = value.clone();
        copy.make_capitalized();
        copy
    }
    fn lowercase(value: &Utf8String) -> Utf8String {
        let mut copy = value.clone();
        copy.make_lowercase();
        copy
    }
    fn uppercase(value: &Utf8String) -> Utf8String {
        let mut copy = value.clone();
        copy.make_uppercase();
        copy
    }

    let lower = Utf8String::from(b"abc");
    let mid_upper = Utf8String::from(b"aBc");
    let upper = Utf8String::from(b"ABC");
    let long = Utf8String::from(b"aBC, 123, ABC, baby you and me girl");

    // Capitalizing uppercases the first letter and lowercases the rest.
    assert_eq!(capitalize(&lower), "Abc");
    assert_eq!(capitalize(&mid_upper), "Abc");
    assert_eq!(capitalize(&upper), "Abc");
    assert_eq!(capitalize(&long), "Abc, 123, abc, baby you and me girl");

    assert_eq!(lowercase(&lower), "abc");
    assert_eq!(lowercase(&mid_upper), "abc");
    assert_eq!(lowercase(&upper), "abc");
    assert_eq!(lowercase(&long), "abc, 123, abc, baby you and me girl");

    assert_eq!(uppercase(&lower), "ABC");
    assert_eq!(uppercase(&mid_upper), "ABC");
    assert_eq!(uppercase(&upper), "ABC");
    assert_eq!(uppercase(&long), "ABC, 123, ABC, BABY YOU AND ME GIRL");
}
#[test]
fn casing_utf8_string_utf8() {
    // All non-ASCII text in this test is spelled with `\u{...}` escapes so
    // that the exact code points are visible and cannot be altered by a
    // re-encoding of this file.

    // U+00DF LATIN SMALL LETTER SHARP S; its uppercase form is the two
    // letters "SS".
    let sharp_s = Utf8String::from("\u{DF}");
    // Lowercase Greek words containing accented vowels.
    let tomorrow_lower = "\u{3B1}\u{3CD}\u{3C1}\u{3B9}\u{3BF}";
    let tomorrow = Utf8String::from(tomorrow_lower);
    let year_lower = "\u{3AD}\u{3C4}\u{3BF}\u{3C2}";
    let year = Utf8String::from(year_lower);
    // Deseret text: every letter takes four UTF-8 bytes. Uppercase letters
    // are U+10400..=U+10427 and each lowercase letter is 0x28 higher. The
    // input is mostly uppercase with three lowercase letters near the end of
    // the first half.
    let two_byte_chars = Utf8String::from(
        "\u{1041C} \
         \u{10414}\u{10407}\u{1041D}\u{10400}\u{10421}\u{10407}\u{10413} \
         \u{10419}\u{1040A}\u{10421}\u{1041D}\u{10413}/\
         \u{1041D}\u{10407}\u{10417}\u{1040A}\u{10424}\u{10414} \
         \u{10412}\u{1040B}\u{10417} \
         \u{10412}\u{1040C} \
         \u{1041C} \
         \u{10421}\u{10400}\u{10416}\u{10407}\u{10424}\u{10413}\u{1041D} \
         \u{10431}\u{10442} \
         \u{10444} \
         \u{10414}\u{10407}\u{1041D}\u{10400}\u{10421}\u{10407}\u{10413} \
         \u{1040F}\u{10406}\u{10405}\u{10424}\u{10406}\u{1041A}\u{1040A}\u{10421}\u{1041D}\u{10406}\u{10413}\u{10406}",
    );
    // The same Deseret text with only the first letter uppercase.
    let deseret_capitalized = "\u{1041C} \
         \u{1043C}\u{1042F}\u{10445}\u{10428}\u{10449}\u{1042F}\u{1043B} \
         \u{10441}\u{10432}\u{10449}\u{10445}\u{1043B}/\
         \u{10445}\u{1042F}\u{1043F}\u{10432}\u{1044C}\u{1043C} \
         \u{1043A}\u{10433}\u{1043F} \
         \u{1043A}\u{10434} \
         \u{10444} \
         \u{10449}\u{10428}\u{1043E}\u{1042F}\u{1044C}\u{1043B}\u{10445} \
         \u{10431}\u{10442} \
         \u{10444} \
         \u{1043C}\u{1042F}\u{10445}\u{10428}\u{10449}\u{1042F}\u{1043B} \
         \u{10437}\u{1042E}\u{1042D}\u{1044C}\u{1042E}\u{10442}\u{10432}\u{10449}\u{10445}\u{1042E}\u{1043B}\u{1042E}";
    // The same Deseret text entirely in lowercase.
    let deseret_lower = "\u{10444} \
         \u{1043C}\u{1042F}\u{10445}\u{10428}\u{10449}\u{1042F}\u{1043B} \
         \u{10441}\u{10432}\u{10449}\u{10445}\u{1043B}/\
         \u{10445}\u{1042F}\u{1043F}\u{10432}\u{1044C}\u{1043C} \
         \u{1043A}\u{10433}\u{1043F} \
         \u{1043A}\u{10434} \
         \u{10444} \
         \u{10449}\u{10428}\u{1043E}\u{1042F}\u{1044C}\u{1043B}\u{10445} \
         \u{10431}\u{10442} \
         \u{10444} \
         \u{1043C}\u{1042F}\u{10445}\u{10428}\u{10449}\u{1042F}\u{1043B} \
         \u{10437}\u{1042E}\u{1042D}\u{1044C}\u{1042E}\u{10442}\u{10432}\u{10449}\u{10445}\u{1042E}\u{1043B}\u{1042E}";
    // The same Deseret text entirely in uppercase.
    let deseret_upper = "\u{1041C} \
         \u{10414}\u{10407}\u{1041D}\u{10400}\u{10421}\u{10407}\u{10413} \
         \u{10419}\u{1040A}\u{10421}\u{1041D}\u{10413}/\
         \u{1041D}\u{10407}\u{10417}\u{1040A}\u{10424}\u{10414} \
         \u{10412}\u{1040B}\u{10417} \
         \u{10412}\u{1040C} \
         \u{1041C} \
         \u{10421}\u{10400}\u{10416}\u{10407}\u{10424}\u{10413}\u{1041D} \
         \u{10409}\u{1041A} \
         \u{1041C} \
         \u{10414}\u{10407}\u{1041D}\u{10400}\u{10421}\u{10407}\u{10413} \
         \u{1040F}\u{10406}\u{10405}\u{10424}\u{10406}\u{1041A}\u{1040A}\u{10421}\u{1041D}\u{10406}\u{10413}\u{10406}";
    // 'z' followed by U+023A and U+023E: two-byte uppercase letters whose
    // lowercase forms (U+2C65, U+2C66) take three bytes, so the byte length
    // changes with the case.
    let varying_length = Utf8String::from("z\u{23A}\u{23E}");
    // Arabic (right-to-left) text; every conversion below is expected to
    // leave it unchanged.
    let rtl_text = "\u{645}\u{631}\u{62D}\u{628}\u{627} \u{627}\u{644}\u{62E}\u{631}\u{634}\u{648}\u{641}";
    let rtl = Utf8String::from(rtl_text);

    let capitalize: fn(&Utf8String) -> Utf8String = |value: &Utf8String| {
        let mut value = value.clone();
        value.make_capitalized();
        value
    };
    let lowercase: fn(&Utf8String) -> Utf8String = |value: &Utf8String| {
        let mut value = value.clone();
        value.make_lowercase();
        value
    };
    let uppercase: fn(&Utf8String) -> Utf8String = |value: &Utf8String| {
        let mut value = value.clone();
        value.make_uppercase();
        value
    };

    assert_eq!(capitalize(&sharp_s), "SS");
    assert_eq!(capitalize(&tomorrow), "\u{391}\u{3CD}\u{3C1}\u{3B9}\u{3BF}");
    assert_eq!(capitalize(&year), "\u{388}\u{3C4}\u{3BF}\u{3C2}");
    assert_eq!(capitalize(&two_byte_chars), deseret_capitalized);
    assert_eq!(capitalize(&varying_length), "Z\u{2C65}\u{2C66}");
    assert_eq!(capitalize(&rtl), rtl_text);

    assert_eq!(lowercase(&sharp_s), "\u{DF}");
    assert_eq!(lowercase(&tomorrow), tomorrow_lower);
    assert_eq!(lowercase(&year), year_lower);
    assert_eq!(lowercase(&two_byte_chars), deseret_lower);
    assert_eq!(lowercase(&varying_length), "z\u{2C65}\u{2C66}");
    assert_eq!(lowercase(&rtl), rtl_text);

    assert_eq!(uppercase(&sharp_s), "SS");
    assert_eq!(uppercase(&tomorrow), "\u{391}\u{38E}\u{3A1}\u{399}\u{39F}");
    assert_eq!(uppercase(&year), "\u{388}\u{3A4}\u{39F}\u{3A3}");
    assert_eq!(uppercase(&two_byte_chars), deseret_upper);
    assert_eq!(uppercase(&varying_length), "Z\u{23A}\u{23E}");
    assert_eq!(uppercase(&rtl), rtl_text);
}
#[test]
fn casing_utf8_string_invalid_utf8() {
    // Case conversions must leave bytes that are not valid UTF-8 untouched.
    let mut invalid = Utf8String::from(b"\xFF\xFE");

    invalid.make_capitalized();
    assert_eq!(invalid, &b"\xFF\xFE"[..]);

    invalid.make_lowercase();
    assert_eq!(invalid, &b"\xFF\xFE"[..]);

    invalid.make_uppercase();
    assert_eq!(invalid, &b"\xFF\xFE"[..]);
}
#[test]
fn casing_utf8_string_unicode_replacement_character() {
    // U+FFFD REPLACEMENT CHARACTER has no case, so every conversion must
    // leave it unchanged. It is escaped so the literal keeps its exact code
    // point regardless of how this file is re-encoded.
    let mut s = Utf8String::from("\u{FFFD}");
    s.make_capitalized();
    assert_eq!(s, "\u{FFFD}");
    s.make_lowercase();
    assert_eq!(s, "\u{FFFD}");
    s.make_uppercase();
    assert_eq!(s, "\u{FFFD}");
}
#[test]
fn chr_does_not_return_more_than_one_byte_for_invalid_utf8() {
    // The first three bytes of a four-byte sequence: incomplete, so `chr`
    // is expected to yield only the leading byte.
    let truncated = Utf8String::from(b"\xF0\x9F\x87");
    assert_eq!(truncated.chr(), b"\xF0");
}
#[test]
fn get_char_slice_valid_range() {
    // Three characters: 'a', U+1F48E (the bytes `F0 9F 92 8E`) and one
    // invalid `0xFF` byte. The emoji is escaped in the expected values so it
    // is guaranteed to match the bytes above.
    let s = Utf8String::from(b"a\xF0\x9F\x92\x8E\xFF".to_vec());
    assert_eq!(s.get_char_slice(0..0), Some(Utf8Str::empty()));
    assert_eq!(s.get_char_slice(0..1), Some(Utf8Str::new(b"a")));
    assert_eq!(s.get_char_slice(0..2), Some(Utf8Str::new("a\u{1F48E}")));
    assert_eq!(s.get_char_slice(0..3), Some(Utf8Str::new(b"a\xF0\x9F\x92\x8E\xFF")));
    // An end past the last character is clamped to the end of the string.
    assert_eq!(s.get_char_slice(0..4), Some(Utf8Str::new(b"a\xF0\x9F\x92\x8E\xFF")));
    assert_eq!(s.get_char_slice(1..1), Some(Utf8Str::empty()));
    assert_eq!(s.get_char_slice(1..2), Some(Utf8Str::new("\u{1F48E}")));
    assert_eq!(s.get_char_slice(1..3), Some(Utf8Str::new(b"\xF0\x9F\x92\x8E\xFF")));
}
#[test]
#[allow(clippy::reversed_empty_ranges)]
fn get_char_slice_invalid_range() {
    // Three characters: 'a', a four-byte emoji and one invalid `0xFF` byte.
    let haystack = Utf8String::from(b"a\xF0\x9F\x92\x8E\xFF".to_vec());

    // A start beyond the end of the string yields no slice.
    assert_eq!(haystack.get_char_slice(4..5), None);
    assert_eq!(haystack.get_char_slice(4..1), None);

    // A reversed range whose start is still within the string is empty.
    assert_eq!(haystack.get_char_slice(3..1), Some(Utf8Str::empty()));
    assert_eq!(haystack.get_char_slice(2..1), Some(Utf8Str::empty()));

    // Far out-of-bounds starts, in forward and reversed ranges.
    assert_eq!(haystack.get_char_slice(7..10), None);
    assert_eq!(haystack.get_char_slice(10..8), None);
    assert_eq!(haystack.get_char_slice(10..5), None);
    assert_eq!(haystack.get_char_slice(10..2), None);
}
#[test]
fn index_with_default_offset() {
    // 'f', U+1F48E (one character, four bytes), 'o', 'o'. The expected
    // positions are character indexes, so the first 'o' is at 2.
    let s = Utf8String::from("f\u{1F48E}oo");
    assert_eq!(s.index("f".as_bytes(), 0), Some(0));
    assert_eq!(s.index("o".as_bytes(), 0), Some(2));
    assert_eq!(s.index("oo".as_bytes(), 0), Some(2));
    assert_eq!(s.index("ooo".as_bytes(), 0), None);
}
#[test]
fn index_with_different_offset() {
    // 'f', U+1F48E (one character, four bytes), 'o', 'o'. Offsets and
    // results are character indexes.
    let s = Utf8String::from("f\u{1F48E}oo");
    assert_eq!(s.index("o".as_bytes(), 1), Some(2));
    assert_eq!(s.index("o".as_bytes(), 2), Some(2));
    assert_eq!(s.index("o".as_bytes(), 3), Some(3));
    // Starting at the end of the string finds nothing.
    assert_eq!(s.index("o".as_bytes(), 4), None);
}
#[test]
fn rindex_with_default_offset() {
    // 'f', U+1F48E (one character, four bytes), 'o', 'o'. Offset 3 is the
    // index of the last character; results are character indexes.
    let s = Utf8String::from("f\u{1F48E}oo");
    assert_eq!(s.rindex("f".as_bytes(), 3), Some(0));
    assert_eq!(s.rindex("o".as_bytes(), 3), Some(3));
    assert_eq!(s.rindex("oo".as_bytes(), 3), Some(2));
    assert_eq!(s.rindex("ooo".as_bytes(), 3), None);
}
#[test]
fn rindex_with_different_offset() {
    // 'f', U+1F48E (one character, four bytes), 'o', 'o'. Offsets and
    // results are character indexes.
    let s = Utf8String::from("f\u{1F48E}oo");
    assert_eq!(s.rindex("o".as_bytes(), 4), Some(3));
    assert_eq!(s.rindex("o".as_bytes(), 3), Some(3));
    assert_eq!(s.rindex("o".as_bytes(), 2), Some(2));
    // No 'o' starts at or before character 1.
    assert_eq!(s.rindex("o".as_bytes(), 1), None);
    assert_eq!(s.rindex("o".as_bytes(), 0), None);
}
#[test]
fn index_and_rindex_support_invalid_utf8_in_needle() {
    // U+1F48E encodes to four bytes, so its first three bytes form an
    // incomplete (invalid) UTF-8 sequence. Such a needle must not match the
    // complete emoji in the haystack.
    let needle = &"\u{1F48E}".as_bytes()[..3];
    assert_eq!(Utf8String::from("f\u{1F48E}oo").index(needle, 0), None);
    assert_eq!(Utf8String::from("f\u{1F48E}oo").rindex(needle, 3), None);
}
#[test]
fn index_and_rindex_support_invalid_utf8_in_haystack() {
    // Haystack: 'f', the first two bytes of the four-byte U+1F48E (an
    // incomplete, invalid sequence), then "oo". Searching for the complete
    // emoji must find nothing.
    let mut haystack = Vec::new();
    haystack.extend_from_slice(b"f");
    haystack.extend_from_slice(&"\u{1F48E}".as_bytes()[..2]);
    haystack.extend_from_slice(b"oo");
    let haystack = Utf8String::from(haystack);
    assert_eq!(haystack.index("\u{1F48E}".as_bytes(), 0), None);
    assert_eq!(haystack.rindex("\u{1F48E}".as_bytes(), 3), None);
}
#[test]
fn index_empties() {
    // Empty haystack: the empty needle is found at 0, anything else is not.
    let empty = Utf8String::from("");
    assert_eq!(empty.index(b"", 0), Some(0));
    assert_eq!(empty.index(b"a", 0), None);

    // Non-empty haystack: the empty needle is found at the start offset.
    let single = Utf8String::from("a");
    assert_eq!(single.index(b"", 0), Some(0));
}
#[test]
fn rindex_empties() {
    // Empty haystack: the empty needle is found at 0 for any offset, and a
    // non-empty needle is never found.
    let empty = Utf8String::from("");
    assert_eq!(empty.rindex(b"", usize::MAX), Some(0));
    assert_eq!(empty.rindex(b"", 1), Some(0));
    assert_eq!(empty.rindex(b"", 0), Some(0));
    assert_eq!(empty.rindex(b"a", usize::MAX), None);
    assert_eq!(empty.rindex(b"a", 1), None);
    assert_eq!(empty.rindex(b"a", 0), None);

    // One-character haystack: the empty needle is found at the end of the
    // string when the offset reaches or exceeds it.
    let single = Utf8String::from("a");
    assert_eq!(single.rindex(b"", usize::MAX), Some(1));
    assert_eq!(single.rindex(b"", 1), Some(1));
}
}