spinoso_string/enc/utf8/
owned.rs1use alloc::collections::TryReserveError;
2use alloc::vec::Vec;
3
4use scolapasta_strbuf::Buf;
5
6use super::Utf8Str;
7use crate::case_folding::CaseFoldingEffect;
8use crate::chars::ConventionallyUtf8;
9use crate::codepoints::InvalidCodepointError;
10use crate::enc::utf8::case_change;
11use crate::iter::IntoIter;
12
13mod eq;
14mod impls;
15#[cfg(feature = "std")]
16mod io;
17
18#[repr(transparent)]
19#[derive(Hash, PartialEq, Eq, PartialOrd, Ord)]
20pub struct Utf8String {
21 inner: Buf,
22}
23
24impl Utf8String {
26 #[inline]
27 pub const fn new(buf: Buf) -> Self {
28 Self { inner: buf }
29 }
30
31 #[inline]
32 pub fn empty() -> Self {
33 Self { inner: Buf::new() }
34 }
35}
36
37impl Utf8String {
39 #[inline]
40 #[must_use]
41 pub(crate) fn into_buf(self) -> Buf {
42 self.inner
43 }
44
45 #[inline]
46 #[must_use]
47 pub fn as_utf8_str(&self) -> &Utf8Str {
48 Utf8Str::from_bytes(self.inner.as_slice())
49 }
50
51 #[inline]
52 #[must_use]
53 pub fn as_mut_utf8_str(&mut self) -> &mut Utf8Str {
54 Utf8Str::from_bytes_mut(self.inner.as_mut_slice())
55 }
56}
57
58impl Utf8String {
60 #[inline]
61 #[must_use]
62 pub fn into_iter(self) -> IntoIter {
63 IntoIter::from_vec(self.inner.into_inner())
64 }
65}
66
67impl Utf8String {
69 #[inline]
70 pub unsafe fn set_len(&mut self, len: usize) {
71 unsafe {
74 self.inner.set_len(len);
75 }
76 }
77
78 #[inline]
79 #[must_use]
80 pub fn capacity(&self) -> usize {
81 self.inner.capacity()
82 }
83
84 #[inline]
85 pub fn clear(&mut self) {
86 self.inner.clear();
87 }
88
89 #[inline]
90 pub fn truncate(&mut self, len: usize) {
91 self.inner.truncate(len);
92 }
93}
94
95impl Utf8String {
97 #[inline]
98 pub fn reserve(&mut self, additional: usize) {
99 self.inner.reserve(additional);
100 }
101
102 #[inline]
103 pub fn try_reserve(&mut self, additional: usize) -> Result<(), TryReserveError> {
104 self.inner.try_reserve(additional)
105 }
106
107 #[inline]
108 pub fn reserve_exact(&mut self, additional: usize) {
109 self.inner.reserve_exact(additional);
110 }
111
112 #[inline]
113 pub fn try_reserve_exact(&mut self, additional: usize) -> Result<(), TryReserveError> {
114 self.inner.try_reserve_exact(additional)
115 }
116
117 #[inline]
118 pub fn shrink_to_fit(&mut self) {
119 self.inner.shrink_to_fit();
120 }
121
122 #[inline]
123 pub fn shrink_to(&mut self, min_capacity: usize) {
124 self.inner.shrink_to(min_capacity);
125 }
126}
127
128impl Utf8String {
130 #[inline]
131 pub fn push_byte(&mut self, byte: u8) {
132 self.inner.push_byte(byte);
133 }
134
135 #[inline]
136 pub fn try_push_codepoint(&mut self, codepoint: i64) -> Result<(), InvalidCodepointError> {
137 let codepoint = if let Ok(codepoint) = u32::try_from(codepoint) {
138 codepoint
139 } else {
140 return Err(InvalidCodepointError::codepoint_out_of_range(codepoint));
141 };
142 if let Ok(ch) = char::try_from(codepoint) {
143 self.push_char(ch);
144 Ok(())
145 } else {
146 Err(InvalidCodepointError::invalid_utf8_codepoint(codepoint))
147 }
148 }
149
150 #[inline]
151 pub fn try_push_int(&mut self, int: i64) -> Result<(), InvalidCodepointError> {
152 self.try_push_codepoint(int)
153 }
154
155 #[inline]
156 pub fn push_char(&mut self, ch: char) {
157 self.inner.push_char(ch);
158 }
159
160 #[inline]
161 pub fn push_str(&mut self, s: &str) {
162 self.inner.push_str(s);
163 }
164
165 #[inline]
166 pub fn extend_from_slice(&mut self, other: &[u8]) {
167 self.inner.extend_from_slice(other);
168 }
169}
170
171impl Utf8String {
179 #[inline]
180 pub fn make_capitalized(&mut self) -> CaseFoldingEffect {
181 let (replacement, effect) = case_change::to_utf8_capitalized(self.as_bytes());
182 self.inner = replacement.into();
184 effect
185 }
186
187 #[inline]
188 pub fn make_lowercase(&mut self) -> CaseFoldingEffect {
189 let (replacement, effect) = case_change::to_utf8_lowercase(self.as_bytes());
190 self.inner = replacement.into();
192 effect
193 }
194
195 #[inline]
196 pub fn make_uppercase(&mut self) -> CaseFoldingEffect {
197 let (replacement, effect) = case_change::to_utf8_uppercase(self.as_bytes());
198 self.inner = replacement.into();
200 effect
201 }
202
203 #[inline]
204 pub fn make_swapcase(&mut self) -> CaseFoldingEffect {
205 let (replacement, effect) = case_change::to_utf8_swapcase(self.as_bytes());
206 self.inner = replacement.into();
208 effect
209 }
210}
211
212impl Utf8String {
214 #[inline]
215 pub fn reverse(&mut self) {
216 if self.is_ascii_only() {
218 self.inner.reverse();
219 return;
220 }
221 let chars = ConventionallyUtf8::from(&self.inner[..]).collect::<Vec<_>>();
224 let mut replacement = Vec::with_capacity(self.inner.len());
227 for &bytes in chars.iter().rev() {
228 replacement.extend_from_slice(bytes);
229 }
230 self.inner = replacement.into();
231 }
232}
233
#[cfg(test)]
mod tests {
    use bstr::ByteSlice;

    use super::Utf8String;

    // ASCII-only content is reversed byte for byte.
    #[test]
    fn reverse_ascii() {
        let mut subject = Utf8String::from("1234");
        subject.reverse();
        assert_eq!(subject, "4321");
    }

    // Trailing invalid bytes end up, individually reversed, at the front.
    #[test]
    fn reverse_ascii_with_invalid_utf8() {
        let mut subject = Utf8String::from(b"1234\xFF\xFE");
        subject.reverse();
        assert_eq!(subject, b"\xFE\xFF4321".as_bstr());
    }

    // Multibyte characters change position but keep their internal byte order.
    #[test]
    fn reverse_multibyte() {
        let mut subject = Utf8String::from("怎么样");
        subject.reverse();
        assert_eq!(subject, "样么怎");
    }

    // Valid multibyte characters followed by invalid bytes.
    #[test]
    fn reverse_multibyte_with_invalid_utf8() {
        let mut subject = Utf8String::from("怎么样");
        subject.extend_from_slice(b"\xFF\xFE");
        subject.reverse();

        let expected = [&b"\xFE\xFF"[..], "样么怎".as_bytes()].concat();
        assert_eq!(subject, expected.as_bstr());
    }

    // `\xF0\x9F\x87` is a truncated multibyte sequence and is reversed one
    // byte at a time, while the well-formed U+FFFD (`\xEF\xBF\xBD`) at the
    // end is kept intact.
    #[test]
    fn reverse_replacement_char_with_invalid_utf8_prefix() {
        let mut subject = Utf8String::from(b"abc\xF0\x9F\x87def\xEF\xBF\xBD");
        subject.reverse();
        assert_eq!(subject, b"\xEF\xBF\xBDfed\x87\x9F\xF0cba".as_bstr());
    }
}