bstr/ext_vec.rs
1use core::{fmt, iter, ops, ptr};
2
3use alloc::{borrow::Cow, string::String, vec, vec::Vec};
4
5#[cfg(feature = "std")]
6use std::{
7 error,
8 ffi::{OsStr, OsString},
9 path::{Path, PathBuf},
10};
11
12use crate::{
13 ext_slice::ByteSlice,
14 utf8::{self, Utf8Error},
15};
16
17/// Concatenate the elements given by the iterator together into a single
18/// `Vec<u8>`.
19///
20/// The elements may be any type that can be cheaply converted into an `&[u8]`.
21/// This includes, but is not limited to, `&str`, `&BStr` and `&[u8]` itself.
22///
23/// # Examples
24///
25/// Basic usage:
26///
27/// ```
28/// use bstr;
29///
30/// let s = bstr::concat(&["foo", "bar", "baz"]);
31/// assert_eq!(s, "foobarbaz".as_bytes());
32/// ```
33#[inline]
34pub fn concat<T, I>(elements: I) -> Vec<u8>
35where
36 T: AsRef<[u8]>,
37 I: IntoIterator<Item = T>,
38{
39 let mut dest = vec![];
40 for element in elements {
41 dest.push_str(element);
42 }
43 dest
44}
45
46/// Join the elements given by the iterator with the given separator into a
47/// single `Vec<u8>`.
48///
49/// Both the separator and the elements may be any type that can be cheaply
50/// converted into an `&[u8]`. This includes, but is not limited to,
51/// `&str`, `&BStr` and `&[u8]` itself.
52///
53/// # Examples
54///
55/// Basic usage:
56///
57/// ```
58/// use bstr;
59///
60/// let s = bstr::join(",", &["foo", "bar", "baz"]);
61/// assert_eq!(s, "foo,bar,baz".as_bytes());
62/// ```
63#[inline]
64pub fn join<B, T, I>(separator: B, elements: I) -> Vec<u8>
65where
66 B: AsRef<[u8]>,
67 T: AsRef<[u8]>,
68 I: IntoIterator<Item = T>,
69{
70 let mut it = elements.into_iter();
71 let mut dest = vec![];
72 match it.next() {
73 None => return dest,
74 Some(first) => {
75 dest.push_str(first);
76 }
77 }
78 for element in it {
79 dest.push_str(&separator);
80 dest.push_str(element);
81 }
82 dest
83}
84
// `Vec<u8>` is its own backing vector, so every accessor below hands back
// `self` unchanged.
impl ByteVec for Vec<u8> {
    // Shared view of the backing vector: `self` as-is.
    #[inline]
    fn as_vec(&self) -> &Vec<u8> {
        self
    }

    // Mutable view of the backing vector: `self` as-is.
    #[inline]
    fn as_vec_mut(&mut self) -> &mut Vec<u8> {
        self
    }

    // Ownership passes straight through; nothing is copied.
    #[inline]
    fn into_vec(self) -> Vec<u8> {
        self
    }
}
101
/// Ensure that callers cannot implement `ByteVec` by making an
/// unimplementable trait its super trait.
mod private {
    // The module itself is not `pub`, so although `Sealed` is `pub` it can
    // only be named from inside this crate.
    pub trait Sealed {}
}
// The only `Sealed` impl in this file: `Vec<u8>` may implement `ByteVec`.
impl private::Sealed for Vec<u8> {}
108
109/// A trait that extends `Vec<u8>` with string oriented methods.
110///
111/// Note that when using the constructor methods, such as
112/// `ByteVec::from_slice`, one should actually call them using the concrete
113/// type. For example:
114///
115/// ```
116/// use bstr::{B, ByteVec};
117///
118/// let s = Vec::from_slice(b"abc"); // NOT ByteVec::from_slice("...")
119/// assert_eq!(s, B("abc"));
120/// ```
121///
122/// This trait is sealed and cannot be implemented outside of `bstr`.
123pub trait ByteVec: private::Sealed {
    /// A method for accessing the raw vector bytes of this type. This is
    /// always a no-op and callers shouldn't care about it. This only exists
    /// for making the extension trait work.
    ///
    /// The provided methods of this trait call this to read the underlying
    /// `Vec<u8>`.
    #[doc(hidden)]
    fn as_vec(&self) -> &Vec<u8>;

    /// A method for accessing the raw vector bytes of this type, mutably. This
    /// is always a no-op and callers shouldn't care about it. This only exists
    /// for making the extension trait work.
    ///
    /// The provided methods of this trait call this to modify the underlying
    /// `Vec<u8>`.
    #[doc(hidden)]
    fn as_vec_mut(&mut self) -> &mut Vec<u8>;

    /// A method for consuming ownership of this vector. This is always a no-op
    /// and callers shouldn't care about it. This only exists for making the
    /// extension trait work.
    ///
    /// The consuming `into_*` methods of this trait call this to take the
    /// underlying `Vec<u8>` by value.
    #[doc(hidden)]
    fn into_vec(self) -> Vec<u8>
    where
        Self: Sized;
143
144 /// Create a new owned byte string from the given byte slice.
145 ///
146 /// # Examples
147 ///
148 /// Basic usage:
149 ///
150 /// ```
151 /// use bstr::{B, ByteVec};
152 ///
153 /// let s = Vec::from_slice(b"abc");
154 /// assert_eq!(s, B("abc"));
155 /// ```
156 #[inline]
157 fn from_slice<B: AsRef<[u8]>>(bytes: B) -> Vec<u8> {
158 bytes.as_ref().to_vec()
159 }
160
161 /// Create a new byte string from an owned OS string.
162 ///
163 /// When the underlying bytes of OS strings are accessible, then this
164 /// always succeeds and is zero cost. Otherwise, this returns the given
165 /// `OsString` if it is not valid UTF-8.
166 ///
167 /// # Examples
168 ///
169 /// Basic usage:
170 ///
171 /// ```
172 /// use std::ffi::OsString;
173 ///
174 /// use bstr::{B, ByteVec};
175 ///
176 /// let os_str = OsString::from("foo");
177 /// let bs = Vec::from_os_string(os_str).expect("valid UTF-8");
178 /// assert_eq!(bs, B("foo"));
179 /// ```
180 #[inline]
181 #[cfg(feature = "std")]
182 fn from_os_string(os_str: OsString) -> Result<Vec<u8>, OsString> {
183 #[cfg(unix)]
184 #[inline]
185 fn imp(os_str: OsString) -> Result<Vec<u8>, OsString> {
186 use std::os::unix::ffi::OsStringExt;
187
188 Ok(os_str.into_vec())
189 }
190
191 #[cfg(not(unix))]
192 #[inline]
193 fn imp(os_str: OsString) -> Result<Vec<u8>, OsString> {
194 os_str.into_string().map(Vec::from)
195 }
196
197 imp(os_str)
198 }
199
200 /// Lossily create a new byte string from an OS string slice.
201 ///
202 /// When the underlying bytes of OS strings are accessible, then this is
203 /// zero cost and always returns a slice. Otherwise, a UTF-8 check is
204 /// performed and if the given OS string is not valid UTF-8, then it is
205 /// lossily decoded into valid UTF-8 (with invalid bytes replaced by the
206 /// Unicode replacement codepoint).
207 ///
208 /// # Examples
209 ///
210 /// Basic usage:
211 ///
212 /// ```
213 /// use std::ffi::OsStr;
214 ///
215 /// use bstr::{B, ByteVec};
216 ///
217 /// let os_str = OsStr::new("foo");
218 /// let bs = Vec::from_os_str_lossy(os_str);
219 /// assert_eq!(bs, B("foo"));
220 /// ```
221 #[inline]
222 #[cfg(feature = "std")]
223 fn from_os_str_lossy(os_str: &OsStr) -> Cow<'_, [u8]> {
224 #[cfg(unix)]
225 #[inline]
226 fn imp(os_str: &OsStr) -> Cow<'_, [u8]> {
227 use std::os::unix::ffi::OsStrExt;
228
229 Cow::Borrowed(os_str.as_bytes())
230 }
231
232 #[cfg(not(unix))]
233 #[inline]
234 fn imp(os_str: &OsStr) -> Cow<'_, [u8]> {
235 match os_str.to_string_lossy() {
236 Cow::Borrowed(x) => Cow::Borrowed(x.as_bytes()),
237 Cow::Owned(x) => Cow::Owned(Vec::from(x)),
238 }
239 }
240
241 imp(os_str)
242 }
243
244 /// Create a new byte string from an owned file path.
245 ///
246 /// When the underlying bytes of paths are accessible, then this always
247 /// succeeds and is zero cost. Otherwise, this returns the given `PathBuf`
248 /// if it is not valid UTF-8.
249 ///
250 /// # Examples
251 ///
252 /// Basic usage:
253 ///
254 /// ```
255 /// use std::path::PathBuf;
256 ///
257 /// use bstr::{B, ByteVec};
258 ///
259 /// let path = PathBuf::from("foo");
260 /// let bs = Vec::from_path_buf(path).expect("must be valid UTF-8");
261 /// assert_eq!(bs, B("foo"));
262 /// ```
263 #[inline]
264 #[cfg(feature = "std")]
265 fn from_path_buf(path: PathBuf) -> Result<Vec<u8>, PathBuf> {
266 Vec::from_os_string(path.into_os_string()).map_err(PathBuf::from)
267 }
268
269 /// Lossily create a new byte string from a file path.
270 ///
271 /// When the underlying bytes of paths are accessible, then this is
272 /// zero cost and always returns a slice. Otherwise, a UTF-8 check is
273 /// performed and if the given path is not valid UTF-8, then it is lossily
274 /// decoded into valid UTF-8 (with invalid bytes replaced by the Unicode
275 /// replacement codepoint).
276 ///
277 /// # Examples
278 ///
279 /// Basic usage:
280 ///
281 /// ```
282 /// use std::path::Path;
283 ///
284 /// use bstr::{B, ByteVec};
285 ///
286 /// let path = Path::new("foo");
287 /// let bs = Vec::from_path_lossy(path);
288 /// assert_eq!(bs, B("foo"));
289 /// ```
290 #[inline]
291 #[cfg(feature = "std")]
292 fn from_path_lossy(path: &Path) -> Cow<'_, [u8]> {
293 Vec::from_os_str_lossy(path.as_os_str())
294 }
295
296 /// Unescapes the given string into its raw bytes.
297 ///
    /// This looks for the escape sequences `\xNN`, `\0`, `\r`, `\n`, `\t`
    /// and `\\` and translates them into their corresponding unescaped form.
300 ///
301 /// Incomplete escape sequences or things that look like escape sequences
302 /// but are not (for example, `\i` or `\xYZ`) are passed through literally.
303 ///
304 /// This is the dual of [`ByteSlice::escape_bytes`].
305 ///
306 /// Note that the zero or NUL byte may be represented as either `\0` or
307 /// `\x00`. Both will be unescaped into the zero byte.
308 ///
309 /// # Examples
310 ///
311 /// This shows basic usage:
312 ///
313 /// ```
314 /// # #[cfg(feature = "alloc")] {
315 /// use bstr::{B, BString, ByteVec};
316 ///
317 /// assert_eq!(
318 /// BString::from(b"foo\xFFbar"),
319 /// Vec::unescape_bytes(r"foo\xFFbar"),
320 /// );
321 /// assert_eq!(
322 /// BString::from(b"foo\nbar"),
323 /// Vec::unescape_bytes(r"foo\nbar"),
324 /// );
325 /// assert_eq!(
326 /// BString::from(b"foo\tbar"),
327 /// Vec::unescape_bytes(r"foo\tbar"),
328 /// );
329 /// assert_eq!(
330 /// BString::from(b"foo\\bar"),
331 /// Vec::unescape_bytes(r"foo\\bar"),
332 /// );
333 /// assert_eq!(
334 /// BString::from("foo☃bar"),
335 /// Vec::unescape_bytes(r"foo☃bar"),
336 /// );
337 ///
338 /// # }
339 /// ```
340 ///
341 /// This shows some examples of how incomplete or "incorrect" escape
342 /// sequences get passed through literally.
343 ///
344 /// ```
345 /// # #[cfg(feature = "alloc")] {
346 /// use bstr::{B, BString, ByteVec};
347 ///
348 /// // Show some incomplete escape sequences.
349 /// assert_eq!(
350 /// BString::from(br"\"),
351 /// Vec::unescape_bytes(r"\"),
352 /// );
353 /// assert_eq!(
354 /// BString::from(br"\"),
355 /// Vec::unescape_bytes(r"\\"),
356 /// );
357 /// assert_eq!(
358 /// BString::from(br"\x"),
359 /// Vec::unescape_bytes(r"\x"),
360 /// );
361 /// assert_eq!(
362 /// BString::from(br"\xA"),
363 /// Vec::unescape_bytes(r"\xA"),
364 /// );
365 /// // And now some that kind of look like escape
366 /// // sequences, but aren't.
367 /// assert_eq!(
368 /// BString::from(br"\xZ"),
369 /// Vec::unescape_bytes(r"\xZ"),
370 /// );
371 /// assert_eq!(
372 /// BString::from(br"\xZZ"),
373 /// Vec::unescape_bytes(r"\xZZ"),
374 /// );
375 /// assert_eq!(
376 /// BString::from(br"\i"),
377 /// Vec::unescape_bytes(r"\i"),
378 /// );
379 /// assert_eq!(
380 /// BString::from(br"\u"),
381 /// Vec::unescape_bytes(r"\u"),
382 /// );
383 /// assert_eq!(
384 /// BString::from(br"\u{2603}"),
385 /// Vec::unescape_bytes(r"\u{2603}"),
386 /// );
387 ///
388 /// # }
389 /// ```
390 #[inline]
391 #[cfg(feature = "alloc")]
392 fn unescape_bytes<S: AsRef<str>>(escaped: S) -> Vec<u8> {
393 let s = escaped.as_ref();
394 crate::escape_bytes::UnescapeBytes::new(s.chars()).collect()
395 }
396
397 /// Appends the given byte to the end of this byte string.
398 ///
399 /// Note that this is equivalent to the generic `Vec::push` method. This
400 /// method is provided to permit callers to explicitly differentiate
401 /// between pushing bytes, codepoints and strings.
402 ///
403 /// # Examples
404 ///
405 /// Basic usage:
406 ///
407 /// ```
408 /// use bstr::ByteVec;
409 ///
410 /// let mut s = <Vec<u8>>::from("abc");
411 /// s.push_byte(b'\xE2');
412 /// s.push_byte(b'\x98');
413 /// s.push_byte(b'\x83');
414 /// assert_eq!(s, "abc☃".as_bytes());
415 /// ```
416 #[inline]
417 fn push_byte(&mut self, byte: u8) {
418 self.as_vec_mut().push(byte);
419 }
420
421 /// Appends the given `char` to the end of this byte string.
422 ///
423 /// # Examples
424 ///
425 /// Basic usage:
426 ///
427 /// ```
428 /// use bstr::ByteVec;
429 ///
430 /// let mut s = <Vec<u8>>::from("abc");
431 /// s.push_char('1');
432 /// s.push_char('2');
433 /// s.push_char('3');
434 /// assert_eq!(s, "abc123".as_bytes());
435 /// ```
436 #[inline]
437 fn push_char(&mut self, ch: char) {
438 if ch.len_utf8() == 1 {
439 self.push_byte(ch as u8);
440 return;
441 }
442 self.as_vec_mut()
443 .extend_from_slice(ch.encode_utf8(&mut [0; 4]).as_bytes());
444 }
445
446 /// Appends the given slice to the end of this byte string. This accepts
    /// any type that can be converted to a `&[u8]`. This includes, but is not
448 /// limited to, `&str`, `&BStr`, and of course, `&[u8]` itself.
449 ///
450 /// # Examples
451 ///
452 /// Basic usage:
453 ///
454 /// ```
455 /// use bstr::ByteVec;
456 ///
457 /// let mut s = <Vec<u8>>::from("abc");
458 /// s.push_str(b"123");
459 /// assert_eq!(s, "abc123".as_bytes());
460 /// ```
461 #[inline]
462 fn push_str<B: AsRef<[u8]>>(&mut self, bytes: B) {
463 self.as_vec_mut().extend_from_slice(bytes.as_ref());
464 }
465
466 /// Converts a `Vec<u8>` into a `String` if and only if this byte string is
467 /// valid UTF-8.
468 ///
469 /// If it is not valid UTF-8, then a
470 /// [`FromUtf8Error`](struct.FromUtf8Error.html)
471 /// is returned. (This error can be used to examine why UTF-8 validation
472 /// failed, or to regain the original byte string.)
473 ///
474 /// # Examples
475 ///
476 /// Basic usage:
477 ///
478 /// ```
479 /// use bstr::ByteVec;
480 ///
481 /// let bytes = Vec::from("hello");
482 /// let string = bytes.into_string().unwrap();
483 ///
484 /// assert_eq!("hello", string);
485 /// ```
486 ///
487 /// If this byte string is not valid UTF-8, then an error will be returned.
488 /// That error can then be used to inspect the location at which invalid
489 /// UTF-8 was found, or to regain the original byte string:
490 ///
491 /// ```
492 /// use bstr::{B, ByteVec};
493 ///
494 /// let bytes = Vec::from_slice(b"foo\xFFbar");
495 /// let err = bytes.into_string().unwrap_err();
496 ///
497 /// assert_eq!(err.utf8_error().valid_up_to(), 3);
498 /// assert_eq!(err.utf8_error().error_len(), Some(1));
499 ///
500 /// // At no point in this example is an allocation performed.
501 /// let bytes = Vec::from(err.into_vec());
502 /// assert_eq!(bytes, B(b"foo\xFFbar"));
503 /// ```
504 #[inline]
505 fn into_string(self) -> Result<String, FromUtf8Error>
506 where
507 Self: Sized,
508 {
509 match utf8::validate(self.as_vec()) {
510 Err(err) => Err(FromUtf8Error { original: self.into_vec(), err }),
511 Ok(()) => {
512 // SAFETY: This is safe because of the guarantees provided by
513 // utf8::validate.
514 unsafe { Ok(self.into_string_unchecked()) }
515 }
516 }
517 }
518
519 /// Lossily converts a `Vec<u8>` into a `String`. If this byte string
520 /// contains invalid UTF-8, then the invalid bytes are replaced with the
521 /// Unicode replacement codepoint.
522 ///
523 /// # Examples
524 ///
525 /// Basic usage:
526 ///
527 /// ```
528 /// use bstr::ByteVec;
529 ///
530 /// let bytes = Vec::from_slice(b"foo\xFFbar");
531 /// let string = bytes.into_string_lossy();
532 /// assert_eq!(string, "foo\u{FFFD}bar");
533 /// ```
534 #[inline]
535 fn into_string_lossy(self) -> String
536 where
537 Self: Sized,
538 {
539 match self.as_vec().to_str_lossy() {
540 Cow::Borrowed(_) => {
541 // SAFETY: to_str_lossy() returning a Cow::Borrowed guarantees
542 // the entire string is valid utf8.
543 unsafe { self.into_string_unchecked() }
544 }
545 Cow::Owned(s) => s,
546 }
547 }
548
549 /// Unsafely convert this byte string into a `String`, without checking for
550 /// valid UTF-8.
551 ///
552 /// # Safety
553 ///
554 /// Callers *must* ensure that this byte string is valid UTF-8 before
555 /// calling this method. Converting a byte string into a `String` that is
556 /// not valid UTF-8 is considered undefined behavior.
557 ///
558 /// This routine is useful in performance sensitive contexts where the
559 /// UTF-8 validity of the byte string is already known and it is
560 /// undesirable to pay the cost of an additional UTF-8 validation check
561 /// that [`into_string`](#method.into_string) performs.
562 ///
563 /// # Examples
564 ///
565 /// Basic usage:
566 ///
567 /// ```
568 /// use bstr::ByteVec;
569 ///
570 /// // SAFETY: This is safe because string literals are guaranteed to be
571 /// // valid UTF-8 by the Rust compiler.
572 /// let s = unsafe { Vec::from("☃βツ").into_string_unchecked() };
573 /// assert_eq!("☃βツ", s);
574 /// ```
575 #[inline]
576 unsafe fn into_string_unchecked(self) -> String
577 where
578 Self: Sized,
579 {
580 String::from_utf8_unchecked(self.into_vec())
581 }
582
583 /// Converts this byte string into an OS string, in place.
584 ///
585 /// When OS strings can be constructed from arbitrary byte sequences, this
586 /// always succeeds and is zero cost. Otherwise, if this byte string is not
587 /// valid UTF-8, then an error (with the original byte string) is returned.
588 ///
589 /// # Examples
590 ///
591 /// Basic usage:
592 ///
593 /// ```
594 /// use std::ffi::OsStr;
595 ///
596 /// use bstr::ByteVec;
597 ///
598 /// let bs = Vec::from("foo");
599 /// let os_str = bs.into_os_string().expect("should be valid UTF-8");
600 /// assert_eq!(os_str, OsStr::new("foo"));
601 /// ```
602 #[cfg(feature = "std")]
603 #[inline]
604 fn into_os_string(self) -> Result<OsString, FromUtf8Error>
605 where
606 Self: Sized,
607 {
608 #[cfg(unix)]
609 #[inline]
610 fn imp(v: Vec<u8>) -> Result<OsString, FromUtf8Error> {
611 use std::os::unix::ffi::OsStringExt;
612
613 Ok(OsString::from_vec(v))
614 }
615
616 #[cfg(not(unix))]
617 #[inline]
618 fn imp(v: Vec<u8>) -> Result<OsString, FromUtf8Error> {
619 v.into_string().map(OsString::from)
620 }
621
622 imp(self.into_vec())
623 }
624
625 /// Lossily converts this byte string into an OS string, in place.
626 ///
    /// When OS strings can be constructed from arbitrary byte sequences, this
    /// is zero cost and preserves every byte. Otherwise, this will perform a
629 /// UTF-8 check and lossily convert this byte string into valid UTF-8 using
630 /// the Unicode replacement codepoint.
631 ///
632 /// Note that this can prevent the correct roundtripping of file paths when
633 /// the representation of `OsString` is opaque.
634 ///
635 /// # Examples
636 ///
637 /// Basic usage:
638 ///
639 /// ```
640 /// use bstr::ByteVec;
641 ///
642 /// let bs = Vec::from_slice(b"foo\xFFbar");
643 /// let os_str = bs.into_os_string_lossy();
644 /// assert_eq!(os_str.to_string_lossy(), "foo\u{FFFD}bar");
645 /// ```
646 #[inline]
647 #[cfg(feature = "std")]
648 fn into_os_string_lossy(self) -> OsString
649 where
650 Self: Sized,
651 {
652 #[cfg(unix)]
653 #[inline]
654 fn imp(v: Vec<u8>) -> OsString {
655 use std::os::unix::ffi::OsStringExt;
656
657 OsString::from_vec(v)
658 }
659
660 #[cfg(not(unix))]
661 #[inline]
662 fn imp(v: Vec<u8>) -> OsString {
663 OsString::from(v.into_string_lossy())
664 }
665
666 imp(self.into_vec())
667 }
668
669 /// Converts this byte string into an owned file path, in place.
670 ///
671 /// When paths can be constructed from arbitrary byte sequences, this
672 /// always succeeds and is zero cost. Otherwise, if this byte string is not
673 /// valid UTF-8, then an error (with the original byte string) is returned.
674 ///
675 /// # Examples
676 ///
677 /// Basic usage:
678 ///
679 /// ```
680 /// use bstr::ByteVec;
681 ///
682 /// let bs = Vec::from("foo");
683 /// let path = bs.into_path_buf().expect("should be valid UTF-8");
684 /// assert_eq!(path.as_os_str(), "foo");
685 /// ```
686 #[cfg(feature = "std")]
687 #[inline]
688 fn into_path_buf(self) -> Result<PathBuf, FromUtf8Error>
689 where
690 Self: Sized,
691 {
692 self.into_os_string().map(PathBuf::from)
693 }
694
695 /// Lossily converts this byte string into an owned file path, in place.
696 ///
    /// When paths can be constructed from arbitrary byte sequences, this is
    /// zero cost and preserves every byte. Otherwise, this will perform a
699 /// UTF-8 check and lossily convert this byte string into valid UTF-8 using
700 /// the Unicode replacement codepoint.
701 ///
702 /// Note that this can prevent the correct roundtripping of file paths when
703 /// the representation of `PathBuf` is opaque.
704 ///
705 /// # Examples
706 ///
707 /// Basic usage:
708 ///
709 /// ```
710 /// use bstr::ByteVec;
711 ///
712 /// let bs = Vec::from_slice(b"foo\xFFbar");
713 /// let path = bs.into_path_buf_lossy();
714 /// assert_eq!(path.to_string_lossy(), "foo\u{FFFD}bar");
715 /// ```
716 #[inline]
717 #[cfg(feature = "std")]
718 fn into_path_buf_lossy(self) -> PathBuf
719 where
720 Self: Sized,
721 {
722 PathBuf::from(self.into_os_string_lossy())
723 }
724
725 /// Removes the last byte from this `Vec<u8>` and returns it.
726 ///
727 /// If this byte string is empty, then `None` is returned.
728 ///
729 /// If the last codepoint in this byte string is not ASCII, then removing
730 /// the last byte could make this byte string contain invalid UTF-8.
731 ///
732 /// Note that this is equivalent to the generic `Vec::pop` method. This
733 /// method is provided to permit callers to explicitly differentiate
734 /// between popping bytes and codepoints.
735 ///
736 /// # Examples
737 ///
738 /// Basic usage:
739 ///
740 /// ```
741 /// use bstr::ByteVec;
742 ///
743 /// let mut s = Vec::from("foo");
744 /// assert_eq!(s.pop_byte(), Some(b'o'));
745 /// assert_eq!(s.pop_byte(), Some(b'o'));
746 /// assert_eq!(s.pop_byte(), Some(b'f'));
747 /// assert_eq!(s.pop_byte(), None);
748 /// ```
749 #[inline]
750 fn pop_byte(&mut self) -> Option<u8> {
751 self.as_vec_mut().pop()
752 }
753
754 /// Removes the last codepoint from this `Vec<u8>` and returns it.
755 ///
756 /// If this byte string is empty, then `None` is returned. If the last
757 /// bytes of this byte string do not correspond to a valid UTF-8 code unit
758 /// sequence, then the Unicode replacement codepoint is yielded instead in
759 /// accordance with the
    /// [replacement codepoint substitution policy](index.html#handling-of-invalid-utf-8).
761 ///
762 /// # Examples
763 ///
764 /// Basic usage:
765 ///
766 /// ```
767 /// use bstr::ByteVec;
768 ///
769 /// let mut s = Vec::from("foo");
770 /// assert_eq!(s.pop_char(), Some('o'));
771 /// assert_eq!(s.pop_char(), Some('o'));
772 /// assert_eq!(s.pop_char(), Some('f'));
773 /// assert_eq!(s.pop_char(), None);
774 /// ```
775 ///
776 /// This shows the replacement codepoint substitution policy. Note that
777 /// the first pop yields a replacement codepoint but actually removes two
778 /// bytes. This is in contrast with subsequent pops when encountering
779 /// `\xFF` since `\xFF` is never a valid prefix for any valid UTF-8
780 /// code unit sequence.
781 ///
782 /// ```
783 /// use bstr::ByteVec;
784 ///
785 /// let mut s = Vec::from_slice(b"f\xFF\xFF\xFFoo\xE2\x98");
786 /// assert_eq!(s.pop_char(), Some('\u{FFFD}'));
787 /// assert_eq!(s.pop_char(), Some('o'));
788 /// assert_eq!(s.pop_char(), Some('o'));
789 /// assert_eq!(s.pop_char(), Some('\u{FFFD}'));
790 /// assert_eq!(s.pop_char(), Some('\u{FFFD}'));
791 /// assert_eq!(s.pop_char(), Some('\u{FFFD}'));
792 /// assert_eq!(s.pop_char(), Some('f'));
793 /// assert_eq!(s.pop_char(), None);
794 /// ```
795 #[inline]
796 fn pop_char(&mut self) -> Option<char> {
797 let (ch, size) = utf8::decode_last_lossy(self.as_vec());
798 if size == 0 {
799 return None;
800 }
801 let new_len = self.as_vec().len() - size;
802 self.as_vec_mut().truncate(new_len);
803 Some(ch)
804 }
805
806 /// Removes a `char` from this `Vec<u8>` at the given byte position and
807 /// returns it.
808 ///
809 /// If the bytes at the given position do not lead to a valid UTF-8 code
810 /// unit sequence, then a
    /// [replacement codepoint is returned instead](index.html#handling-of-invalid-utf-8).
812 ///
813 /// # Panics
814 ///
815 /// Panics if `at` is larger than or equal to this byte string's length.
816 ///
817 /// # Examples
818 ///
819 /// Basic usage:
820 ///
821 /// ```
822 /// use bstr::ByteVec;
823 ///
824 /// let mut s = Vec::from("foo☃bar");
825 /// assert_eq!(s.remove_char(3), '☃');
826 /// assert_eq!(s, b"foobar");
827 /// ```
828 ///
829 /// This example shows how the Unicode replacement codepoint policy is
830 /// used:
831 ///
832 /// ```
833 /// use bstr::ByteVec;
834 ///
835 /// let mut s = Vec::from_slice(b"foo\xFFbar");
836 /// assert_eq!(s.remove_char(3), '\u{FFFD}');
837 /// assert_eq!(s, b"foobar");
838 /// ```
839 #[inline]
840 fn remove_char(&mut self, at: usize) -> char {
841 let (ch, size) = utf8::decode_lossy(&self.as_vec()[at..]);
842 assert!(
843 size > 0,
844 "expected {} to be less than {}",
845 at,
846 self.as_vec().len(),
847 );
848 self.as_vec_mut().drain(at..at + size);
849 ch
850 }
851
852 /// Inserts the given codepoint into this `Vec<u8>` at a particular byte
853 /// position.
854 ///
855 /// This is an `O(n)` operation as it may copy a number of elements in this
856 /// byte string proportional to its length.
857 ///
858 /// # Panics
859 ///
860 /// Panics if `at` is larger than the byte string's length.
861 ///
862 /// # Examples
863 ///
864 /// Basic usage:
865 ///
866 /// ```
867 /// use bstr::ByteVec;
868 ///
869 /// let mut s = Vec::from("foobar");
870 /// s.insert_char(3, '☃');
871 /// assert_eq!(s, "foo☃bar".as_bytes());
872 /// ```
873 #[inline]
874 fn insert_char(&mut self, at: usize, ch: char) {
875 self.insert_str(at, ch.encode_utf8(&mut [0; 4]).as_bytes());
876 }
877
878 /// Inserts the given byte string into this byte string at a particular
879 /// byte position.
880 ///
881 /// This is an `O(n)` operation as it may copy a number of elements in this
882 /// byte string proportional to its length.
883 ///
884 /// The given byte string may be any type that can be cheaply converted
885 /// into a `&[u8]`. This includes, but is not limited to, `&str` and
886 /// `&[u8]`.
887 ///
888 /// # Panics
889 ///
890 /// Panics if `at` is larger than the byte string's length.
891 ///
892 /// # Examples
893 ///
894 /// Basic usage:
895 ///
896 /// ```
897 /// use bstr::ByteVec;
898 ///
899 /// let mut s = Vec::from("foobar");
900 /// s.insert_str(3, "☃☃☃");
901 /// assert_eq!(s, "foo☃☃☃bar".as_bytes());
902 /// ```
903 #[inline]
904 fn insert_str<B: AsRef<[u8]>>(&mut self, at: usize, bytes: B) {
905 let bytes = bytes.as_ref();
906 let len = self.as_vec().len();
907 assert!(at <= len, "expected {} to be <= {}", at, len);
908
909 // SAFETY: We'd like to efficiently splice in the given bytes into
910 // this byte string. Since we are only working with `u8` elements here,
911 // we only need to consider whether our bounds are correct and whether
912 // our byte string has enough space.
913 self.as_vec_mut().reserve(bytes.len());
914 unsafe {
915 // Shift bytes after `at` over by the length of `bytes` to make
916 // room for it. This requires referencing two regions of memory
917 // that may overlap, so we use ptr::copy.
918 ptr::copy(
919 self.as_vec().as_ptr().add(at),
920 self.as_vec_mut().as_mut_ptr().add(at + bytes.len()),
921 len - at,
922 );
923 // Now copy the bytes given into the room we made above. In this
924 // case, we know that the given bytes cannot possibly overlap
925 // with this byte string since we have a mutable borrow of the
926 // latter. Thus, we can use a nonoverlapping copy.
927 ptr::copy_nonoverlapping(
928 bytes.as_ptr(),
929 self.as_vec_mut().as_mut_ptr().add(at),
930 bytes.len(),
931 );
932 self.as_vec_mut().set_len(len + bytes.len());
933 }
934 }
935
936 /// Removes the specified range in this byte string and replaces it with
937 /// the given bytes. The given bytes do not need to have the same length
938 /// as the range provided.
939 ///
940 /// # Panics
941 ///
942 /// Panics if the given range is invalid.
943 ///
944 /// # Examples
945 ///
946 /// Basic usage:
947 ///
948 /// ```
949 /// use bstr::ByteVec;
950 ///
951 /// let mut s = Vec::from("foobar");
952 /// s.replace_range(2..4, "xxxxx");
953 /// assert_eq!(s, "foxxxxxar".as_bytes());
954 /// ```
955 #[inline]
956 fn replace_range<R, B>(&mut self, range: R, replace_with: B)
957 where
958 R: ops::RangeBounds<usize>,
959 B: AsRef<[u8]>,
960 {
961 self.as_vec_mut().splice(range, replace_with.as_ref().iter().copied());
962 }
963
964 /// Creates a draining iterator that removes the specified range in this
965 /// `Vec<u8>` and yields each of the removed bytes.
966 ///
967 /// Note that the elements specified by the given range are removed
968 /// regardless of whether the returned iterator is fully exhausted.
969 ///
    /// Also note that it is unspecified how many bytes are removed from the
971 /// `Vec<u8>` if the `DrainBytes` iterator is leaked.
972 ///
973 /// # Panics
974 ///
975 /// Panics if the given range is not valid.
976 ///
977 /// # Examples
978 ///
979 /// Basic usage:
980 ///
981 /// ```
982 /// use bstr::ByteVec;
983 ///
984 /// let mut s = Vec::from("foobar");
985 /// {
986 /// let mut drainer = s.drain_bytes(2..4);
987 /// assert_eq!(drainer.next(), Some(b'o'));
988 /// assert_eq!(drainer.next(), Some(b'b'));
989 /// assert_eq!(drainer.next(), None);
990 /// }
991 /// assert_eq!(s, "foar".as_bytes());
992 /// ```
993 #[inline]
994 fn drain_bytes<R>(&mut self, range: R) -> DrainBytes<'_>
995 where
996 R: ops::RangeBounds<usize>,
997 {
998 DrainBytes { it: self.as_vec_mut().drain(range) }
999 }
1000}
1001
1002/// A draining byte oriented iterator for `Vec<u8>`.
1003///
1004/// This iterator is created by
1005/// [`ByteVec::drain_bytes`](trait.ByteVec.html#method.drain_bytes).
1006///
1007/// # Examples
1008///
1009/// Basic usage:
1010///
1011/// ```
1012/// use bstr::ByteVec;
1013///
1014/// let mut s = Vec::from("foobar");
1015/// {
1016/// let mut drainer = s.drain_bytes(2..4);
1017/// assert_eq!(drainer.next(), Some(b'o'));
1018/// assert_eq!(drainer.next(), Some(b'b'));
1019/// assert_eq!(drainer.next(), None);
1020/// }
1021/// assert_eq!(s, "foar".as_bytes());
1022/// ```
#[derive(Debug)]
pub struct DrainBytes<'a> {
    /// The underlying `Vec<u8>` drain that every impl below forwards to.
    it: vec::Drain<'a, u8>,
}

// Fusedness is claimed on the strength of the wrapped `vec::Drain`.
impl<'a> iter::FusedIterator for DrainBytes<'a> {}

impl<'a> Iterator for DrainBytes<'a> {
    type Item = u8;

    /// Yields the next removed byte from the front of the drained range.
    #[inline]
    fn next(&mut self) -> Option<u8> {
        self.it.next()
    }

    /// Reports the exact number of bytes left to yield.
    ///
    /// This must be forwarded explicitly: the default `size_hint` returns
    /// `(0, None)`, which contradicts the `ExactSizeIterator` impl below and
    /// prevents consumers such as `collect` from preallocating.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.it.size_hint()
    }
}

impl<'a> DoubleEndedIterator for DrainBytes<'a> {
    /// Yields the next removed byte from the back of the drained range.
    #[inline]
    fn next_back(&mut self) -> Option<u8> {
        self.it.next_back()
    }
}

impl<'a> ExactSizeIterator for DrainBytes<'a> {
    /// Returns the number of bytes not yet yielded.
    #[inline]
    fn len(&self) -> usize {
        self.it.len()
    }
}
1052
1053/// An error that may occur when converting a `Vec<u8>` to a `String`.
1054///
1055/// This error includes the original `Vec<u8>` that failed to convert to a
1056/// `String`. This permits callers to recover the allocation used even if it
/// is not valid UTF-8.
1058///
1059/// # Examples
1060///
1061/// Basic usage:
1062///
1063/// ```
1064/// use bstr::{B, ByteVec};
1065///
1066/// let bytes = Vec::from_slice(b"foo\xFFbar");
1067/// let err = bytes.into_string().unwrap_err();
1068///
1069/// assert_eq!(err.utf8_error().valid_up_to(), 3);
1070/// assert_eq!(err.utf8_error().error_len(), Some(1));
1071///
1072/// // At no point in this example is an allocation performed.
1073/// let bytes = Vec::from(err.into_vec());
1074/// assert_eq!(bytes, B(b"foo\xFFbar"));
1075/// ```
#[derive(Debug, Eq, PartialEq)]
pub struct FromUtf8Error {
    /// The bytes that failed to convert, kept so callers can get them back
    /// through `as_bytes` or `into_vec`.
    original: Vec<u8>,
    /// The validation error describing where and why the bytes are not
    /// valid UTF-8; exposed through `utf8_error`.
    err: Utf8Error,
}
1081
1082impl FromUtf8Error {
1083 /// Return the original bytes as a slice that failed to convert to a
1084 /// `String`.
1085 ///
1086 /// # Examples
1087 ///
1088 /// Basic usage:
1089 ///
1090 /// ```
1091 /// use bstr::{B, ByteVec};
1092 ///
1093 /// let bytes = Vec::from_slice(b"foo\xFFbar");
1094 /// let err = bytes.into_string().unwrap_err();
1095 ///
1096 /// // At no point in this example is an allocation performed.
1097 /// assert_eq!(err.as_bytes(), B(b"foo\xFFbar"));
1098 /// ```
1099 #[inline]
1100 pub fn as_bytes(&self) -> &[u8] {
1101 &self.original
1102 }
1103
1104 /// Consume this error and return the original byte string that failed to
1105 /// convert to a `String`.
1106 ///
1107 /// # Examples
1108 ///
1109 /// Basic usage:
1110 ///
1111 /// ```
1112 /// use bstr::{B, ByteVec};
1113 ///
1114 /// let bytes = Vec::from_slice(b"foo\xFFbar");
1115 /// let err = bytes.into_string().unwrap_err();
1116 /// let original = err.into_vec();
1117 ///
1118 /// // At no point in this example is an allocation performed.
1119 /// assert_eq!(original, B(b"foo\xFFbar"));
1120 /// ```
1121 #[inline]
1122 pub fn into_vec(self) -> Vec<u8> {
1123 self.original
1124 }
1125
1126 /// Return the underlying UTF-8 error that occurred. This error provides
1127 /// information on the nature and location of the invalid UTF-8 detected.
1128 ///
1129 /// # Examples
1130 ///
1131 /// Basic usage:
1132 ///
1133 /// ```
1134 /// use bstr::{B, ByteVec};
1135 ///
1136 /// let bytes = Vec::from_slice(b"foo\xFFbar");
1137 /// let err = bytes.into_string().unwrap_err();
1138 ///
1139 /// assert_eq!(err.utf8_error().valid_up_to(), 3);
1140 /// assert_eq!(err.utf8_error().error_len(), Some(1));
1141 /// ```
1142 #[inline]
1143 pub fn utf8_error(&self) -> &Utf8Error {
1144 &self.err
1145 }
1146}
1147
1148#[cfg(feature = "std")]
impl error::Error for FromUtf8Error {
    // NOTE(review): `Error::description` is deprecated in the standard
    // library in favour of `Display`. The override is kept as-is because
    // removing it would change what existing callers of `description()` see.
    #[inline]
    fn description(&self) -> &str {
        "invalid UTF-8 vector"
    }
}
1155
1156impl fmt::Display for FromUtf8Error {
1157 #[inline]
1158 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1159 write!(f, "{}", self.err)
1160 }
1161}
1162
1163#[cfg(all(test, feature = "std"))]
1164mod tests {
1165 use alloc::{vec, vec::Vec};
1166
1167 use crate::ext_vec::ByteVec;
1168
1169 #[test]
1170 fn insert() {
1171 let mut s = vec![];
1172 s.insert_str(0, "foo");
1173 assert_eq!(s, "foo".as_bytes());
1174
1175 let mut s = Vec::from("a");
1176 s.insert_str(0, "foo");
1177 assert_eq!(s, "fooa".as_bytes());
1178
1179 let mut s = Vec::from("a");
1180 s.insert_str(1, "foo");
1181 assert_eq!(s, "afoo".as_bytes());
1182
1183 let mut s = Vec::from("foobar");
1184 s.insert_str(3, "quux");
1185 assert_eq!(s, "fooquuxbar".as_bytes());
1186
1187 let mut s = Vec::from("foobar");
1188 s.insert_str(3, "x");
1189 assert_eq!(s, "fooxbar".as_bytes());
1190
1191 let mut s = Vec::from("foobar");
1192 s.insert_str(0, "x");
1193 assert_eq!(s, "xfoobar".as_bytes());
1194
1195 let mut s = Vec::from("foobar");
1196 s.insert_str(6, "x");
1197 assert_eq!(s, "foobarx".as_bytes());
1198
1199 let mut s = Vec::from("foobar");
1200 s.insert_str(3, "quuxbazquux");
1201 assert_eq!(s, "fooquuxbazquuxbar".as_bytes());
1202 }
1203
1204 #[test]
1205 #[should_panic]
1206 fn insert_fail1() {
1207 let mut s = vec![];
1208 s.insert_str(1, "foo");
1209 }
1210
1211 #[test]
1212 #[should_panic]
1213 fn insert_fail2() {
1214 let mut s = Vec::from("a");
1215 s.insert_str(2, "foo");
1216 }
1217
1218 #[test]
1219 #[should_panic]
1220 fn insert_fail3() {
1221 let mut s = Vec::from("foobar");
1222 s.insert_str(7, "foo");
1223 }
1224}