bstr/
ext_vec.rs

1use core::{fmt, iter, ops, ptr};
2
3use alloc::{borrow::Cow, string::String, vec, vec::Vec};
4
5#[cfg(feature = "std")]
6use std::{
7    error,
8    ffi::{OsStr, OsString},
9    path::{Path, PathBuf},
10};
11
12use crate::{
13    ext_slice::ByteSlice,
14    utf8::{self, Utf8Error},
15};
16
/// Concatenate every element yielded by the given iterator, in iteration
/// order, into one new `Vec<u8>`.
///
/// Each element may be of any type that can be cheaply viewed as a `&[u8]`.
/// This includes, but is not limited to, `&str`, `&BStr` and `&[u8]` itself.
///
/// An iterator that yields nothing produces an empty vector.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use bstr;
///
/// let s = bstr::concat(&["foo", "bar", "baz"]);
/// assert_eq!(s, "foobarbaz".as_bytes());
/// ```
#[inline]
pub fn concat<T, I>(elements: I) -> Vec<u8>
where
    T: AsRef<[u8]>,
    I: IntoIterator<Item = T>,
{
    // Thread a single accumulator through the iterator, appending the bytes
    // of each element as it is visited.
    elements.into_iter().fold(Vec::new(), |mut joined, piece| {
        joined.extend_from_slice(piece.as_ref());
        joined
    })
}
45
/// Join the elements yielded by the given iterator into a single `Vec<u8>`,
/// placing the given separator between each pair of adjacent elements.
///
/// Both the separator and the elements may be any type that can be cheaply
/// converted into an `&[u8]`. This includes, but is not limited to,
/// `&str`, `&BStr` and `&[u8]` itself.
///
/// The separator is never emitted before the first element or after the
/// last one, so an empty iterator produces an empty vector and a single
/// element is returned without any separator.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use bstr;
///
/// let s = bstr::join(",", &["foo", "bar", "baz"]);
/// assert_eq!(s, "foo,bar,baz".as_bytes());
/// ```
#[inline]
pub fn join<B, T, I>(separator: B, elements: I) -> Vec<u8>
where
    B: AsRef<[u8]>,
    T: AsRef<[u8]>,
    I: IntoIterator<Item = T>,
{
    let mut pieces = elements.into_iter();
    let mut joined = Vec::new();
    // The first element is written bare; every later element is preceded by
    // the separator.
    if let Some(head) = pieces.next() {
        joined.extend_from_slice(head.as_ref());
        for piece in pieces {
            joined.extend_from_slice(separator.as_ref());
            joined.extend_from_slice(piece.as_ref());
        }
    }
    joined
}
84
// `Vec<u8>` already is the underlying vector, so each of the three required
// accessors hands back `self` unchanged.
impl ByteVec for Vec<u8> {
    // Shared view: the vector itself.
    #[inline]
    fn as_vec(&self) -> &Vec<u8> {
        self
    }

    // Exclusive view: the vector itself.
    #[inline]
    fn as_vec_mut(&mut self) -> &mut Vec<u8> {
        self
    }

    // Owned form: the vector itself, moved out.
    #[inline]
    fn into_vec(self) -> Vec<u8> {
        self
    }
}
101
/// Ensure that callers cannot implement `ByteVec` by making an
/// unimplementable trait its super trait.
///
/// `Sealed` is declared `pub` so that it can be used as a supertrait bound,
/// but the module containing it is private.
mod private {
    pub trait Sealed {}
}
// Opt `Vec<u8>` in so that it satisfies the `Sealed` supertrait bound.
impl private::Sealed for Vec<u8> {}
108
109/// A trait that extends `Vec<u8>` with string oriented methods.
110///
111/// Note that when using the constructor methods, such as
112/// `ByteVec::from_slice`, one should actually call them using the concrete
113/// type. For example:
114///
115/// ```
116/// use bstr::{B, ByteVec};
117///
118/// let s = Vec::from_slice(b"abc"); // NOT ByteVec::from_slice("...")
119/// assert_eq!(s, B("abc"));
120/// ```
121///
122/// This trait is sealed and cannot be implemented outside of `bstr`.
123pub trait ByteVec: private::Sealed {
    /// A method for accessing the raw vector bytes of this type. This is
    /// always a no-op and callers shouldn't care about it. This only exists
    /// for making the extension trait work: the provided methods of this
    /// trait go through it to read the underlying `Vec<u8>`.
    #[doc(hidden)]
    fn as_vec(&self) -> &Vec<u8>;
129
    /// A method for accessing the raw vector bytes of this type, mutably. This
    /// is always a no-op and callers shouldn't care about it. This only exists
    /// for making the extension trait work: the provided methods of this
    /// trait go through it to modify the underlying `Vec<u8>`.
    #[doc(hidden)]
    fn as_vec_mut(&mut self) -> &mut Vec<u8>;
135
    /// A method for consuming ownership of this vector. This is always a no-op
    /// and callers shouldn't care about it. This only exists for making the
    /// extension trait work: the consuming `into_*` methods of this trait go
    /// through it to obtain the underlying `Vec<u8>` by value.
    #[doc(hidden)]
    fn into_vec(self) -> Vec<u8>
    where
        Self: Sized;
143
144    /// Create a new owned byte string from the given byte slice.
145    ///
146    /// # Examples
147    ///
148    /// Basic usage:
149    ///
150    /// ```
151    /// use bstr::{B, ByteVec};
152    ///
153    /// let s = Vec::from_slice(b"abc");
154    /// assert_eq!(s, B("abc"));
155    /// ```
156    #[inline]
157    fn from_slice<B: AsRef<[u8]>>(bytes: B) -> Vec<u8> {
158        bytes.as_ref().to_vec()
159    }
160
161    /// Create a new byte string from an owned OS string.
162    ///
163    /// When the underlying bytes of OS strings are accessible, then this
164    /// always succeeds and is zero cost. Otherwise, this returns the given
165    /// `OsString` if it is not valid UTF-8.
166    ///
167    /// # Examples
168    ///
169    /// Basic usage:
170    ///
171    /// ```
172    /// use std::ffi::OsString;
173    ///
174    /// use bstr::{B, ByteVec};
175    ///
176    /// let os_str = OsString::from("foo");
177    /// let bs = Vec::from_os_string(os_str).expect("valid UTF-8");
178    /// assert_eq!(bs, B("foo"));
179    /// ```
180    #[inline]
181    #[cfg(feature = "std")]
182    fn from_os_string(os_str: OsString) -> Result<Vec<u8>, OsString> {
183        #[cfg(unix)]
184        #[inline]
185        fn imp(os_str: OsString) -> Result<Vec<u8>, OsString> {
186            use std::os::unix::ffi::OsStringExt;
187
188            Ok(os_str.into_vec())
189        }
190
191        #[cfg(not(unix))]
192        #[inline]
193        fn imp(os_str: OsString) -> Result<Vec<u8>, OsString> {
194            os_str.into_string().map(Vec::from)
195        }
196
197        imp(os_str)
198    }
199
200    /// Lossily create a new byte string from an OS string slice.
201    ///
202    /// When the underlying bytes of OS strings are accessible, then this is
203    /// zero cost and always returns a slice. Otherwise, a UTF-8 check is
204    /// performed and if the given OS string is not valid UTF-8, then it is
205    /// lossily decoded into valid UTF-8 (with invalid bytes replaced by the
206    /// Unicode replacement codepoint).
207    ///
208    /// # Examples
209    ///
210    /// Basic usage:
211    ///
212    /// ```
213    /// use std::ffi::OsStr;
214    ///
215    /// use bstr::{B, ByteVec};
216    ///
217    /// let os_str = OsStr::new("foo");
218    /// let bs = Vec::from_os_str_lossy(os_str);
219    /// assert_eq!(bs, B("foo"));
220    /// ```
221    #[inline]
222    #[cfg(feature = "std")]
223    fn from_os_str_lossy(os_str: &OsStr) -> Cow<'_, [u8]> {
224        #[cfg(unix)]
225        #[inline]
226        fn imp(os_str: &OsStr) -> Cow<'_, [u8]> {
227            use std::os::unix::ffi::OsStrExt;
228
229            Cow::Borrowed(os_str.as_bytes())
230        }
231
232        #[cfg(not(unix))]
233        #[inline]
234        fn imp(os_str: &OsStr) -> Cow<'_, [u8]> {
235            match os_str.to_string_lossy() {
236                Cow::Borrowed(x) => Cow::Borrowed(x.as_bytes()),
237                Cow::Owned(x) => Cow::Owned(Vec::from(x)),
238            }
239        }
240
241        imp(os_str)
242    }
243
244    /// Create a new byte string from an owned file path.
245    ///
246    /// When the underlying bytes of paths are accessible, then this always
247    /// succeeds and is zero cost. Otherwise, this returns the given `PathBuf`
248    /// if it is not valid UTF-8.
249    ///
250    /// # Examples
251    ///
252    /// Basic usage:
253    ///
254    /// ```
255    /// use std::path::PathBuf;
256    ///
257    /// use bstr::{B, ByteVec};
258    ///
259    /// let path = PathBuf::from("foo");
260    /// let bs = Vec::from_path_buf(path).expect("must be valid UTF-8");
261    /// assert_eq!(bs, B("foo"));
262    /// ```
263    #[inline]
264    #[cfg(feature = "std")]
265    fn from_path_buf(path: PathBuf) -> Result<Vec<u8>, PathBuf> {
266        Vec::from_os_string(path.into_os_string()).map_err(PathBuf::from)
267    }
268
269    /// Lossily create a new byte string from a file path.
270    ///
271    /// When the underlying bytes of paths are accessible, then this is
272    /// zero cost and always returns a slice. Otherwise, a UTF-8 check is
273    /// performed and if the given path is not valid UTF-8, then it is lossily
274    /// decoded into valid UTF-8 (with invalid bytes replaced by the Unicode
275    /// replacement codepoint).
276    ///
277    /// # Examples
278    ///
279    /// Basic usage:
280    ///
281    /// ```
282    /// use std::path::Path;
283    ///
284    /// use bstr::{B, ByteVec};
285    ///
286    /// let path = Path::new("foo");
287    /// let bs = Vec::from_path_lossy(path);
288    /// assert_eq!(bs, B("foo"));
289    /// ```
290    #[inline]
291    #[cfg(feature = "std")]
292    fn from_path_lossy(path: &Path) -> Cow<'_, [u8]> {
293        Vec::from_os_str_lossy(path.as_os_str())
294    }
295
296    /// Unescapes the given string into its raw bytes.
297    ///
    /// This looks for the escape sequences `\xNN`, `\0`, `\r`, `\n`, `\t`
    /// and `\\` and translates them into their corresponding unescaped form.
300    ///
301    /// Incomplete escape sequences or things that look like escape sequences
302    /// but are not (for example, `\i` or `\xYZ`) are passed through literally.
303    ///
304    /// This is the dual of [`ByteSlice::escape_bytes`].
305    ///
306    /// Note that the zero or NUL byte may be represented as either `\0` or
307    /// `\x00`. Both will be unescaped into the zero byte.
308    ///
309    /// # Examples
310    ///
311    /// This shows basic usage:
312    ///
313    /// ```
314    /// # #[cfg(feature = "alloc")] {
315    /// use bstr::{B, BString, ByteVec};
316    ///
317    /// assert_eq!(
318    ///     BString::from(b"foo\xFFbar"),
319    ///     Vec::unescape_bytes(r"foo\xFFbar"),
320    /// );
321    /// assert_eq!(
322    ///     BString::from(b"foo\nbar"),
323    ///     Vec::unescape_bytes(r"foo\nbar"),
324    /// );
325    /// assert_eq!(
326    ///     BString::from(b"foo\tbar"),
327    ///     Vec::unescape_bytes(r"foo\tbar"),
328    /// );
329    /// assert_eq!(
330    ///     BString::from(b"foo\\bar"),
331    ///     Vec::unescape_bytes(r"foo\\bar"),
332    /// );
333    /// assert_eq!(
334    ///     BString::from("foo☃bar"),
335    ///     Vec::unescape_bytes(r"foo☃bar"),
336    /// );
337    ///
338    /// # }
339    /// ```
340    ///
341    /// This shows some examples of how incomplete or "incorrect" escape
342    /// sequences get passed through literally.
343    ///
344    /// ```
345    /// # #[cfg(feature = "alloc")] {
346    /// use bstr::{B, BString, ByteVec};
347    ///
348    /// // Show some incomplete escape sequences.
349    /// assert_eq!(
350    ///     BString::from(br"\"),
351    ///     Vec::unescape_bytes(r"\"),
352    /// );
353    /// assert_eq!(
354    ///     BString::from(br"\"),
355    ///     Vec::unescape_bytes(r"\\"),
356    /// );
357    /// assert_eq!(
358    ///     BString::from(br"\x"),
359    ///     Vec::unescape_bytes(r"\x"),
360    /// );
361    /// assert_eq!(
362    ///     BString::from(br"\xA"),
363    ///     Vec::unescape_bytes(r"\xA"),
364    /// );
365    /// // And now some that kind of look like escape
366    /// // sequences, but aren't.
367    /// assert_eq!(
368    ///     BString::from(br"\xZ"),
369    ///     Vec::unescape_bytes(r"\xZ"),
370    /// );
371    /// assert_eq!(
372    ///     BString::from(br"\xZZ"),
373    ///     Vec::unescape_bytes(r"\xZZ"),
374    /// );
375    /// assert_eq!(
376    ///     BString::from(br"\i"),
377    ///     Vec::unescape_bytes(r"\i"),
378    /// );
379    /// assert_eq!(
380    ///     BString::from(br"\u"),
381    ///     Vec::unescape_bytes(r"\u"),
382    /// );
383    /// assert_eq!(
384    ///     BString::from(br"\u{2603}"),
385    ///     Vec::unescape_bytes(r"\u{2603}"),
386    /// );
387    ///
388    /// # }
389    /// ```
390    #[inline]
391    #[cfg(feature = "alloc")]
392    fn unescape_bytes<S: AsRef<str>>(escaped: S) -> Vec<u8> {
393        let s = escaped.as_ref();
394        crate::escape_bytes::UnescapeBytes::new(s.chars()).collect()
395    }
396
397    /// Appends the given byte to the end of this byte string.
398    ///
399    /// Note that this is equivalent to the generic `Vec::push` method. This
400    /// method is provided to permit callers to explicitly differentiate
401    /// between pushing bytes, codepoints and strings.
402    ///
403    /// # Examples
404    ///
405    /// Basic usage:
406    ///
407    /// ```
408    /// use bstr::ByteVec;
409    ///
410    /// let mut s = <Vec<u8>>::from("abc");
411    /// s.push_byte(b'\xE2');
412    /// s.push_byte(b'\x98');
413    /// s.push_byte(b'\x83');
414    /// assert_eq!(s, "abc☃".as_bytes());
415    /// ```
416    #[inline]
417    fn push_byte(&mut self, byte: u8) {
418        self.as_vec_mut().push(byte);
419    }
420
421    /// Appends the given `char` to the end of this byte string.
422    ///
423    /// # Examples
424    ///
425    /// Basic usage:
426    ///
427    /// ```
428    /// use bstr::ByteVec;
429    ///
430    /// let mut s = <Vec<u8>>::from("abc");
431    /// s.push_char('1');
432    /// s.push_char('2');
433    /// s.push_char('3');
434    /// assert_eq!(s, "abc123".as_bytes());
435    /// ```
436    #[inline]
437    fn push_char(&mut self, ch: char) {
438        if ch.len_utf8() == 1 {
439            self.push_byte(ch as u8);
440            return;
441        }
442        self.as_vec_mut()
443            .extend_from_slice(ch.encode_utf8(&mut [0; 4]).as_bytes());
444    }
445
    /// Appends the given slice to the end of this byte string. This accepts
    /// any type that can be converted to a `&[u8]`. This includes, but is not
448    /// limited to, `&str`, `&BStr`, and of course, `&[u8]` itself.
449    ///
450    /// # Examples
451    ///
452    /// Basic usage:
453    ///
454    /// ```
455    /// use bstr::ByteVec;
456    ///
457    /// let mut s = <Vec<u8>>::from("abc");
458    /// s.push_str(b"123");
459    /// assert_eq!(s, "abc123".as_bytes());
460    /// ```
461    #[inline]
462    fn push_str<B: AsRef<[u8]>>(&mut self, bytes: B) {
463        self.as_vec_mut().extend_from_slice(bytes.as_ref());
464    }
465
466    /// Converts a `Vec<u8>` into a `String` if and only if this byte string is
467    /// valid UTF-8.
468    ///
469    /// If it is not valid UTF-8, then a
470    /// [`FromUtf8Error`](struct.FromUtf8Error.html)
471    /// is returned. (This error can be used to examine why UTF-8 validation
472    /// failed, or to regain the original byte string.)
473    ///
474    /// # Examples
475    ///
476    /// Basic usage:
477    ///
478    /// ```
479    /// use bstr::ByteVec;
480    ///
481    /// let bytes = Vec::from("hello");
482    /// let string = bytes.into_string().unwrap();
483    ///
484    /// assert_eq!("hello", string);
485    /// ```
486    ///
487    /// If this byte string is not valid UTF-8, then an error will be returned.
488    /// That error can then be used to inspect the location at which invalid
489    /// UTF-8 was found, or to regain the original byte string:
490    ///
491    /// ```
492    /// use bstr::{B, ByteVec};
493    ///
494    /// let bytes = Vec::from_slice(b"foo\xFFbar");
495    /// let err = bytes.into_string().unwrap_err();
496    ///
497    /// assert_eq!(err.utf8_error().valid_up_to(), 3);
498    /// assert_eq!(err.utf8_error().error_len(), Some(1));
499    ///
500    /// // At no point in this example is an allocation performed.
501    /// let bytes = Vec::from(err.into_vec());
502    /// assert_eq!(bytes, B(b"foo\xFFbar"));
503    /// ```
504    #[inline]
505    fn into_string(self) -> Result<String, FromUtf8Error>
506    where
507        Self: Sized,
508    {
509        match utf8::validate(self.as_vec()) {
510            Err(err) => Err(FromUtf8Error { original: self.into_vec(), err }),
511            Ok(()) => {
512                // SAFETY: This is safe because of the guarantees provided by
513                // utf8::validate.
514                unsafe { Ok(self.into_string_unchecked()) }
515            }
516        }
517    }
518
519    /// Lossily converts a `Vec<u8>` into a `String`. If this byte string
520    /// contains invalid UTF-8, then the invalid bytes are replaced with the
521    /// Unicode replacement codepoint.
522    ///
523    /// # Examples
524    ///
525    /// Basic usage:
526    ///
527    /// ```
528    /// use bstr::ByteVec;
529    ///
530    /// let bytes = Vec::from_slice(b"foo\xFFbar");
531    /// let string = bytes.into_string_lossy();
532    /// assert_eq!(string, "foo\u{FFFD}bar");
533    /// ```
534    #[inline]
535    fn into_string_lossy(self) -> String
536    where
537        Self: Sized,
538    {
539        match self.as_vec().to_str_lossy() {
540            Cow::Borrowed(_) => {
541                // SAFETY: to_str_lossy() returning a Cow::Borrowed guarantees
542                // the entire string is valid utf8.
543                unsafe { self.into_string_unchecked() }
544            }
545            Cow::Owned(s) => s,
546        }
547    }
548
549    /// Unsafely convert this byte string into a `String`, without checking for
550    /// valid UTF-8.
551    ///
552    /// # Safety
553    ///
554    /// Callers *must* ensure that this byte string is valid UTF-8 before
555    /// calling this method. Converting a byte string into a `String` that is
556    /// not valid UTF-8 is considered undefined behavior.
557    ///
558    /// This routine is useful in performance sensitive contexts where the
559    /// UTF-8 validity of the byte string is already known and it is
560    /// undesirable to pay the cost of an additional UTF-8 validation check
561    /// that [`into_string`](#method.into_string) performs.
562    ///
563    /// # Examples
564    ///
565    /// Basic usage:
566    ///
567    /// ```
568    /// use bstr::ByteVec;
569    ///
570    /// // SAFETY: This is safe because string literals are guaranteed to be
571    /// // valid UTF-8 by the Rust compiler.
572    /// let s = unsafe { Vec::from("☃βツ").into_string_unchecked() };
573    /// assert_eq!("☃βツ", s);
574    /// ```
575    #[inline]
576    unsafe fn into_string_unchecked(self) -> String
577    where
578        Self: Sized,
579    {
580        String::from_utf8_unchecked(self.into_vec())
581    }
582
583    /// Converts this byte string into an OS string, in place.
584    ///
585    /// When OS strings can be constructed from arbitrary byte sequences, this
586    /// always succeeds and is zero cost. Otherwise, if this byte string is not
587    /// valid UTF-8, then an error (with the original byte string) is returned.
588    ///
589    /// # Examples
590    ///
591    /// Basic usage:
592    ///
593    /// ```
594    /// use std::ffi::OsStr;
595    ///
596    /// use bstr::ByteVec;
597    ///
598    /// let bs = Vec::from("foo");
599    /// let os_str = bs.into_os_string().expect("should be valid UTF-8");
600    /// assert_eq!(os_str, OsStr::new("foo"));
601    /// ```
602    #[cfg(feature = "std")]
603    #[inline]
604    fn into_os_string(self) -> Result<OsString, FromUtf8Error>
605    where
606        Self: Sized,
607    {
608        #[cfg(unix)]
609        #[inline]
610        fn imp(v: Vec<u8>) -> Result<OsString, FromUtf8Error> {
611            use std::os::unix::ffi::OsStringExt;
612
613            Ok(OsString::from_vec(v))
614        }
615
616        #[cfg(not(unix))]
617        #[inline]
618        fn imp(v: Vec<u8>) -> Result<OsString, FromUtf8Error> {
619            v.into_string().map(OsString::from)
620        }
621
622        imp(self.into_vec())
623    }
624
625    /// Lossily converts this byte string into an OS string, in place.
626    ///
    /// When OS strings can be constructed from arbitrary byte sequences, this
    /// is zero cost and no lossy conversion takes place. Otherwise, this will
    /// perform a UTF-8 check and lossily convert this byte string into valid
    /// UTF-8 using the Unicode replacement codepoint.
631    ///
632    /// Note that this can prevent the correct roundtripping of file paths when
633    /// the representation of `OsString` is opaque.
634    ///
635    /// # Examples
636    ///
637    /// Basic usage:
638    ///
639    /// ```
640    /// use bstr::ByteVec;
641    ///
642    /// let bs = Vec::from_slice(b"foo\xFFbar");
643    /// let os_str = bs.into_os_string_lossy();
644    /// assert_eq!(os_str.to_string_lossy(), "foo\u{FFFD}bar");
645    /// ```
646    #[inline]
647    #[cfg(feature = "std")]
648    fn into_os_string_lossy(self) -> OsString
649    where
650        Self: Sized,
651    {
652        #[cfg(unix)]
653        #[inline]
654        fn imp(v: Vec<u8>) -> OsString {
655            use std::os::unix::ffi::OsStringExt;
656
657            OsString::from_vec(v)
658        }
659
660        #[cfg(not(unix))]
661        #[inline]
662        fn imp(v: Vec<u8>) -> OsString {
663            OsString::from(v.into_string_lossy())
664        }
665
666        imp(self.into_vec())
667    }
668
669    /// Converts this byte string into an owned file path, in place.
670    ///
671    /// When paths can be constructed from arbitrary byte sequences, this
672    /// always succeeds and is zero cost. Otherwise, if this byte string is not
673    /// valid UTF-8, then an error (with the original byte string) is returned.
674    ///
675    /// # Examples
676    ///
677    /// Basic usage:
678    ///
679    /// ```
680    /// use bstr::ByteVec;
681    ///
682    /// let bs = Vec::from("foo");
683    /// let path = bs.into_path_buf().expect("should be valid UTF-8");
684    /// assert_eq!(path.as_os_str(), "foo");
685    /// ```
686    #[cfg(feature = "std")]
687    #[inline]
688    fn into_path_buf(self) -> Result<PathBuf, FromUtf8Error>
689    where
690        Self: Sized,
691    {
692        self.into_os_string().map(PathBuf::from)
693    }
694
695    /// Lossily converts this byte string into an owned file path, in place.
696    ///
    /// When paths can be constructed from arbitrary byte sequences, this is
    /// zero cost and no lossy conversion takes place. Otherwise, this will
    /// perform a UTF-8 check and lossily convert this byte string into valid
    /// UTF-8 using the Unicode replacement codepoint.
701    ///
702    /// Note that this can prevent the correct roundtripping of file paths when
703    /// the representation of `PathBuf` is opaque.
704    ///
705    /// # Examples
706    ///
707    /// Basic usage:
708    ///
709    /// ```
710    /// use bstr::ByteVec;
711    ///
712    /// let bs = Vec::from_slice(b"foo\xFFbar");
713    /// let path = bs.into_path_buf_lossy();
714    /// assert_eq!(path.to_string_lossy(), "foo\u{FFFD}bar");
715    /// ```
716    #[inline]
717    #[cfg(feature = "std")]
718    fn into_path_buf_lossy(self) -> PathBuf
719    where
720        Self: Sized,
721    {
722        PathBuf::from(self.into_os_string_lossy())
723    }
724
725    /// Removes the last byte from this `Vec<u8>` and returns it.
726    ///
727    /// If this byte string is empty, then `None` is returned.
728    ///
729    /// If the last codepoint in this byte string is not ASCII, then removing
730    /// the last byte could make this byte string contain invalid UTF-8.
731    ///
732    /// Note that this is equivalent to the generic `Vec::pop` method. This
733    /// method is provided to permit callers to explicitly differentiate
734    /// between popping bytes and codepoints.
735    ///
736    /// # Examples
737    ///
738    /// Basic usage:
739    ///
740    /// ```
741    /// use bstr::ByteVec;
742    ///
743    /// let mut s = Vec::from("foo");
744    /// assert_eq!(s.pop_byte(), Some(b'o'));
745    /// assert_eq!(s.pop_byte(), Some(b'o'));
746    /// assert_eq!(s.pop_byte(), Some(b'f'));
747    /// assert_eq!(s.pop_byte(), None);
748    /// ```
749    #[inline]
750    fn pop_byte(&mut self) -> Option<u8> {
751        self.as_vec_mut().pop()
752    }
753
754    /// Removes the last codepoint from this `Vec<u8>` and returns it.
755    ///
756    /// If this byte string is empty, then `None` is returned. If the last
757    /// bytes of this byte string do not correspond to a valid UTF-8 code unit
758    /// sequence, then the Unicode replacement codepoint is yielded instead in
759    /// accordance with the
760    /// [replacement codepoint substitution policy](index.html#handling-of-invalid-utf8-8).
761    ///
762    /// # Examples
763    ///
764    /// Basic usage:
765    ///
766    /// ```
767    /// use bstr::ByteVec;
768    ///
769    /// let mut s = Vec::from("foo");
770    /// assert_eq!(s.pop_char(), Some('o'));
771    /// assert_eq!(s.pop_char(), Some('o'));
772    /// assert_eq!(s.pop_char(), Some('f'));
773    /// assert_eq!(s.pop_char(), None);
774    /// ```
775    ///
776    /// This shows the replacement codepoint substitution policy. Note that
777    /// the first pop yields a replacement codepoint but actually removes two
778    /// bytes. This is in contrast with subsequent pops when encountering
779    /// `\xFF` since `\xFF` is never a valid prefix for any valid UTF-8
780    /// code unit sequence.
781    ///
782    /// ```
783    /// use bstr::ByteVec;
784    ///
785    /// let mut s = Vec::from_slice(b"f\xFF\xFF\xFFoo\xE2\x98");
786    /// assert_eq!(s.pop_char(), Some('\u{FFFD}'));
787    /// assert_eq!(s.pop_char(), Some('o'));
788    /// assert_eq!(s.pop_char(), Some('o'));
789    /// assert_eq!(s.pop_char(), Some('\u{FFFD}'));
790    /// assert_eq!(s.pop_char(), Some('\u{FFFD}'));
791    /// assert_eq!(s.pop_char(), Some('\u{FFFD}'));
792    /// assert_eq!(s.pop_char(), Some('f'));
793    /// assert_eq!(s.pop_char(), None);
794    /// ```
795    #[inline]
796    fn pop_char(&mut self) -> Option<char> {
797        let (ch, size) = utf8::decode_last_lossy(self.as_vec());
798        if size == 0 {
799            return None;
800        }
801        let new_len = self.as_vec().len() - size;
802        self.as_vec_mut().truncate(new_len);
803        Some(ch)
804    }
805
806    /// Removes a `char` from this `Vec<u8>` at the given byte position and
807    /// returns it.
808    ///
809    /// If the bytes at the given position do not lead to a valid UTF-8 code
810    /// unit sequence, then a
811    /// [replacement codepoint is returned instead](index.html#handling-of-invalid-utf8-8).
812    ///
813    /// # Panics
814    ///
815    /// Panics if `at` is larger than or equal to this byte string's length.
816    ///
817    /// # Examples
818    ///
819    /// Basic usage:
820    ///
821    /// ```
822    /// use bstr::ByteVec;
823    ///
824    /// let mut s = Vec::from("foo☃bar");
825    /// assert_eq!(s.remove_char(3), '☃');
826    /// assert_eq!(s, b"foobar");
827    /// ```
828    ///
829    /// This example shows how the Unicode replacement codepoint policy is
830    /// used:
831    ///
832    /// ```
833    /// use bstr::ByteVec;
834    ///
835    /// let mut s = Vec::from_slice(b"foo\xFFbar");
836    /// assert_eq!(s.remove_char(3), '\u{FFFD}');
837    /// assert_eq!(s, b"foobar");
838    /// ```
839    #[inline]
840    fn remove_char(&mut self, at: usize) -> char {
841        let (ch, size) = utf8::decode_lossy(&self.as_vec()[at..]);
842        assert!(
843            size > 0,
844            "expected {} to be less than {}",
845            at,
846            self.as_vec().len(),
847        );
848        self.as_vec_mut().drain(at..at + size);
849        ch
850    }
851
852    /// Inserts the given codepoint into this `Vec<u8>` at a particular byte
853    /// position.
854    ///
855    /// This is an `O(n)` operation as it may copy a number of elements in this
856    /// byte string proportional to its length.
857    ///
858    /// # Panics
859    ///
860    /// Panics if `at` is larger than the byte string's length.
861    ///
862    /// # Examples
863    ///
864    /// Basic usage:
865    ///
866    /// ```
867    /// use bstr::ByteVec;
868    ///
869    /// let mut s = Vec::from("foobar");
870    /// s.insert_char(3, '☃');
871    /// assert_eq!(s, "foo☃bar".as_bytes());
872    /// ```
873    #[inline]
874    fn insert_char(&mut self, at: usize, ch: char) {
875        self.insert_str(at, ch.encode_utf8(&mut [0; 4]).as_bytes());
876    }
877
878    /// Inserts the given byte string into this byte string at a particular
879    /// byte position.
880    ///
881    /// This is an `O(n)` operation as it may copy a number of elements in this
882    /// byte string proportional to its length.
883    ///
884    /// The given byte string may be any type that can be cheaply converted
885    /// into a `&[u8]`. This includes, but is not limited to, `&str` and
886    /// `&[u8]`.
887    ///
888    /// # Panics
889    ///
890    /// Panics if `at` is larger than the byte string's length.
891    ///
892    /// # Examples
893    ///
894    /// Basic usage:
895    ///
896    /// ```
897    /// use bstr::ByteVec;
898    ///
899    /// let mut s = Vec::from("foobar");
900    /// s.insert_str(3, "☃☃☃");
901    /// assert_eq!(s, "foo☃☃☃bar".as_bytes());
902    /// ```
903    #[inline]
904    fn insert_str<B: AsRef<[u8]>>(&mut self, at: usize, bytes: B) {
905        let bytes = bytes.as_ref();
906        let len = self.as_vec().len();
907        assert!(at <= len, "expected {} to be <= {}", at, len);
908
909        // SAFETY: We'd like to efficiently splice in the given bytes into
910        // this byte string. Since we are only working with `u8` elements here,
911        // we only need to consider whether our bounds are correct and whether
912        // our byte string has enough space.
913        self.as_vec_mut().reserve(bytes.len());
914        unsafe {
915            // Shift bytes after `at` over by the length of `bytes` to make
916            // room for it. This requires referencing two regions of memory
917            // that may overlap, so we use ptr::copy.
918            ptr::copy(
919                self.as_vec().as_ptr().add(at),
920                self.as_vec_mut().as_mut_ptr().add(at + bytes.len()),
921                len - at,
922            );
923            // Now copy the bytes given into the room we made above. In this
924            // case, we know that the given bytes cannot possibly overlap
925            // with this byte string since we have a mutable borrow of the
926            // latter. Thus, we can use a nonoverlapping copy.
927            ptr::copy_nonoverlapping(
928                bytes.as_ptr(),
929                self.as_vec_mut().as_mut_ptr().add(at),
930                bytes.len(),
931            );
932            self.as_vec_mut().set_len(len + bytes.len());
933        }
934    }
935
936    /// Removes the specified range in this byte string and replaces it with
937    /// the given bytes. The given bytes do not need to have the same length
938    /// as the range provided.
939    ///
940    /// # Panics
941    ///
942    /// Panics if the given range is invalid.
943    ///
944    /// # Examples
945    ///
946    /// Basic usage:
947    ///
948    /// ```
949    /// use bstr::ByteVec;
950    ///
951    /// let mut s = Vec::from("foobar");
952    /// s.replace_range(2..4, "xxxxx");
953    /// assert_eq!(s, "foxxxxxar".as_bytes());
954    /// ```
955    #[inline]
956    fn replace_range<R, B>(&mut self, range: R, replace_with: B)
957    where
958        R: ops::RangeBounds<usize>,
959        B: AsRef<[u8]>,
960    {
961        self.as_vec_mut().splice(range, replace_with.as_ref().iter().copied());
962    }
963
964    /// Creates a draining iterator that removes the specified range in this
965    /// `Vec<u8>` and yields each of the removed bytes.
966    ///
967    /// Note that the elements specified by the given range are removed
968    /// regardless of whether the returned iterator is fully exhausted.
969    ///
    /// Also note that it is unspecified how many bytes are removed from the
    /// `Vec<u8>` if the `DrainBytes` iterator is leaked.
972    ///
973    /// # Panics
974    ///
975    /// Panics if the given range is not valid.
976    ///
977    /// # Examples
978    ///
979    /// Basic usage:
980    ///
981    /// ```
982    /// use bstr::ByteVec;
983    ///
984    /// let mut s = Vec::from("foobar");
985    /// {
986    ///     let mut drainer = s.drain_bytes(2..4);
987    ///     assert_eq!(drainer.next(), Some(b'o'));
988    ///     assert_eq!(drainer.next(), Some(b'b'));
989    ///     assert_eq!(drainer.next(), None);
990    /// }
991    /// assert_eq!(s, "foar".as_bytes());
992    /// ```
993    #[inline]
994    fn drain_bytes<R>(&mut self, range: R) -> DrainBytes<'_>
995    where
996        R: ops::RangeBounds<usize>,
997    {
998        DrainBytes { it: self.as_vec_mut().drain(range) }
999    }
1000}
1001
/// A draining byte oriented iterator for `Vec<u8>`.
///
/// This iterator is created by
/// [`ByteVec::drain_bytes`](trait.ByteVec.html#method.drain_bytes).
///
/// It yields bytes from either end, is fused, and reports its exact
/// remaining length through both `len` and `size_hint`.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use bstr::ByteVec;
///
/// let mut s = Vec::from("foobar");
/// {
///     let mut drainer = s.drain_bytes(2..4);
///     assert_eq!(drainer.size_hint(), (2, Some(2)));
///     assert_eq!(drainer.next(), Some(b'o'));
///     assert_eq!(drainer.next(), Some(b'b'));
///     assert_eq!(drainer.next(), None);
/// }
/// assert_eq!(s, "foar".as_bytes());
/// ```
#[derive(Debug)]
pub struct DrainBytes<'a> {
    /// The underlying `Vec<u8>` drain that every method defers to.
    it: vec::Drain<'a, u8>,
}

impl<'a> iter::FusedIterator for DrainBytes<'a> {}

impl<'a> Iterator for DrainBytes<'a> {
    type Item = u8;

    #[inline]
    fn next(&mut self) -> Option<u8> {
        self.it.next()
    }

    // Forward the bounds of the underlying drain. Without this override the
    // default `(0, None)` hint is reported, which contradicts the
    // `ExactSizeIterator` impl below and keeps consumers such as `collect`
    // and `extend` from reserving capacity up front.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.it.size_hint()
    }
}

impl<'a> DoubleEndedIterator for DrainBytes<'a> {
    #[inline]
    fn next_back(&mut self) -> Option<u8> {
        self.it.next_back()
    }
}

impl<'a> ExactSizeIterator for DrainBytes<'a> {
    #[inline]
    fn len(&self) -> usize {
        self.it.len()
    }
}
1052
1053/// An error that may occur when converting a `Vec<u8>` to a `String`.
1054///
1055/// This error includes the original `Vec<u8>` that failed to convert to a
1056/// `String`. This permits callers to recover the allocation used even if it
1057/// it not valid UTF-8.
1058///
1059/// # Examples
1060///
1061/// Basic usage:
1062///
1063/// ```
1064/// use bstr::{B, ByteVec};
1065///
1066/// let bytes = Vec::from_slice(b"foo\xFFbar");
1067/// let err = bytes.into_string().unwrap_err();
1068///
1069/// assert_eq!(err.utf8_error().valid_up_to(), 3);
1070/// assert_eq!(err.utf8_error().error_len(), Some(1));
1071///
1072/// // At no point in this example is an allocation performed.
1073/// let bytes = Vec::from(err.into_vec());
1074/// assert_eq!(bytes, B(b"foo\xFFbar"));
1075/// ```
1076#[derive(Debug, Eq, PartialEq)]
1077pub struct FromUtf8Error {
1078    original: Vec<u8>,
1079    err: Utf8Error,
1080}
1081
1082impl FromUtf8Error {
1083    /// Return the original bytes as a slice that failed to convert to a
1084    /// `String`.
1085    ///
1086    /// # Examples
1087    ///
1088    /// Basic usage:
1089    ///
1090    /// ```
1091    /// use bstr::{B, ByteVec};
1092    ///
1093    /// let bytes = Vec::from_slice(b"foo\xFFbar");
1094    /// let err = bytes.into_string().unwrap_err();
1095    ///
1096    /// // At no point in this example is an allocation performed.
1097    /// assert_eq!(err.as_bytes(), B(b"foo\xFFbar"));
1098    /// ```
1099    #[inline]
1100    pub fn as_bytes(&self) -> &[u8] {
1101        &self.original
1102    }
1103
1104    /// Consume this error and return the original byte string that failed to
1105    /// convert to a `String`.
1106    ///
1107    /// # Examples
1108    ///
1109    /// Basic usage:
1110    ///
1111    /// ```
1112    /// use bstr::{B, ByteVec};
1113    ///
1114    /// let bytes = Vec::from_slice(b"foo\xFFbar");
1115    /// let err = bytes.into_string().unwrap_err();
1116    /// let original = err.into_vec();
1117    ///
1118    /// // At no point in this example is an allocation performed.
1119    /// assert_eq!(original, B(b"foo\xFFbar"));
1120    /// ```
1121    #[inline]
1122    pub fn into_vec(self) -> Vec<u8> {
1123        self.original
1124    }
1125
1126    /// Return the underlying UTF-8 error that occurred. This error provides
1127    /// information on the nature and location of the invalid UTF-8 detected.
1128    ///
1129    /// # Examples
1130    ///
1131    /// Basic usage:
1132    ///
1133    /// ```
1134    /// use bstr::{B, ByteVec};
1135    ///
1136    /// let bytes = Vec::from_slice(b"foo\xFFbar");
1137    /// let err = bytes.into_string().unwrap_err();
1138    ///
1139    /// assert_eq!(err.utf8_error().valid_up_to(), 3);
1140    /// assert_eq!(err.utf8_error().error_len(), Some(1));
1141    /// ```
1142    #[inline]
1143    pub fn utf8_error(&self) -> &Utf8Error {
1144        &self.err
1145    }
1146}
1147
#[cfg(feature = "std")]
impl error::Error for FromUtf8Error {
    /// Returns a fixed, short summary of this error. The detailed message
    /// is produced by the `Display` impl instead.
    #[inline]
    fn description(&self) -> &str {
        const SUMMARY: &str = "invalid UTF-8 vector";
        SUMMARY
    }
}
1155
1156impl fmt::Display for FromUtf8Error {
1157    #[inline]
1158    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1159        write!(f, "{}", self.err)
1160    }
1161}
1162
#[cfg(all(test, feature = "std"))]
mod tests {
    use alloc::vec::Vec;

    use crate::ext_vec::ByteVec;

    /// Inserting at any in-bounds offset (including both ends) splices the
    /// new bytes in at that position.
    #[test]
    fn insert() {
        // (initial contents, insertion offset, inserted text, expected)
        let cases: &[(&str, usize, &str, &str)] = &[
            ("", 0, "foo", "foo"),
            ("a", 0, "foo", "fooa"),
            ("a", 1, "foo", "afoo"),
            ("foobar", 3, "quux", "fooquuxbar"),
            ("foobar", 3, "x", "fooxbar"),
            ("foobar", 0, "x", "xfoobar"),
            ("foobar", 6, "x", "foobarx"),
            ("foobar", 3, "quuxbazquux", "fooquuxbazquuxbar"),
        ];
        for &(initial, at, addition, expected) in cases {
            let mut s: Vec<u8> = Vec::from(initial);
            s.insert_str(at, addition);
            assert_eq!(s, expected.as_bytes());
        }
    }

    /// Inserting past the end of an empty byte string must panic.
    #[test]
    #[should_panic]
    fn insert_fail1() {
        let mut s: Vec<u8> = Vec::new();
        s.insert_str(1, "foo");
    }

    /// Inserting more than one past the last byte must panic.
    #[test]
    #[should_panic]
    fn insert_fail2() {
        let mut s: Vec<u8> = Vec::from("a");
        s.insert_str(2, "foo");
    }

    /// Inserting beyond the length of a longer byte string must panic.
    #[test]
    #[should_panic]
    fn insert_fail3() {
        let mut s: Vec<u8> = Vec::from("foobar");
        s.insert_str(7, "foo");
    }
}
bstr/ext_vec.rs

bstr/
ext_vec.rs