bstr/
ext_vec.rs

Help
1use core::fmt;
2use core::iter;
3use core::ops;
4use core::ptr;
5
6use alloc::{borrow::Cow, string::String, vec, vec::Vec};
7
8#[cfg(feature = "std")]
9use std::{
10    error,
11    ffi::{OsStr, OsString},
12    path::{Path, PathBuf},
13};
14
15use crate::{
16    ext_slice::ByteSlice,
17    utf8::{self, Utf8Error},
18};
19
20/// Concatenate the elements given by the iterator together into a single
21/// `Vec<u8>`.
22///
23/// The elements may be any type that can be cheaply converted into an `&[u8]`.
24/// This includes, but is not limited to, `&str`, `&BStr` and `&[u8]` itself.
25///
26/// # Examples
27///
28/// Basic usage:
29///
30/// ```
31/// use bstr;
32///
33/// let s = bstr::concat(&["foo", "bar", "baz"]);
34/// assert_eq!(s, "foobarbaz".as_bytes());
35/// ```
#[inline]
pub fn concat<T, I>(elements: I) -> Vec<u8>
where
    T: AsRef<[u8]>,
    I: IntoIterator<Item = T>,
{
    // Fold every element's bytes, in iteration order, onto one growing
    // buffer. An empty iterator yields an empty vector.
    elements.into_iter().fold(Vec::new(), |mut joined: Vec<u8>, piece| {
        joined.extend_from_slice(piece.as_ref());
        joined
    })
}
48
49/// Join the elements given by the iterator with the given separator into a
50/// single `Vec<u8>`.
51///
52/// Both the separator and the elements may be any type that can be cheaply
53/// converted into an `&[u8]`. This includes, but is not limited to,
54/// `&str`, `&BStr` and `&[u8]` itself.
55///
56/// # Examples
57///
58/// Basic usage:
59///
60/// ```
61/// use bstr;
62///
63/// let s = bstr::join(",", &["foo", "bar", "baz"]);
64/// assert_eq!(s, "foo,bar,baz".as_bytes());
65/// ```
#[inline]
pub fn join<B, T, I>(separator: B, elements: I) -> Vec<u8>
where
    B: AsRef<[u8]>,
    T: AsRef<[u8]>,
    I: IntoIterator<Item = T>,
{
    // The separator never changes, so convert it to bytes once up front
    // rather than once per element.
    let sep: &[u8] = separator.as_ref();
    let mut dest = Vec::new();
    for (index, element) in elements.into_iter().enumerate() {
        // The separator goes *between* elements: never before the first one
        // and never after the last one.
        if index > 0 {
            dest.extend_from_slice(sep);
        }
        dest.extend_from_slice(element.as_ref());
    }
    dest
}
87
// `Vec<u8>` is itself the vector that the trait's accessors expose, so each
// of the three required methods simply hands back `self`.
impl ByteVec for Vec<u8> {
    // Shared access to the underlying vector.
    #[inline]
    fn as_vec(&self) -> &Vec<u8> {
        self
    }

    // Exclusive access to the underlying vector.
    #[inline]
    fn as_vec_mut(&mut self) -> &mut Vec<u8> {
        self
    }

    // Ownership of the underlying vector.
    #[inline]
    fn into_vec(self) -> Vec<u8> {
        self
    }
}
104
/// Ensure that callers cannot implement `ByteVec` by making an
/// unimplementable trait its super trait.
mod private {
    // `Sealed` is `pub` so it can appear as a supertrait bound on `ByteVec`,
    // but it lives inside this private module.
    pub trait Sealed {}
}
// `Vec<u8>` is the type that gets to be `Sealed`, and therefore `ByteVec`.
impl private::Sealed for Vec<u8> {}
111
112/// A trait that extends `Vec<u8>` with string oriented methods.
113///
114/// Note that when using the constructor methods, such as
115/// `ByteVec::from_slice`, one should actually call them using the concrete
116/// type. For example:
117///
118/// ```
119/// use bstr::{B, ByteVec};
120///
121/// let s = Vec::from_slice(b"abc"); // NOT ByteVec::from_slice("...")
122/// assert_eq!(s, B("abc"));
123/// ```
124///
125/// This trait is sealed and cannot be implemented outside of `bstr`.
126pub trait ByteVec: private::Sealed {
    /// A method for accessing the raw vector bytes of this type. This is
    /// always a no-op and callers shouldn't care about it. This only exists
    /// for making the extension trait work.
    ///
    /// The provided methods of this trait go through this accessor whenever
    /// they need read-only access to the underlying `Vec<u8>`.
    #[doc(hidden)]
    fn as_vec(&self) -> &Vec<u8>;

    /// A method for accessing the raw vector bytes of this type, mutably. This
    /// is always a no-op and callers shouldn't care about it. This only exists
    /// for making the extension trait work.
    ///
    /// The provided methods of this trait go through this accessor whenever
    /// they need to modify the underlying `Vec<u8>`.
    #[doc(hidden)]
    fn as_vec_mut(&mut self) -> &mut Vec<u8>;

    /// A method for consuming ownership of this vector. This is always a no-op
    /// and callers shouldn't care about it. This only exists for making the
    /// extension trait work.
    ///
    /// The `Self: Sized` bound is there because this method takes `self` by
    /// value.
    #[doc(hidden)]
    fn into_vec(self) -> Vec<u8>
    where
        Self: Sized;
146
147    /// Create a new owned byte string from the given byte slice.
148    ///
149    /// # Examples
150    ///
151    /// Basic usage:
152    ///
153    /// ```
154    /// use bstr::{B, ByteVec};
155    ///
156    /// let s = Vec::from_slice(b"abc");
157    /// assert_eq!(s, B("abc"));
158    /// ```
159    #[inline]
160    fn from_slice<B: AsRef<[u8]>>(bytes: B) -> Vec<u8> {
161        bytes.as_ref().to_vec()
162    }
163
164    /// Create a new byte string from an owned OS string.
165    ///
166    /// When the underlying bytes of OS strings are accessible, then this
167    /// always succeeds and is zero cost. Otherwise, this returns the given
168    /// `OsString` if it is not valid UTF-8.
169    ///
170    /// # Examples
171    ///
172    /// Basic usage:
173    ///
174    /// ```
175    /// use std::ffi::OsString;
176    ///
177    /// use bstr::{B, ByteVec};
178    ///
179    /// let os_str = OsString::from("foo");
180    /// let bs = Vec::from_os_string(os_str).expect("valid UTF-8");
181    /// assert_eq!(bs, B("foo"));
182    /// ```
183    #[inline]
184    #[cfg(feature = "std")]
185    fn from_os_string(os_str: OsString) -> Result<Vec<u8>, OsString> {
186        #[cfg(unix)]
187        #[inline]
188        fn imp(os_str: OsString) -> Result<Vec<u8>, OsString> {
189            use std::os::unix::ffi::OsStringExt;
190
191            Ok(Vec::from(os_str.into_vec()))
192        }
193
194        #[cfg(not(unix))]
195        #[inline]
196        fn imp(os_str: OsString) -> Result<Vec<u8>, OsString> {
197            os_str.into_string().map(Vec::from)
198        }
199
200        imp(os_str)
201    }
202
203    /// Lossily create a new byte string from an OS string slice.
204    ///
205    /// When the underlying bytes of OS strings are accessible, then this is
206    /// zero cost and always returns a slice. Otherwise, a UTF-8 check is
207    /// performed and if the given OS string is not valid UTF-8, then it is
208    /// lossily decoded into valid UTF-8 (with invalid bytes replaced by the
209    /// Unicode replacement codepoint).
210    ///
211    /// # Examples
212    ///
213    /// Basic usage:
214    ///
215    /// ```
216    /// use std::ffi::OsStr;
217    ///
218    /// use bstr::{B, ByteVec};
219    ///
220    /// let os_str = OsStr::new("foo");
221    /// let bs = Vec::from_os_str_lossy(os_str);
222    /// assert_eq!(bs, B("foo"));
223    /// ```
224    #[inline]
225    #[cfg(feature = "std")]
226    fn from_os_str_lossy<'a>(os_str: &'a OsStr) -> Cow<'a, [u8]> {
227        #[cfg(unix)]
228        #[inline]
229        fn imp<'a>(os_str: &'a OsStr) -> Cow<'a, [u8]> {
230            use std::os::unix::ffi::OsStrExt;
231
232            Cow::Borrowed(os_str.as_bytes())
233        }
234
235        #[cfg(not(unix))]
236        #[inline]
237        fn imp<'a>(os_str: &'a OsStr) -> Cow<'a, [u8]> {
238            match os_str.to_string_lossy() {
239                Cow::Borrowed(x) => Cow::Borrowed(x.as_bytes()),
240                Cow::Owned(x) => Cow::Owned(Vec::from(x)),
241            }
242        }
243
244        imp(os_str)
245    }
246
247    /// Create a new byte string from an owned file path.
248    ///
249    /// When the underlying bytes of paths are accessible, then this always
250    /// succeeds and is zero cost. Otherwise, this returns the given `PathBuf`
251    /// if it is not valid UTF-8.
252    ///
253    /// # Examples
254    ///
255    /// Basic usage:
256    ///
257    /// ```
258    /// use std::path::PathBuf;
259    ///
260    /// use bstr::{B, ByteVec};
261    ///
262    /// let path = PathBuf::from("foo");
263    /// let bs = Vec::from_path_buf(path).expect("must be valid UTF-8");
264    /// assert_eq!(bs, B("foo"));
265    /// ```
266    #[inline]
267    #[cfg(feature = "std")]
268    fn from_path_buf(path: PathBuf) -> Result<Vec<u8>, PathBuf> {
269        Vec::from_os_string(path.into_os_string()).map_err(PathBuf::from)
270    }
271
272    /// Lossily create a new byte string from a file path.
273    ///
274    /// When the underlying bytes of paths are accessible, then this is
275    /// zero cost and always returns a slice. Otherwise, a UTF-8 check is
276    /// performed and if the given path is not valid UTF-8, then it is lossily
277    /// decoded into valid UTF-8 (with invalid bytes replaced by the Unicode
278    /// replacement codepoint).
279    ///
280    /// # Examples
281    ///
282    /// Basic usage:
283    ///
284    /// ```
285    /// use std::path::Path;
286    ///
287    /// use bstr::{B, ByteVec};
288    ///
289    /// let path = Path::new("foo");
290    /// let bs = Vec::from_path_lossy(path);
291    /// assert_eq!(bs, B("foo"));
292    /// ```
293    #[inline]
294    #[cfg(feature = "std")]
295    fn from_path_lossy<'a>(path: &'a Path) -> Cow<'a, [u8]> {
296        Vec::from_os_str_lossy(path.as_os_str())
297    }
298
299    /// Unescapes the given string into its raw bytes.
300    ///
301    /// This looks for the escape sequences `\xNN`, `\0`, `\r`, `\n`, `\t`
    /// and `\\` and translates them into their corresponding unescaped form.
303    ///
304    /// Incomplete escape sequences or things that look like escape sequences
305    /// but are not (for example, `\i` or `\xYZ`) are passed through literally.
306    ///
307    /// This is the dual of [`ByteSlice::escape_bytes`].
308    ///
309    /// Note that the zero or NUL byte may be represented as either `\0` or
310    /// `\x00`. Both will be unescaped into the zero byte.
311    ///
312    /// # Examples
313    ///
314    /// This shows basic usage:
315    ///
316    /// ```
317    /// # #[cfg(feature = "alloc")] {
318    /// use bstr::{B, BString, ByteVec};
319    ///
320    /// assert_eq!(
321    ///     BString::from(b"foo\xFFbar"),
322    ///     Vec::unescape_bytes(r"foo\xFFbar"),
323    /// );
324    /// assert_eq!(
325    ///     BString::from(b"foo\nbar"),
326    ///     Vec::unescape_bytes(r"foo\nbar"),
327    /// );
328    /// assert_eq!(
329    ///     BString::from(b"foo\tbar"),
330    ///     Vec::unescape_bytes(r"foo\tbar"),
331    /// );
332    /// assert_eq!(
333    ///     BString::from(b"foo\\bar"),
334    ///     Vec::unescape_bytes(r"foo\\bar"),
335    /// );
336    /// assert_eq!(
337    ///     BString::from("foo☃bar"),
338    ///     Vec::unescape_bytes(r"foo☃bar"),
339    /// );
340    ///
341    /// # }
342    /// ```
343    ///
344    /// This shows some examples of how incomplete or "incorrect" escape
345    /// sequences get passed through literally.
346    ///
347    /// ```
348    /// # #[cfg(feature = "alloc")] {
349    /// use bstr::{B, BString, ByteVec};
350    ///
351    /// // Show some incomplete escape sequences.
352    /// assert_eq!(
353    ///     BString::from(br"\"),
354    ///     Vec::unescape_bytes(r"\"),
355    /// );
356    /// assert_eq!(
357    ///     BString::from(br"\"),
358    ///     Vec::unescape_bytes(r"\\"),
359    /// );
360    /// assert_eq!(
361    ///     BString::from(br"\x"),
362    ///     Vec::unescape_bytes(r"\x"),
363    /// );
364    /// assert_eq!(
365    ///     BString::from(br"\xA"),
366    ///     Vec::unescape_bytes(r"\xA"),
367    /// );
368    /// // And now some that kind of look like escape
369    /// // sequences, but aren't.
370    /// assert_eq!(
371    ///     BString::from(br"\xZ"),
372    ///     Vec::unescape_bytes(r"\xZ"),
373    /// );
374    /// assert_eq!(
375    ///     BString::from(br"\xZZ"),
376    ///     Vec::unescape_bytes(r"\xZZ"),
377    /// );
378    /// assert_eq!(
379    ///     BString::from(br"\i"),
380    ///     Vec::unescape_bytes(r"\i"),
381    /// );
382    /// assert_eq!(
383    ///     BString::from(br"\u"),
384    ///     Vec::unescape_bytes(r"\u"),
385    /// );
386    /// assert_eq!(
387    ///     BString::from(br"\u{2603}"),
388    ///     Vec::unescape_bytes(r"\u{2603}"),
389    /// );
390    ///
391    /// # }
392    /// ```
393    #[inline]
394    #[cfg(feature = "alloc")]
395    fn unescape_bytes<S: AsRef<str>>(escaped: S) -> Vec<u8> {
396        let s = escaped.as_ref();
397        crate::escape_bytes::UnescapeBytes::new(s.chars()).collect()
398    }
399
400    /// Appends the given byte to the end of this byte string.
401    ///
402    /// Note that this is equivalent to the generic `Vec::push` method. This
403    /// method is provided to permit callers to explicitly differentiate
404    /// between pushing bytes, codepoints and strings.
405    ///
406    /// # Examples
407    ///
408    /// Basic usage:
409    ///
410    /// ```
411    /// use bstr::ByteVec;
412    ///
413    /// let mut s = <Vec<u8>>::from("abc");
414    /// s.push_byte(b'\xE2');
415    /// s.push_byte(b'\x98');
416    /// s.push_byte(b'\x83');
417    /// assert_eq!(s, "abc☃".as_bytes());
418    /// ```
419    #[inline]
420    fn push_byte(&mut self, byte: u8) {
421        self.as_vec_mut().push(byte);
422    }
423
424    /// Appends the given `char` to the end of this byte string.
425    ///
426    /// # Examples
427    ///
428    /// Basic usage:
429    ///
430    /// ```
431    /// use bstr::ByteVec;
432    ///
433    /// let mut s = <Vec<u8>>::from("abc");
434    /// s.push_char('1');
435    /// s.push_char('2');
436    /// s.push_char('3');
437    /// assert_eq!(s, "abc123".as_bytes());
438    /// ```
439    #[inline]
440    fn push_char(&mut self, ch: char) {
441        if ch.len_utf8() == 1 {
442            self.push_byte(ch as u8);
443            return;
444        }
445        self.as_vec_mut()
446            .extend_from_slice(ch.encode_utf8(&mut [0; 4]).as_bytes());
447    }
448
449    /// Appends the given slice to the end of this byte string. This accepts
    /// any type that can be converted to a `&[u8]`. This includes, but is not
451    /// limited to, `&str`, `&BStr`, and of course, `&[u8]` itself.
452    ///
453    /// # Examples
454    ///
455    /// Basic usage:
456    ///
457    /// ```
458    /// use bstr::ByteVec;
459    ///
460    /// let mut s = <Vec<u8>>::from("abc");
461    /// s.push_str(b"123");
462    /// assert_eq!(s, "abc123".as_bytes());
463    /// ```
464    #[inline]
465    fn push_str<B: AsRef<[u8]>>(&mut self, bytes: B) {
466        self.as_vec_mut().extend_from_slice(bytes.as_ref());
467    }
468
469    /// Converts a `Vec<u8>` into a `String` if and only if this byte string is
470    /// valid UTF-8.
471    ///
472    /// If it is not valid UTF-8, then a
473    /// [`FromUtf8Error`](struct.FromUtf8Error.html)
474    /// is returned. (This error can be used to examine why UTF-8 validation
475    /// failed, or to regain the original byte string.)
476    ///
477    /// # Examples
478    ///
479    /// Basic usage:
480    ///
481    /// ```
482    /// use bstr::ByteVec;
483    ///
484    /// let bytes = Vec::from("hello");
485    /// let string = bytes.into_string().unwrap();
486    ///
487    /// assert_eq!("hello", string);
488    /// ```
489    ///
490    /// If this byte string is not valid UTF-8, then an error will be returned.
491    /// That error can then be used to inspect the location at which invalid
492    /// UTF-8 was found, or to regain the original byte string:
493    ///
494    /// ```
495    /// use bstr::{B, ByteVec};
496    ///
497    /// let bytes = Vec::from_slice(b"foo\xFFbar");
498    /// let err = bytes.into_string().unwrap_err();
499    ///
500    /// assert_eq!(err.utf8_error().valid_up_to(), 3);
501    /// assert_eq!(err.utf8_error().error_len(), Some(1));
502    ///
503    /// // At no point in this example is an allocation performed.
504    /// let bytes = Vec::from(err.into_vec());
505    /// assert_eq!(bytes, B(b"foo\xFFbar"));
506    /// ```
507    #[inline]
508    fn into_string(self) -> Result<String, FromUtf8Error>
509    where
510        Self: Sized,
511    {
512        match utf8::validate(self.as_vec()) {
513            Err(err) => Err(FromUtf8Error { original: self.into_vec(), err }),
514            Ok(()) => {
515                // SAFETY: This is safe because of the guarantees provided by
516                // utf8::validate.
517                unsafe { Ok(self.into_string_unchecked()) }
518            }
519        }
520    }
521
522    /// Lossily converts a `Vec<u8>` into a `String`. If this byte string
523    /// contains invalid UTF-8, then the invalid bytes are replaced with the
524    /// Unicode replacement codepoint.
525    ///
526    /// # Examples
527    ///
528    /// Basic usage:
529    ///
530    /// ```
531    /// use bstr::ByteVec;
532    ///
533    /// let bytes = Vec::from_slice(b"foo\xFFbar");
534    /// let string = bytes.into_string_lossy();
535    /// assert_eq!(string, "foo\u{FFFD}bar");
536    /// ```
537    #[inline]
538    fn into_string_lossy(self) -> String
539    where
540        Self: Sized,
541    {
542        match self.as_vec().to_str_lossy() {
543            Cow::Borrowed(_) => {
544                // SAFETY: to_str_lossy() returning a Cow::Borrowed guarantees
545                // the entire string is valid utf8.
546                unsafe { self.into_string_unchecked() }
547            }
548            Cow::Owned(s) => s,
549        }
550    }
551
552    /// Unsafely convert this byte string into a `String`, without checking for
553    /// valid UTF-8.
554    ///
555    /// # Safety
556    ///
557    /// Callers *must* ensure that this byte string is valid UTF-8 before
558    /// calling this method. Converting a byte string into a `String` that is
559    /// not valid UTF-8 is considered undefined behavior.
560    ///
561    /// This routine is useful in performance sensitive contexts where the
562    /// UTF-8 validity of the byte string is already known and it is
563    /// undesirable to pay the cost of an additional UTF-8 validation check
564    /// that [`into_string`](#method.into_string) performs.
565    ///
566    /// # Examples
567    ///
568    /// Basic usage:
569    ///
570    /// ```
571    /// use bstr::ByteVec;
572    ///
573    /// // SAFETY: This is safe because string literals are guaranteed to be
574    /// // valid UTF-8 by the Rust compiler.
575    /// let s = unsafe { Vec::from("☃βツ").into_string_unchecked() };
576    /// assert_eq!("☃βツ", s);
577    /// ```
578    #[inline]
579    unsafe fn into_string_unchecked(self) -> String
580    where
581        Self: Sized,
582    {
583        String::from_utf8_unchecked(self.into_vec())
584    }
585
586    /// Converts this byte string into an OS string, in place.
587    ///
588    /// When OS strings can be constructed from arbitrary byte sequences, this
589    /// always succeeds and is zero cost. Otherwise, if this byte string is not
590    /// valid UTF-8, then an error (with the original byte string) is returned.
591    ///
592    /// # Examples
593    ///
594    /// Basic usage:
595    ///
596    /// ```
597    /// use std::ffi::OsStr;
598    ///
599    /// use bstr::ByteVec;
600    ///
601    /// let bs = Vec::from("foo");
602    /// let os_str = bs.into_os_string().expect("should be valid UTF-8");
603    /// assert_eq!(os_str, OsStr::new("foo"));
604    /// ```
605    #[cfg(feature = "std")]
606    #[inline]
607    fn into_os_string(self) -> Result<OsString, FromUtf8Error>
608    where
609        Self: Sized,
610    {
611        #[cfg(unix)]
612        #[inline]
613        fn imp(v: Vec<u8>) -> Result<OsString, FromUtf8Error> {
614            use std::os::unix::ffi::OsStringExt;
615
616            Ok(OsString::from_vec(v))
617        }
618
619        #[cfg(not(unix))]
620        #[inline]
621        fn imp(v: Vec<u8>) -> Result<OsString, FromUtf8Error> {
622            v.into_string().map(OsString::from)
623        }
624
625        imp(self.into_vec())
626    }
627
628    /// Lossily converts this byte string into an OS string, in place.
629    ///
    /// When OS strings can be constructed from arbitrary byte sequences, this
    /// is zero cost and preserves the bytes as-is. Otherwise, this will
    /// perform a UTF-8 check and lossily convert this byte string into valid
    /// UTF-8 using the Unicode replacement codepoint.
634    ///
635    /// Note that this can prevent the correct roundtripping of file paths when
636    /// the representation of `OsString` is opaque.
637    ///
638    /// # Examples
639    ///
640    /// Basic usage:
641    ///
642    /// ```
643    /// use bstr::ByteVec;
644    ///
645    /// let bs = Vec::from_slice(b"foo\xFFbar");
646    /// let os_str = bs.into_os_string_lossy();
647    /// assert_eq!(os_str.to_string_lossy(), "foo\u{FFFD}bar");
648    /// ```
649    #[inline]
650    #[cfg(feature = "std")]
651    fn into_os_string_lossy(self) -> OsString
652    where
653        Self: Sized,
654    {
655        #[cfg(unix)]
656        #[inline]
657        fn imp(v: Vec<u8>) -> OsString {
658            use std::os::unix::ffi::OsStringExt;
659
660            OsString::from_vec(v)
661        }
662
663        #[cfg(not(unix))]
664        #[inline]
665        fn imp(v: Vec<u8>) -> OsString {
666            OsString::from(v.into_string_lossy())
667        }
668
669        imp(self.into_vec())
670    }
671
672    /// Converts this byte string into an owned file path, in place.
673    ///
674    /// When paths can be constructed from arbitrary byte sequences, this
675    /// always succeeds and is zero cost. Otherwise, if this byte string is not
676    /// valid UTF-8, then an error (with the original byte string) is returned.
677    ///
678    /// # Examples
679    ///
680    /// Basic usage:
681    ///
682    /// ```
683    /// use bstr::ByteVec;
684    ///
685    /// let bs = Vec::from("foo");
686    /// let path = bs.into_path_buf().expect("should be valid UTF-8");
687    /// assert_eq!(path.as_os_str(), "foo");
688    /// ```
689    #[cfg(feature = "std")]
690    #[inline]
691    fn into_path_buf(self) -> Result<PathBuf, FromUtf8Error>
692    where
693        Self: Sized,
694    {
695        self.into_os_string().map(PathBuf::from)
696    }
697
698    /// Lossily converts this byte string into an owned file path, in place.
699    ///
    /// When paths can be constructed from arbitrary byte sequences, this is
    /// zero cost and preserves the bytes as-is. Otherwise, this will perform
    /// a UTF-8 check and lossily convert this byte string into valid UTF-8
    /// using the Unicode replacement codepoint.
704    ///
705    /// Note that this can prevent the correct roundtripping of file paths when
706    /// the representation of `PathBuf` is opaque.
707    ///
708    /// # Examples
709    ///
710    /// Basic usage:
711    ///
712    /// ```
713    /// use bstr::ByteVec;
714    ///
715    /// let bs = Vec::from_slice(b"foo\xFFbar");
716    /// let path = bs.into_path_buf_lossy();
717    /// assert_eq!(path.to_string_lossy(), "foo\u{FFFD}bar");
718    /// ```
719    #[inline]
720    #[cfg(feature = "std")]
721    fn into_path_buf_lossy(self) -> PathBuf
722    where
723        Self: Sized,
724    {
725        PathBuf::from(self.into_os_string_lossy())
726    }
727
728    /// Removes the last byte from this `Vec<u8>` and returns it.
729    ///
730    /// If this byte string is empty, then `None` is returned.
731    ///
732    /// If the last codepoint in this byte string is not ASCII, then removing
733    /// the last byte could make this byte string contain invalid UTF-8.
734    ///
735    /// Note that this is equivalent to the generic `Vec::pop` method. This
736    /// method is provided to permit callers to explicitly differentiate
737    /// between popping bytes and codepoints.
738    ///
739    /// # Examples
740    ///
741    /// Basic usage:
742    ///
743    /// ```
744    /// use bstr::ByteVec;
745    ///
746    /// let mut s = Vec::from("foo");
747    /// assert_eq!(s.pop_byte(), Some(b'o'));
748    /// assert_eq!(s.pop_byte(), Some(b'o'));
749    /// assert_eq!(s.pop_byte(), Some(b'f'));
750    /// assert_eq!(s.pop_byte(), None);
751    /// ```
752    #[inline]
753    fn pop_byte(&mut self) -> Option<u8> {
754        self.as_vec_mut().pop()
755    }
756
757    /// Removes the last codepoint from this `Vec<u8>` and returns it.
758    ///
759    /// If this byte string is empty, then `None` is returned. If the last
760    /// bytes of this byte string do not correspond to a valid UTF-8 code unit
761    /// sequence, then the Unicode replacement codepoint is yielded instead in
762    /// accordance with the
    /// [replacement codepoint substitution policy](index.html#handling-of-invalid-utf-8).
764    ///
765    /// # Examples
766    ///
767    /// Basic usage:
768    ///
769    /// ```
770    /// use bstr::ByteVec;
771    ///
772    /// let mut s = Vec::from("foo");
773    /// assert_eq!(s.pop_char(), Some('o'));
774    /// assert_eq!(s.pop_char(), Some('o'));
775    /// assert_eq!(s.pop_char(), Some('f'));
776    /// assert_eq!(s.pop_char(), None);
777    /// ```
778    ///
779    /// This shows the replacement codepoint substitution policy. Note that
780    /// the first pop yields a replacement codepoint but actually removes two
781    /// bytes. This is in contrast with subsequent pops when encountering
782    /// `\xFF` since `\xFF` is never a valid prefix for any valid UTF-8
783    /// code unit sequence.
784    ///
785    /// ```
786    /// use bstr::ByteVec;
787    ///
788    /// let mut s = Vec::from_slice(b"f\xFF\xFF\xFFoo\xE2\x98");
789    /// assert_eq!(s.pop_char(), Some('\u{FFFD}'));
790    /// assert_eq!(s.pop_char(), Some('o'));
791    /// assert_eq!(s.pop_char(), Some('o'));
792    /// assert_eq!(s.pop_char(), Some('\u{FFFD}'));
793    /// assert_eq!(s.pop_char(), Some('\u{FFFD}'));
794    /// assert_eq!(s.pop_char(), Some('\u{FFFD}'));
795    /// assert_eq!(s.pop_char(), Some('f'));
796    /// assert_eq!(s.pop_char(), None);
797    /// ```
798    #[inline]
799    fn pop_char(&mut self) -> Option<char> {
800        let (ch, size) = utf8::decode_last_lossy(self.as_vec());
801        if size == 0 {
802            return None;
803        }
804        let new_len = self.as_vec().len() - size;
805        self.as_vec_mut().truncate(new_len);
806        Some(ch)
807    }
808
809    /// Removes a `char` from this `Vec<u8>` at the given byte position and
810    /// returns it.
811    ///
812    /// If the bytes at the given position do not lead to a valid UTF-8 code
813    /// unit sequence, then a
    /// [replacement codepoint is returned instead](index.html#handling-of-invalid-utf-8).
815    ///
816    /// # Panics
817    ///
818    /// Panics if `at` is larger than or equal to this byte string's length.
819    ///
820    /// # Examples
821    ///
822    /// Basic usage:
823    ///
824    /// ```
825    /// use bstr::ByteVec;
826    ///
827    /// let mut s = Vec::from("foo☃bar");
828    /// assert_eq!(s.remove_char(3), '☃');
829    /// assert_eq!(s, b"foobar");
830    /// ```
831    ///
832    /// This example shows how the Unicode replacement codepoint policy is
833    /// used:
834    ///
835    /// ```
836    /// use bstr::ByteVec;
837    ///
838    /// let mut s = Vec::from_slice(b"foo\xFFbar");
839    /// assert_eq!(s.remove_char(3), '\u{FFFD}');
840    /// assert_eq!(s, b"foobar");
841    /// ```
842    #[inline]
843    fn remove_char(&mut self, at: usize) -> char {
844        let (ch, size) = utf8::decode_lossy(&self.as_vec()[at..]);
845        assert!(
846            size > 0,
847            "expected {} to be less than {}",
848            at,
849            self.as_vec().len(),
850        );
851        self.as_vec_mut().drain(at..at + size);
852        ch
853    }
854
855    /// Inserts the given codepoint into this `Vec<u8>` at a particular byte
856    /// position.
857    ///
858    /// This is an `O(n)` operation as it may copy a number of elements in this
859    /// byte string proportional to its length.
860    ///
861    /// # Panics
862    ///
863    /// Panics if `at` is larger than the byte string's length.
864    ///
865    /// # Examples
866    ///
867    /// Basic usage:
868    ///
869    /// ```
870    /// use bstr::ByteVec;
871    ///
872    /// let mut s = Vec::from("foobar");
873    /// s.insert_char(3, '☃');
874    /// assert_eq!(s, "foo☃bar".as_bytes());
875    /// ```
876    #[inline]
877    fn insert_char(&mut self, at: usize, ch: char) {
878        self.insert_str(at, ch.encode_utf8(&mut [0; 4]).as_bytes());
879    }
880
881    /// Inserts the given byte string into this byte string at a particular
882    /// byte position.
883    ///
884    /// This is an `O(n)` operation as it may copy a number of elements in this
885    /// byte string proportional to its length.
886    ///
887    /// The given byte string may be any type that can be cheaply converted
888    /// into a `&[u8]`. This includes, but is not limited to, `&str` and
889    /// `&[u8]`.
890    ///
891    /// # Panics
892    ///
893    /// Panics if `at` is larger than the byte string's length.
894    ///
895    /// # Examples
896    ///
897    /// Basic usage:
898    ///
899    /// ```
900    /// use bstr::ByteVec;
901    ///
902    /// let mut s = Vec::from("foobar");
903    /// s.insert_str(3, "☃☃☃");
904    /// assert_eq!(s, "foo☃☃☃bar".as_bytes());
905    /// ```
906    #[inline]
907    fn insert_str<B: AsRef<[u8]>>(&mut self, at: usize, bytes: B) {
908        let bytes = bytes.as_ref();
909        let len = self.as_vec().len();
910        assert!(at <= len, "expected {} to be <= {}", at, len);
911
912        // SAFETY: We'd like to efficiently splice in the given bytes into
913        // this byte string. Since we are only working with `u8` elements here,
914        // we only need to consider whether our bounds are correct and whether
915        // our byte string has enough space.
916        self.as_vec_mut().reserve(bytes.len());
917        unsafe {
918            // Shift bytes after `at` over by the length of `bytes` to make
919            // room for it. This requires referencing two regions of memory
920            // that may overlap, so we use ptr::copy.
921            ptr::copy(
922                self.as_vec().as_ptr().add(at),
923                self.as_vec_mut().as_mut_ptr().add(at + bytes.len()),
924                len - at,
925            );
926            // Now copy the bytes given into the room we made above. In this
927            // case, we know that the given bytes cannot possibly overlap
928            // with this byte string since we have a mutable borrow of the
929            // latter. Thus, we can use a nonoverlapping copy.
930            ptr::copy_nonoverlapping(
931                bytes.as_ptr(),
932                self.as_vec_mut().as_mut_ptr().add(at),
933                bytes.len(),
934            );
935            self.as_vec_mut().set_len(len + bytes.len());
936        }
937    }
938
939    /// Removes the specified range in this byte string and replaces it with
940    /// the given bytes. The given bytes do not need to have the same length
941    /// as the range provided.
942    ///
943    /// # Panics
944    ///
945    /// Panics if the given range is invalid.
946    ///
947    /// # Examples
948    ///
949    /// Basic usage:
950    ///
951    /// ```
952    /// use bstr::ByteVec;
953    ///
954    /// let mut s = Vec::from("foobar");
955    /// s.replace_range(2..4, "xxxxx");
956    /// assert_eq!(s, "foxxxxxar".as_bytes());
957    /// ```
958    #[inline]
959    fn replace_range<R, B>(&mut self, range: R, replace_with: B)
960    where
961        R: ops::RangeBounds<usize>,
962        B: AsRef<[u8]>,
963    {
964        self.as_vec_mut().splice(range, replace_with.as_ref().iter().cloned());
965    }
966
967    /// Creates a draining iterator that removes the specified range in this
968    /// `Vec<u8>` and yields each of the removed bytes.
969    ///
970    /// Note that the elements specified by the given range are removed
971    /// regardless of whether the returned iterator is fully exhausted.
972    ///
    /// Also note that it is unspecified how many bytes are removed from the
974    /// `Vec<u8>` if the `DrainBytes` iterator is leaked.
975    ///
976    /// # Panics
977    ///
978    /// Panics if the given range is not valid.
979    ///
980    /// # Examples
981    ///
982    /// Basic usage:
983    ///
984    /// ```
985    /// use bstr::ByteVec;
986    ///
987    /// let mut s = Vec::from("foobar");
988    /// {
989    ///     let mut drainer = s.drain_bytes(2..4);
990    ///     assert_eq!(drainer.next(), Some(b'o'));
991    ///     assert_eq!(drainer.next(), Some(b'b'));
992    ///     assert_eq!(drainer.next(), None);
993    /// }
994    /// assert_eq!(s, "foar".as_bytes());
995    /// ```
996    #[inline]
997    fn drain_bytes<R>(&mut self, range: R) -> DrainBytes<'_>
998    where
999        R: ops::RangeBounds<usize>,
1000    {
1001        DrainBytes { it: self.as_vec_mut().drain(range) }
1002    }
1003}
1004
/// A draining byte oriented iterator for `Vec<u8>`.
///
/// This iterator is created by
/// [`ByteVec::drain_bytes`](trait.ByteVec.html#method.drain_bytes).
///
/// It yields the removed bytes in order, can be consumed from either end,
/// and reports its exact remaining length through both `size_hint` and
/// `len`.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use bstr::ByteVec;
///
/// let mut s = Vec::from("foobar");
/// {
///     let mut drainer = s.drain_bytes(2..4);
///     assert_eq!(drainer.next(), Some(b'o'));
///     assert_eq!(drainer.next(), Some(b'b'));
///     assert_eq!(drainer.next(), None);
/// }
/// assert_eq!(s, "foar".as_bytes());
/// ```
#[derive(Debug)]
pub struct DrainBytes<'a> {
    /// The underlying `Vec<u8>` drain; every trait method delegates to it.
    it: vec::Drain<'a, u8>,
}

impl<'a> iter::FusedIterator for DrainBytes<'a> {}

impl<'a> Iterator for DrainBytes<'a> {
    type Item = u8;

    /// Returns the next removed byte from the front of the drained range.
    #[inline]
    fn next(&mut self) -> Option<u8> {
        self.it.next()
    }

    /// Returns the exact number of bytes that remain to be yielded.
    ///
    /// `ExactSizeIterator` requires `size_hint` to report the exact length.
    /// The default implementation returns `(0, None)`, which would violate
    /// that contract and prevent consumers such as `collect` from
    /// pre-allocating, so the hint of the inner drain is forwarded.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.it.size_hint()
    }
}

impl<'a> DoubleEndedIterator for DrainBytes<'a> {
    /// Returns the next removed byte from the back of the drained range.
    #[inline]
    fn next_back(&mut self) -> Option<u8> {
        self.it.next_back()
    }
}

impl<'a> ExactSizeIterator for DrainBytes<'a> {
    /// Returns the number of bytes that have not been yielded yet.
    #[inline]
    fn len(&self) -> usize {
        self.it.len()
    }
}
1055
1056/// An error that may occur when converting a `Vec<u8>` to a `String`.
1057///
1058/// This error includes the original `Vec<u8>` that failed to convert to a
1059/// `String`. This permits callers to recover the allocation used even if it
1060/// it not valid UTF-8.
1061///
1062/// # Examples
1063///
1064/// Basic usage:
1065///
1066/// ```
1067/// use bstr::{B, ByteVec};
1068///
1069/// let bytes = Vec::from_slice(b"foo\xFFbar");
1070/// let err = bytes.into_string().unwrap_err();
1071///
1072/// assert_eq!(err.utf8_error().valid_up_to(), 3);
1073/// assert_eq!(err.utf8_error().error_len(), Some(1));
1074///
1075/// // At no point in this example is an allocation performed.
1076/// let bytes = Vec::from(err.into_vec());
1077/// assert_eq!(bytes, B(b"foo\xFFbar"));
1078/// ```
1079#[derive(Debug, Eq, PartialEq)]
1080pub struct FromUtf8Error {
1081    original: Vec<u8>,
1082    err: Utf8Error,
1083}
1084
1085impl FromUtf8Error {
1086    /// Return the original bytes as a slice that failed to convert to a
1087    /// `String`.
1088    ///
1089    /// # Examples
1090    ///
1091    /// Basic usage:
1092    ///
1093    /// ```
1094    /// use bstr::{B, ByteVec};
1095    ///
1096    /// let bytes = Vec::from_slice(b"foo\xFFbar");
1097    /// let err = bytes.into_string().unwrap_err();
1098    ///
1099    /// // At no point in this example is an allocation performed.
1100    /// assert_eq!(err.as_bytes(), B(b"foo\xFFbar"));
1101    /// ```
1102    #[inline]
1103    pub fn as_bytes(&self) -> &[u8] {
1104        &self.original
1105    }
1106
1107    /// Consume this error and return the original byte string that failed to
1108    /// convert to a `String`.
1109    ///
1110    /// # Examples
1111    ///
1112    /// Basic usage:
1113    ///
1114    /// ```
1115    /// use bstr::{B, ByteVec};
1116    ///
1117    /// let bytes = Vec::from_slice(b"foo\xFFbar");
1118    /// let err = bytes.into_string().unwrap_err();
1119    /// let original = err.into_vec();
1120    ///
1121    /// // At no point in this example is an allocation performed.
1122    /// assert_eq!(original, B(b"foo\xFFbar"));
1123    /// ```
1124    #[inline]
1125    pub fn into_vec(self) -> Vec<u8> {
1126        self.original
1127    }
1128
1129    /// Return the underlying UTF-8 error that occurred. This error provides
1130    /// information on the nature and location of the invalid UTF-8 detected.
1131    ///
1132    /// # Examples
1133    ///
1134    /// Basic usage:
1135    ///
1136    /// ```
1137    /// use bstr::{B, ByteVec};
1138    ///
1139    /// let bytes = Vec::from_slice(b"foo\xFFbar");
1140    /// let err = bytes.into_string().unwrap_err();
1141    ///
1142    /// assert_eq!(err.utf8_error().valid_up_to(), 3);
1143    /// assert_eq!(err.utf8_error().error_len(), Some(1));
1144    /// ```
1145    #[inline]
1146    pub fn utf8_error(&self) -> &Utf8Error {
1147        &self.err
1148    }
1149}
1150
// Only available with the `std` feature, which is what brings the `error`
// module into scope for this file.
#[cfg(feature = "std")]
impl error::Error for FromUtf8Error {
    /// Returns a fixed, short summary of this error. The detailed message
    /// is produced by the `Display` implementation instead.
    #[inline]
    fn description(&self) -> &str {
        "invalid UTF-8 vector"
    }
}
1158
1159impl fmt::Display for FromUtf8Error {
1160    #[inline]
1161    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1162        write!(f, "{}", self.err)
1163    }
1164}
1165
#[cfg(all(test, feature = "std"))]
mod tests {
    use crate::ext_vec::ByteVec;

    /// `insert_str` places the given bytes at the requested offset, covering
    /// the front, the middle and the very end of the byte string.
    #[test]
    fn insert() {
        // (initial contents, insertion offset, inserted bytes, expected)
        let cases: [(&str, usize, &str, &str); 8] = [
            ("", 0, "foo", "foo"),
            ("a", 0, "foo", "fooa"),
            ("a", 1, "foo", "afoo"),
            ("foobar", 3, "quux", "fooquuxbar"),
            ("foobar", 3, "x", "fooxbar"),
            ("foobar", 0, "x", "xfoobar"),
            ("foobar", 6, "x", "foobarx"),
            ("foobar", 3, "quuxbazquux", "fooquuxbazquuxbar"),
        ];
        for &(initial, at, inserted, expected) in cases.iter() {
            let mut s: Vec<u8> = Vec::from(initial);
            s.insert_str(at, inserted);
            assert_eq!(s, expected.as_bytes());
        }
    }

    /// Inserting past the end of an empty byte string must panic.
    #[test]
    #[should_panic]
    fn insert_fail1() {
        let mut s: Vec<u8> = Vec::new();
        s.insert_str(1, "foo");
    }

    /// Inserting past the end of a one byte string must panic.
    #[test]
    #[should_panic]
    fn insert_fail2() {
        let mut s: Vec<u8> = Vec::from("a");
        s.insert_str(2, "foo");
    }

    /// Inserting one past the end of a longer byte string must panic.
    #[test]
    #[should_panic]
    fn insert_fail3() {
        let mut s: Vec<u8> = Vec::from("foobar");
        s.insert_str(7, "foo");
    }
}
bstr/ext_vec.rs

bstr/
ext_vec.rs