intaglio/
internal.rs

//! A wrapper around interned strings that maintains the safety invariants of
//! the `'static` slices handed out to the interner.
3
4use core::fmt;
5use std::borrow::Cow;
6#[cfg(feature = "cstr")]
7use std::ffi::{CStr, CString};
8#[cfg(feature = "osstr")]
9use std::ffi::{OsStr, OsString};
10#[cfg(feature = "path")]
11use std::path::{Path, PathBuf};
12
13use self::boxed::PinBox;
14
15/// Wrapper around `&'static` slices that does not allow mutable access to the
16/// inner slice.
17pub struct Interned<T: 'static + ?Sized>(Slice<T>);
18
19impl From<Cow<'static, str>> for Interned<str> {
20    #[inline]
21    fn from(cow: Cow<'static, str>) -> Self {
22        Self(cow.into())
23    }
24}
25
#[cfg(feature = "bytes")]
impl From<Cow<'static, [u8]>> for Interned<[u8]> {
    /// Wrap a borrowed or owned byte buffer as an interned byte slice.
    #[inline]
    fn from(cow: Cow<'static, [u8]>) -> Self {
        Interned(Slice::from(cow))
    }
}
33
#[cfg(feature = "cstr")]
impl From<Cow<'static, CStr>> for Interned<CStr> {
    /// Wrap a borrowed or owned C string as an interned `CStr`.
    #[inline]
    fn from(cow: Cow<'static, CStr>) -> Self {
        Interned(Slice::from(cow))
    }
}
41
#[cfg(feature = "osstr")]
impl From<Cow<'static, OsStr>> for Interned<OsStr> {
    /// Wrap a borrowed or owned platform string as an interned `OsStr`.
    #[inline]
    fn from(cow: Cow<'static, OsStr>) -> Self {
        Interned(Slice::from(cow))
    }
}
49
#[cfg(feature = "path")]
impl From<Cow<'static, Path>> for Interned<Path> {
    /// Wrap a borrowed or owned path as an interned `Path`.
    #[inline]
    fn from(cow: Cow<'static, Path>) -> Self {
        Interned(Slice::from(cow))
    }
}
57
58impl<T> Interned<T>
59where
60    T: ?Sized,
61{
62    /// Return a reference to the inner slice.
63    #[inline]
64    pub fn as_slice(&self) -> &T {
65        self.0.as_slice()
66    }
67
68    /// Return a `'static` reference to the inner slice.
69    ///
70    /// # Safety
71    ///
72    /// This returns a reference with an unbounded lifetime. It is the caller's
73    /// responsibility to make sure it is not used after this `Interned` and its
74    /// inner `Slice` is dropped.
75    #[inline]
76    pub unsafe fn as_static_slice(&self) -> &'static T {
77        // SAFETY: `Interned::as_static_slice`'s caller upheld safety invariants
78        // are the same as `Slice::as_static_slice`'s caller upheld safety
79        // invariants.
80        unsafe { self.0.as_static_slice() }
81    }
82}
83
84impl fmt::Debug for Interned<str> {
85    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
86        self.0.fmt(f)
87    }
88}
89
#[cfg(feature = "bytes")]
impl fmt::Debug for Interned<[u8]> {
    /// Format by delegating to the wrapped `Slice`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Debug::fmt(&self.0, f)
    }
}
96
#[cfg(feature = "cstr")]
impl fmt::Debug for Interned<CStr> {
    /// Format by delegating to the wrapped `Slice`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Debug::fmt(&self.0, f)
    }
}
103
#[cfg(feature = "osstr")]
impl fmt::Debug for Interned<OsStr> {
    /// Format by delegating to the wrapped `Slice`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Debug::fmt(&self.0, f)
    }
}
110
#[cfg(feature = "path")]
impl fmt::Debug for Interned<Path> {
    /// Format by delegating to the wrapped `Slice`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Debug::fmt(&self.0, f)
    }
}
117
118/// Wrapper around `&'static` slices.
119///
120/// # Safety
121///
122/// Even though `Box` is a "unique owner" of the data in the `Owned` variant, it
123/// should not be mutably dereferenced, because `as_static_slice` promises the
124/// slice to be valid as long as the `Slice` is not dropped.
125///
126/// This is achieved by not exposing the `Slice` enum directly and only allowing
127/// shared access to its internals.
128enum Slice<T: 'static + ?Sized> {
129    /// True `'static` references.
130    Static(&'static T),
131    /// Owned `'static` references.
132    Owned(PinBox<T>),
133}
134
135impl<T> From<&'static T> for Slice<T>
136where
137    T: ?Sized,
138{
139    #[inline]
140    fn from(slice: &'static T) -> Self {
141        Self::Static(slice)
142    }
143}
144
145impl From<String> for Slice<str> {
146    #[inline]
147    fn from(owned: String) -> Self {
148        Self::Owned(PinBox::new(owned.into_boxed_str()))
149    }
150}
151
152impl From<Cow<'static, str>> for Slice<str> {
153    #[inline]
154    fn from(cow: Cow<'static, str>) -> Self {
155        match cow {
156            Cow::Borrowed(slice) => slice.into(),
157            Cow::Owned(owned) => owned.into(),
158        }
159    }
160}
161
#[cfg(feature = "bytes")]
impl From<Vec<u8>> for Slice<[u8]> {
    /// Take ownership of `owned` by converting it into a boxed byte slice.
    #[inline]
    fn from(owned: Vec<u8>) -> Self {
        let boxed = owned.into_boxed_slice();
        Slice::Owned(PinBox::new(boxed))
    }
}
169
#[cfg(feature = "bytes")]
impl From<Cow<'static, [u8]>> for Slice<[u8]> {
    /// Keep a borrowed byte slice as-is; take ownership of an owned `Vec<u8>`.
    #[inline]
    fn from(cow: Cow<'static, [u8]>) -> Self {
        match cow {
            Cow::Borrowed(borrowed) => Self::Static(borrowed),
            Cow::Owned(bytes) => Self::from(bytes),
        }
    }
}
180
#[cfg(feature = "cstr")]
impl From<CString> for Slice<CStr> {
    /// Take ownership of `owned` by converting it into a boxed `CStr`.
    #[inline]
    fn from(owned: CString) -> Self {
        let boxed = owned.into_boxed_c_str();
        Slice::Owned(PinBox::new(boxed))
    }
}
188
#[cfg(feature = "cstr")]
impl From<Cow<'static, CStr>> for Slice<CStr> {
    /// Keep a borrowed `CStr` as-is; take ownership of an owned `CString`.
    #[inline]
    fn from(cow: Cow<'static, CStr>) -> Self {
        match cow {
            Cow::Borrowed(borrowed) => Self::Static(borrowed),
            Cow::Owned(c_string) => Self::from(c_string),
        }
    }
}
199
#[cfg(feature = "osstr")]
impl From<OsString> for Slice<OsStr> {
    /// Take ownership of `owned` by converting it into a boxed `OsStr`.
    #[inline]
    fn from(owned: OsString) -> Self {
        let boxed = owned.into_boxed_os_str();
        Slice::Owned(PinBox::new(boxed))
    }
}
207
#[cfg(feature = "osstr")]
impl From<Cow<'static, OsStr>> for Slice<OsStr> {
    /// Keep a borrowed `OsStr` as-is; take ownership of an owned `OsString`.
    #[inline]
    fn from(cow: Cow<'static, OsStr>) -> Self {
        match cow {
            Cow::Borrowed(borrowed) => Self::Static(borrowed),
            Cow::Owned(os_string) => Self::from(os_string),
        }
    }
}
218
#[cfg(feature = "path")]
impl From<PathBuf> for Slice<Path> {
    /// Take ownership of `owned` by converting it into a boxed `Path`.
    #[inline]
    fn from(owned: PathBuf) -> Self {
        let boxed = owned.into_boxed_path();
        Slice::Owned(PinBox::new(boxed))
    }
}
226
#[cfg(feature = "path")]
impl From<Cow<'static, Path>> for Slice<Path> {
    /// Keep a borrowed `Path` as-is; take ownership of an owned `PathBuf`.
    #[inline]
    fn from(cow: Cow<'static, Path>) -> Self {
        match cow {
            Cow::Borrowed(borrowed) => Self::Static(borrowed),
            Cow::Owned(path_buf) => Self::from(path_buf),
        }
    }
}
237
238impl<T> Slice<T>
239where
240    T: ?Sized,
241{
242    /// Return a reference to the inner slice.
243    #[inline]
244    fn as_slice(&self) -> &T {
245        match self {
246            Self::Static(slice) => slice,
247            Self::Owned(owned) => {
248                // SAFETY: `PinBox` acts like `Box`.
249                unsafe { owned.as_ref() }
250            }
251        }
252    }
253
254    /// Return a `'static` reference to the inner slice.
255    ///
256    /// # Safety
257    ///
258    /// This returns a reference with an unbounded lifetime. It is the caller's
259    /// responsibility to make sure it is not used after this `Slice` is
260    /// dropped.
261    #[inline]
262    unsafe fn as_static_slice(&self) -> &'static T {
263        match self {
264            Self::Static(slice) => slice,
265            Self::Owned(owned) => {
266                // SAFETY: This expression creates a reference with a `'static`
267                // lifetime from an owned buffer, which is permissible because:
268                //
269                // - `Slice` is an internal implementation detail of the various
270                //   symbol table data structures
271                // - The various symbol tables never give out `'static` references
272                //   to underlying byte contents.
273                // - The `map` field of the various symbol tables which contains
274                //   the `'static` references, is dropped before the owned buffers
275                //   stored in this `Slice`.
276                // - `PinBox` acts like `Box`.
277                unsafe {
278                    // Coerce the pointer to a `&'static T`.
279                    owned.as_ref()
280                }
281            }
282        }
283    }
284}
285
286impl fmt::Debug for Slice<str> {
287    /// Formats the string slice using the given formatter.
288    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
289        write!(f, "{:?}", self.as_slice())
290    }
291}
292
#[cfg(feature = "bytes")]
impl fmt::Debug for Slice<[u8]> {
    /// Writes the `Debug` representation of the stored byte slice.
    ///
    /// When the alternate flag is set, e.g. `{:#?}`, the bytes are treated as
    /// conventionally UTF-8: they are decoded lossily with
    /// [`String::from_utf8_lossy`] and the resulting text is written instead.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let bytes = self.as_slice();
        if !f.alternate() {
            return write!(f, "{bytes:?}");
        }
        let lossy = String::from_utf8_lossy(bytes);
        write!(f, "{lossy:?}")
    }
}
308
#[cfg(feature = "cstr")]
impl fmt::Debug for Slice<CStr> {
    /// Writes the `Debug` representation of the stored `CStr`.
    ///
    /// When the alternate flag is set, e.g. `{:#?}`, the bytes returned by
    /// `CStr::to_bytes` are treated as conventionally UTF-8: they are decoded
    /// lossily with [`String::from_utf8_lossy`] and the resulting text is
    /// written instead.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let c_str = self.as_slice();
        if !f.alternate() {
            return write!(f, "{c_str:?}");
        }
        let lossy = String::from_utf8_lossy(c_str.to_bytes());
        write!(f, "{lossy:?}")
    }
}
328
#[cfg(feature = "osstr")]
impl fmt::Debug for Slice<OsStr> {
    /// Writes the `Debug` representation of the stored `OsStr`.
    ///
    /// When the alternate flag is set, e.g. `{:#?}`, the contents are treated
    /// as conventionally UTF-8: they are converted lossily with
    /// [`OsStr::to_string_lossy`] and the resulting text is written instead.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let os_str = self.as_slice();
        if !f.alternate() {
            return write!(f, "{os_str:?}");
        }
        let lossy = os_str.to_string_lossy();
        write!(f, "{lossy:?}")
    }
}
344
#[cfg(feature = "path")]
impl fmt::Debug for Slice<Path> {
    /// Writes the `Debug` representation of the stored `Path`.
    ///
    /// When the alternate flag is set, e.g. `{:#?}`, the contents are treated
    /// as conventionally UTF-8: they are converted lossily with
    /// [`Path::to_string_lossy`] and the resulting text is written instead.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let path = self.as_slice();
        if !f.alternate() {
            return write!(f, "{path:?}");
        }
        let lossy = path.to_string_lossy();
        write!(f, "{lossy:?}")
    }
}
360
/// A stand-in for `Box<T>`, where `T` is an unsized slice type, that is moved
/// around as a raw pointer. This type is required to satisfy Miri with
/// `-Zmiri-retag-fields`. See #235, #236.
///
/// The `PinBox` type is derived from:
///
/// - <https://github.com/CAD97/simple-interner/blob/24a836e9f8a0173faf48438d711442c2a86659c1/src/interner.rs#L26-L56>
/// - <https://github.com/artichoke/intaglio/pull/236#issuecomment-1651058752>
/// - <https://github.com/artichoke/intaglio/pull/236#issuecomment-1652003240>
///
/// This code is placed into the public domain by @CAD97:
///
/// - <https://github.com/artichoke/intaglio/pull/236#issuecomment-1652393974>
mod boxed {
    use core::fmt;
    use core::marker::PhantomData;
    use core::ptr::NonNull;

    /// Box-like owner that never gives out `&mut` access to its pointee and
    /// follows raw-pointer borrowing rules, so references that already exist
    /// are not invalidated.
    ///
    /// A reference resolved through the `PinBox` is guaranteed valid until the
    /// `PinBox` is dropped.
    ///
    /// The point of this type is to let the owned data of a `Box<T>` be moved
    /// without Miri retagging it. See #235, #236.
    pub(crate) struct PinBox<T: ?Sized> {
        /// Pointer obtained from `Box::into_raw`; released again in `Drop`.
        ptr: NonNull<T>,
        /// Records that this value logically owns a `Box<T>`.
        _marker: PhantomData<Box<T>>,
    }

    impl<T: ?Sized> PinBox<T> {
        /// Take over the allocation owned by `x`.
        #[inline]
        pub(crate) fn new(x: Box<T>) -> Self {
            let raw = Box::into_raw(x);
            Self {
                // SAFETY: `raw` comes from `Box::into_raw` and can never be
                // null.
                ptr: unsafe { NonNull::new_unchecked(raw) },
                _marker: PhantomData,
            }
        }

        /// Resolve the pointee as a shared reference with a caller-chosen
        /// lifetime.
        ///
        /// # Safety
        ///
        /// The lifetime `'a` is unbounded. The caller must not use the
        /// returned reference after this `PinBox` has been dropped.
        #[inline]
        pub(crate) unsafe fn as_ref<'a>(&self) -> &'a T {
            // SAFETY: `PinBox` acts like `Box`, `self.ptr` is non-null and
            // points to a live `Box`.
            unsafe { &*self.ptr.as_ptr() }
        }
    }

    impl<T: ?Sized> Drop for PinBox<T> {
        /// Rebuild the original `Box` from the raw pointer and drop it.
        fn drop(&mut self) {
            let raw = self.ptr.as_ptr();
            // SAFETY: `PinBox` acts like `Box`.
            let reclaimed = unsafe { Box::from_raw(raw) };
            drop(reclaimed);
        }
    }

    impl<T: ?Sized + fmt::Debug> fmt::Debug for PinBox<T> {
        /// Format the pointee with its own `Debug` implementation.
        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
            // SAFETY: `PinBox` acts like `Box`.
            let pointee: &T = unsafe { self.as_ref() };
            fmt::Debug::fmt(pointee, f)
        }
    }

    // SAFETY: `PinBox` acts like `Box`.
    unsafe impl<T: ?Sized> Send for PinBox<T> where Box<T>: Send {}

    // SAFETY: `PinBox` acts like `Box`.
    unsafe impl<T: ?Sized> Sync for PinBox<T> where Box<T>: Sync {}

    #[cfg(test)]
    mod tests {
        use super::PinBox;

        /// Build a `PinBox<str>` holding `"abc"`.
        fn pinned_abc() -> PinBox<str> {
            PinBox::new("abc".to_string().into_boxed_str())
        }

        #[test]
        fn test_drop() {
            drop(pinned_abc());
        }

        #[test]
        fn test_as_ref() {
            let pinned = pinned_abc();

            // SAFETY: `PinBox` acts like `Box` and contains a valid pointer.
            let contents = unsafe { pinned.as_ref() };
            assert_eq!(contents, "abc");
        }

        #[test]
        fn test_debug_format() {
            let pinned = pinned_abc();

            assert_eq!(format!("{pinned:?}"), "\"abc\"");
        }
    }
}
477
#[cfg(test)]
mod tests {
    use core::fmt::Debug;
    use std::borrow::Cow;
    #[cfg(feature = "cstr")]
    use std::ffi::CStr;
    #[cfg(feature = "osstr")]
    use std::ffi::OsStr;
    #[cfg(feature = "path")]
    use std::path::Path;

    use super::Interned;

    /// Render `value` with `{:?}`, or with `{:#?}` when `alternate` is set.
    fn render<T: Debug>(value: &T, alternate: bool) -> String {
        if alternate {
            format!("{value:#?}")
        } else {
            format!("{value:?}")
        }
    }

    #[test]
    fn test_interned_static_str_debug_format() {
        let s = Interned::from(Cow::Borrowed("abc"));
        assert_eq!(render(&s, false), "\"abc\"");
    }

    #[test]
    fn test_interned_owned_str_debug_format() {
        let s = Interned::<str>::from(Cow::Owned("abc".to_string()));
        assert_eq!(render(&s, false), "\"abc\"");
    }

    #[test]
    #[cfg(feature = "bytes")]
    fn test_interned_static_bytes_debug_format() {
        // Plain `{:?}` prints the raw byte values.
        let s = Interned::from(Cow::Borrowed(&b"abc"[..]));
        assert_eq!(render(&s, false), "[97, 98, 99]");

        let s = Interned::from(Cow::Borrowed(&b"\xFF"[..]));
        assert_eq!(render(&s, false), "[255]");

        // Alternate `{:#?}` prints the lossily decoded text.
        let s = Interned::from(Cow::Borrowed(&b"abc"[..]));
        assert_eq!(render(&s, true), "\"abc\"");

        let s = Interned::from(Cow::Borrowed(&b"\xFF"[..]));
        assert_eq!(render(&s, true), "\"\u{FFFD}\"");
    }

    #[test]
    #[cfg(feature = "bytes")]
    fn test_interned_owned_bytes_debug_format() {
        // Plain `{:?}` prints the raw byte values.
        let s = Interned::<[u8]>::from(Cow::Owned(b"abc".to_vec()));
        assert_eq!(render(&s, false), "[97, 98, 99]");

        let s = Interned::<[u8]>::from(Cow::Owned(b"\xFF".to_vec()));
        assert_eq!(render(&s, false), "[255]");

        // Alternate `{:#?}` prints the lossily decoded text.
        let s = Interned::<[u8]>::from(Cow::Owned(b"abc".to_vec()));
        assert_eq!(render(&s, true), "\"abc\"");

        let s = Interned::<[u8]>::from(Cow::Owned(b"\xFF".to_vec()));
        assert_eq!(render(&s, true), "\"\u{FFFD}\"");
    }

    #[test]
    #[cfg(feature = "cstr")]
    fn test_interned_static_cstr_debug_format() {
        let ascii = CStr::from_bytes_with_nul(b"abc\x00").unwrap();
        let invalid = CStr::from_bytes_with_nul(b"\xFF\x00").unwrap();

        // Plain `{:?}` uses the `CStr` escaping.
        let s = Interned::from(Cow::Borrowed(ascii));
        assert_eq!(render(&s, false), "\"abc\"");

        let s = Interned::from(Cow::Borrowed(invalid));
        assert_eq!(render(&s, false), r#""\xff""#);

        // Alternate `{:#?}` prints the lossily decoded text.
        let s = Interned::from(Cow::Borrowed(ascii));
        assert_eq!(render(&s, true), "\"abc\"");

        let s = Interned::from(Cow::Borrowed(invalid));
        assert_eq!(render(&s, true), "\"\u{FFFD}\"");
    }

    #[test]
    #[cfg(feature = "cstr")]
    fn test_interned_owned_cstring_debug_format() {
        let ascii = CStr::from_bytes_with_nul(b"abc\x00").unwrap();
        let invalid = CStr::from_bytes_with_nul(b"\xFF\x00").unwrap();

        // Plain `{:?}` uses the `CStr` escaping.
        let s = Interned::<CStr>::from(Cow::Owned(ascii.to_owned()));
        assert_eq!(render(&s, false), "\"abc\"");

        let s = Interned::<CStr>::from(Cow::Owned(invalid.to_owned()));
        assert_eq!(render(&s, false), r#""\xff""#);

        // Alternate `{:#?}` prints the lossily decoded text.
        let s = Interned::<CStr>::from(Cow::Owned(ascii.to_owned()));
        assert_eq!(render(&s, true), "\"abc\"");

        let s = Interned::<CStr>::from(Cow::Owned(invalid.to_owned()));
        assert_eq!(render(&s, true), "\"\u{FFFD}\"");
    }

    #[test]
    #[cfg(feature = "osstr")]
    fn test_interned_static_osstr_debug_format() {
        let s = Interned::from(Cow::Borrowed(OsStr::new("abc")));
        assert_eq!(render(&s, false), "\"abc\"");

        let s = Interned::from(Cow::Borrowed(OsStr::new("abc")));
        assert_eq!(render(&s, true), "\"abc\"");
    }

    #[test]
    #[cfg(feature = "osstr")]
    fn test_interned_owned_osstring_debug_format() {
        let s = Interned::<OsStr>::from(Cow::Owned(OsStr::new("abc").to_owned()));
        assert_eq!(render(&s, false), "\"abc\"");

        let s = Interned::<OsStr>::from(Cow::Owned(OsStr::new("abc").to_owned()));
        assert_eq!(render(&s, true), "\"abc\"");
    }

    #[test]
    #[cfg(feature = "path")]
    fn test_interned_static_path_debug_format() {
        let s = Interned::from(Cow::Borrowed(Path::new("abc")));
        assert_eq!(render(&s, false), "\"abc\"");

        let s = Interned::from(Cow::Borrowed(Path::new("abc")));
        assert_eq!(render(&s, true), "\"abc\"");
    }

    #[test]
    #[cfg(feature = "path")]
    fn test_interned_owned_pathbuf_debug_format() {
        let s = Interned::<Path>::from(Cow::Owned(Path::new("abc").to_owned()));
        assert_eq!(render(&s, false), "\"abc\"");

        let s = Interned::<Path>::from(Cow::Owned(Path::new("abc").to_owned()));
        assert_eq!(render(&s, true), "\"abc\"");
    }
}