mezzaluna_loaded_features/loaded_features/
mod.rs

//! A set to track loaded Ruby source paths based on a [`Vec`] and [`HashSet`].
//!
//! Ruby tracks which files and native extensions have been [required] in a
//! global variable called `$LOADED_FEATURES`, which is aliased to `$"`.
//! `$LOADED_FEATURES` is an `Array` of paths which point to these Ruby sources
//! and native extensions.
//!
//! This module exposes an append-only, insertion order-preserving, set-like
//! container for tracking disk and in-memory Ruby sources as they are
//! evaluated on a Ruby interpreter using [`require`] and [`require_relative`].
//!
//! See [`LoadedFeatures`] for more documentation on how to use the types in
//! this module.
//!
//! [required]: https://ruby-doc.org/core-3.1.2/Kernel.html#method-i-require
//! [`require`]: https://ruby-doc.org/core-3.1.2/Kernel.html#method-i-require
//! [`require_relative`]: https://ruby-doc.org/core-3.1.2/Kernel.html#method-i-require_relative
18
19use core::hash::BuildHasher;
20use std::collections::TryReserveError;
21use std::collections::hash_map::RandomState;
22use std::collections::hash_set::HashSet;
23use std::path::{Path, PathBuf};
24
25mod iter;
26
27pub use iter::{Features, Iter};
28
29use crate::Feature;
30
31/// A set of all sources loaded by a Ruby interpreter with [`require`] and
32/// [`require_relative`].
33///
34/// In Ruby, when loading files with `require` and `require_relative`, the
35/// constants defined in them have global scope. Ruby keeps track of loaded
36/// sources in its interpreter state to ensure files are not `require`'d
37/// multiple times.
38///
39/// Ruby refers to files tracked in this way as _features_. The set of loaded
40/// features are stored in a global variable called `$LOADED_FEATURES`, which is
41/// aliased to `$"`.
42///
43/// `$LOADED_FEATURES` is an append only set. Disk-based features are
44/// deduplicated by their real position on the underlying file system (i.e.
45/// their device and inode).
46///
47/// Ruby uses a feature's presence in the loaded features set to determine
48/// whether a require has side effects (i.e. a file can be required multiple
49/// times but is only evaluated once).
50///
51/// # Examples
52///
53/// ```
54/// use mezzaluna_loaded_features::{Feature, LoadedFeatures};
55///
56/// let mut features = LoadedFeatures::new();
57/// features.insert(Feature::with_in_memory_path("/src/_lib/test.rb".into()));
58/// features.insert(Feature::with_in_memory_path("set.rb".into()));
59/// features.insert(Feature::with_in_memory_path("artichoke.rb".into()));
60///
61/// for f in features.features() {
62///     println!("Loaded feature at: {}", f.path().display());
63/// }
64///
65/// features.shrink_to_fit();
66/// ```
67///
68/// [`require`]: https://ruby-doc.org/core-3.1.2/Kernel.html#method-i-require
69/// [`require_relative`]: https://ruby-doc.org/core-3.1.2/Kernel.html#method-i-require_relative
70#[derive(Debug)]
71pub struct LoadedFeatures<S = RandomState> {
72    features: HashSet<Feature, S>,
73    paths: Vec<PathBuf>,
74}
75
76impl Default for LoadedFeatures {
77    fn default() -> Self {
78        Self::new()
79    }
80}
81
82impl<'a, S> IntoIterator for &'a LoadedFeatures<S> {
83    type IntoIter = Iter<'a>;
84    type Item = &'a Path;
85
86    fn into_iter(self) -> Self::IntoIter {
87        self.iter()
88    }
89}
90
91impl LoadedFeatures<RandomState> {
92    /// Creates an empty `LoadedFeatures`.
93    ///
94    /// The set of features is initially created with a capacity of 0, so it
95    /// will not allocate until it is first inserted into.
96    ///
97    /// # Examples
98    ///
99    /// ```
100    /// use mezzaluna_loaded_features::LoadedFeatures;
101    ///
102    /// let features = LoadedFeatures::new();
103    /// assert!(features.is_empty());
104    /// assert_eq!(features.capacity(), 0);
105    /// ```
106    #[must_use]
107    pub fn new() -> Self {
108        let features = HashSet::new();
109        let paths = Vec::new();
110        Self { features, paths }
111    }
112
113    /// Creates an empty `LoadedFeatures` with the specified capacity.
114    ///
115    /// The set of features will be able to hold at least `capacity` elements
116    /// without reallocating. If `capacity` is 0, the feature set will not
117    /// allocate.
118    ///
119    /// # Examples
120    ///
121    /// ```
122    /// use mezzaluna_loaded_features::LoadedFeatures;
123    ///
124    /// let features = LoadedFeatures::with_capacity(10);
125    /// assert!(features.capacity() >= 10);
126    /// ```
127    #[must_use]
128    pub fn with_capacity(capacity: usize) -> Self {
129        let features = HashSet::with_capacity(capacity);
130        let paths = Vec::with_capacity(capacity);
131        Self { features, paths }
132    }
133}
134
135impl<S> LoadedFeatures<S> {
136    /// Returns the number of elements the set of features can hold without
137    /// reallocating.
138    ///
139    /// # Examples
140    ///
141    /// ```
142    /// use mezzaluna_loaded_features::LoadedFeatures;
143    ///
144    /// let features = LoadedFeatures::with_capacity(100);
145    /// assert!(features.capacity() >= 100);
146    /// ```
147    #[must_use]
148    pub fn capacity(&self) -> usize {
149        usize::min(self.features.capacity(), self.paths.capacity())
150    }
151
152    /// An iterator visiting all features in insertion order. The iterator
153    /// element type is `&'a Path`.
154    ///
155    /// # Examples
156    ///
157    /// ```
158    /// use mezzaluna_loaded_features::{Feature, LoadedFeatures};
159    ///
160    /// let mut features = LoadedFeatures::new();
161    /// features.insert(Feature::with_in_memory_path("/src/_lib/test.rb".into()));
162    /// features.insert(Feature::with_in_memory_path("set.rb".into()));
163    /// features.insert(Feature::with_in_memory_path("artichoke.rb".into()));
164    ///
165    /// for path in features.iter() {
166    ///     println!("Loaded feature at: {}", path.display());
167    /// }
168    /// ```
169    #[must_use]
170    pub fn iter(&self) -> Iter<'_> {
171        let inner = self.paths.iter();
172        Iter { inner }
173    }
174
175    /// An iterator visiting all features in arbitrary order. The iterator
176    /// element type is `&'a Feature`.
177    ///
178    /// # Examples
179    ///
180    /// ```
181    /// use mezzaluna_loaded_features::{Feature, LoadedFeatures};
182    ///
183    /// let mut features = LoadedFeatures::new();
184    /// features.insert(Feature::with_in_memory_path("/src/_lib/test.rb".into()));
185    /// features.insert(Feature::with_in_memory_path("set.rb".into()));
186    /// features.insert(Feature::with_in_memory_path("artichoke.rb".into()));
187    ///
188    /// for f in features.features() {
189    ///     println!("Loaded feature at: {}", f.path().display());
190    /// }
191    /// ```
192    #[must_use]
193    pub fn features(&self) -> Features<'_> {
194        let inner = self.features.iter();
195        Features { inner }
196    }
197
198    /// Returns the number of features in the set.
199    ///
200    /// # Examples
201    ///
202    /// ```
203    /// use mezzaluna_loaded_features::{Feature, LoadedFeatures};
204    ///
205    /// let mut features = LoadedFeatures::new();
206    /// assert_eq!(features.len(), 0);
207    ///
208    /// features.insert(Feature::with_in_memory_path("/src/_lib/test.rb".into()));
209    /// assert_eq!(features.len(), 1);
210    /// ```
211    #[must_use]
212    pub fn len(&self) -> usize {
213        self.features.len()
214    }
215
216    /// Returns true if the set contains no features.
217    ///
218    /// # Examples
219    ///
220    /// ```
221    /// use mezzaluna_loaded_features::{Feature, LoadedFeatures};
222    ///
223    /// let mut features = LoadedFeatures::new();
224    /// assert!(features.is_empty());
225    ///
226    /// features.insert(Feature::with_in_memory_path("/src/_lib/test.rb".into()));
227    /// assert!(!features.is_empty());
228    /// ```
229    #[must_use]
230    pub fn is_empty(&self) -> bool {
231        self.features.is_empty()
232    }
233
234    /// Creates a new empty feature set which will use the given hasher to hash
235    /// keys.
236    ///
237    /// The feature set is also created with the default initial capacity.
238    ///
239    /// Warning: `hasher` is normally randomly generated, and is designed to
240    /// allow `LoadedFeatures` to be resistant to attacks that cause many
241    /// collisions and very poor performance. Setting it manually using this
242    /// function can expose a DoS attack vector.
243    ///
244    /// The `hash_builder` passed should implement the [`BuildHasher`] trait for
245    /// the `LoadedFeatures` to be useful, see its documentation for details.
246    ///
247    /// # Examples
248    ///
249    /// ```
250    /// use std::collections::hash_map::RandomState;
251    ///
252    /// use mezzaluna_loaded_features::{Feature, LoadedFeatures};
253    ///
254    /// let s = RandomState::new();
255    /// let mut features = LoadedFeatures::with_hasher(s);
256    /// features.insert(Feature::with_in_memory_path("set.rb".into()));
257    /// ```
258    #[must_use]
259    pub fn with_hasher(hasher: S) -> Self {
260        let features = HashSet::with_hasher(hasher);
261        let paths = Vec::new();
262        Self { features, paths }
263    }
264
265    /// Creates a new empty feature set with the specified capacity which will
266    /// use the given hasher to hash keys.
267    ///
268    /// The feature set will be able to hold at least `capacity` elements
269    /// without reallocating. If `capacity` is 0, the feature set will not
270    /// allocate.
271    ///
272    /// Warning: `hasher` is normally randomly generated, and is designed to
273    /// allow `LoadedFeatures` to be resistant to attacks that cause many
274    /// collisions and very poor performance. Setting it manually using this
275    /// function can expose a DoS attack vector.
276    ///
277    /// The `hash_builder` passed should implement the [`BuildHasher`] trait for
278    /// the `LoadedFeatures` to be useful, see its documentation for details.
279    ///
280    /// # Examples
281    ///
282    /// ```
283    /// use std::collections::hash_map::RandomState;
284    ///
285    /// use mezzaluna_loaded_features::{Feature, LoadedFeatures};
286    ///
287    /// let s = RandomState::new();
288    /// let mut features = LoadedFeatures::with_capacity_and_hasher(10, s);
289    /// features.insert(Feature::with_in_memory_path("set.rb".into()));
290    /// ```
291    #[must_use]
292    pub fn with_capacity_and_hasher(capacity: usize, hasher: S) -> Self {
293        let features = HashSet::with_capacity_and_hasher(capacity, hasher);
294        let paths = Vec::with_capacity(capacity);
295        Self { features, paths }
296    }
297
298    /// Returns a reference to the feature set's [`BuildHasher`].
299    ///
300    /// # Examples
301    ///
302    /// ```
303    /// use std::collections::hash_map::RandomState;
304    ///
305    /// use mezzaluna_loaded_features::LoadedFeatures;
306    ///
307    /// let s = RandomState::new();
308    /// let features = LoadedFeatures::with_hasher(s);
309    /// let hasher: &RandomState = features.hasher();
310    /// ```
311    #[must_use]
312    pub fn hasher(&self) -> &S {
313        self.features.hasher()
314    }
315}
316
317impl<S> LoadedFeatures<S>
318where
319    S: BuildHasher,
320{
321    /// Reserves capacity for at least `additional` more elements to be inserted
322    /// in the `LoadedFeatures`. The collection may reserve more space to avoid
323    /// frequent reallocations.
324    ///
325    /// # Panics
326    ///
327    /// Panics if the new allocation size overflows `usize`.
328    ///
329    /// # Examples
330    ///
331    /// ```
332    /// use mezzaluna_loaded_features::LoadedFeatures;
333    ///
334    /// let mut features = LoadedFeatures::new();
335    /// features.reserve(10);
336    /// assert!(features.capacity() >= 10);
337    /// ```
338    pub fn reserve(&mut self, additional: usize) {
339        self.features.reserve(additional);
340        self.paths.reserve(additional);
341    }
342
343    /// Tries to reserve capacity for at least `additional` more elements to be
344    /// inserted in the `LoadedFeatures`. The collection may reserve more space
345    /// to avoid frequent reallocations. After calling `try_reserve`, capacity
346    /// will be greater than or equal to `self.len() + additional`. Does nothing
347    /// if capacity is already sufficient.
348    ///
349    /// # Errors
350    ///
351    /// If the capacity overflows, or the allocator reports a failure, then an
352    /// error is returned.
353    ///
354    /// # Examples
355    ///
356    /// ```
357    /// use mezzaluna_loaded_features::LoadedFeatures;
358    ///
359    /// let mut features = LoadedFeatures::new();
360    /// features
361    ///     .try_reserve(10)
362    ///     .expect("why is this OOMing on 10 features?");
363    /// assert!(features.capacity() >= 10);
364    /// ```
365    pub fn try_reserve(&mut self, additional: usize) -> Result<(), TryReserveError> {
366        self.features.try_reserve(additional)?;
367        self.paths.try_reserve(additional)?;
368
369        Ok(())
370    }
371
372    /// Shrinks the capacity of the set as much as possible. It will drop down
373    /// as much as possible while maintaining the internal rules and possibly
374    /// leaving some space in accordance with the resize policy.
375    ///
376    /// # Examples
377    ///
378    /// ```
379    /// use mezzaluna_loaded_features::{Feature, LoadedFeatures};
380    ///
381    /// let mut features = LoadedFeatures::with_capacity(100);
382    /// features.insert(Feature::with_in_memory_path("set.rb".into()));
383    /// features.insert(Feature::with_in_memory_path("artichoke.rb".into()));
384    ///
385    /// assert!(features.capacity() >= 100);
386    /// features.shrink_to_fit();
387    /// assert!(features.capacity() >= 2);
388    /// ```
389    pub fn shrink_to_fit(&mut self) {
390        self.features.shrink_to_fit();
391        self.paths.shrink_to_fit();
392    }
393
394    /// Shrinks the capacity of the set with a lower bound.
395    ///
396    /// The capacity will remain at least as large as both the length and the
397    /// supplied value.
398    ///
399    /// If the current capacity is less than the lower limit, this is a no-op.
400    ///
401    /// # Examples
402    ///
403    /// ```
404    /// use mezzaluna_loaded_features::{Feature, LoadedFeatures};
405    ///
406    /// let mut features = LoadedFeatures::with_capacity(100);
407    /// features.insert(Feature::with_in_memory_path("set.rb".into()));
408    /// features.insert(Feature::with_in_memory_path("artichoke.rb".into()));
409    ///
410    /// assert!(features.capacity() >= 100);
411    /// features.shrink_to(2);
412    /// assert!(features.capacity() >= 2);
413    /// ```
414    pub fn shrink_to(&mut self, min_capacity: usize) {
415        self.features.shrink_to(min_capacity);
416        self.paths.shrink_to(min_capacity);
417    }
418
419    /// Returns true if the set contains a feature.
420    ///
421    /// Features loaded from disk are compared based on whether they point to
422    /// the same file on the underlying file system. Features loaded from memory
423    /// are compared by their paths.
424    ///
425    /// # Examples
426    ///
427    /// ```
428    /// use mezzaluna_loaded_features::{Feature, LoadedFeatures};
429    ///
430    /// let mut features = LoadedFeatures::new();
431    /// let set_feature = Feature::with_in_memory_path("set.rb".into());
432    ///
433    /// assert!(!features.contains(&set_feature));
434    ///
435    /// features.insert(set_feature);
436    /// assert_eq!(features.len(), 1);
437    /// ```
438    #[must_use]
439    pub fn contains(&self, feature: &Feature) -> bool {
440        self.features.contains(feature)
441    }
442
443    /// Add a feature to the set.
444    ///
445    /// # Panics
446    ///
447    /// Panics if the given feature is already loaded.
448    ///
449    /// # Examples
450    ///
451    /// ```
452    /// use mezzaluna_loaded_features::{Feature, LoadedFeatures};
453    ///
454    /// let mut features = LoadedFeatures::new();
455    /// let set_feature = Feature::with_in_memory_path("set.rb".into());
456    /// features.insert(set_feature);
457    ///
458    /// assert_eq!(features.len(), 1);
459    /// ```
460    pub fn insert(&mut self, feature: Feature) {
461        let path = feature.path().to_owned();
462        let feature_was_not_loaded = self.features.insert(feature);
463        assert!(
464            feature_was_not_loaded,
465            "duplicate feature inserted at {}",
466            path.display()
467        );
468        self.paths.push(path);
469    }
470}
471
#[cfg(test)]
mod tests {
    use std::path::Path;
    #[cfg(feature = "disk")]
    use std::path::PathBuf;

    #[cfg(feature = "disk")]
    use same_file::Handle;

    use super::{Feature, LoadedFeatures};

    /// Builds a feature set holding one in-memory feature per entry of `paths`,
    /// inserted in the order given.
    fn in_memory_set(paths: &[&'static str]) -> LoadedFeatures {
        let mut features = LoadedFeatures::new();
        for path in paths {
            features.insert(Feature::with_in_memory_path((*path).into()));
        }
        features
    }

    /// Creates a disk-backed feature for the file found at `on_disk`, recorded
    /// under the path `recorded`.
    #[cfg(feature = "disk")]
    fn disk_feature(on_disk: &Path, recorded: PathBuf) -> Feature {
        let handle = Handle::from_path(on_disk).unwrap();
        Feature::with_handle_and_path(handle, recorded)
    }

    #[test]
    #[should_panic(expected = "duplicate feature inserted at set.rb")]
    fn duplicate_memory_insert_panics() {
        // The second, identical entry must trip the duplicate assertion.
        let _ = in_memory_set(&["set.rb", "set.rb"]);
    }

    #[test]
    fn insert_multiple_memory_features() {
        let features = in_memory_set(&["set.rb", "hash.rb", "artichoke.rb"]);
        assert_eq!(features.len(), 3);

        let paths: Vec<&Path> = features.iter().collect();
        assert_eq!(paths.len(), 3);

        let expected = ["set.rb", "hash.rb", "artichoke.rb"].map(Path::new);
        assert_eq!(paths, expected);
    }

    #[test]
    #[cfg(feature = "disk")]
    #[should_panic(expected = "duplicate feature inserted at Cargo.toml")]
    fn duplicate_disk_insert_panics() {
        let manifest = Path::new(env!("CARGO_MANIFEST_PATH"));
        let mut features = LoadedFeatures::new();
        // The second pass through the loop re-inserts the same file and panics.
        loop {
            features.insert(disk_feature(manifest, PathBuf::from("Cargo.toml")));
        }
    }

    // ```shell
    // $ echo 'puts __FILE__' > a.rb
    // $ irb
    // [3.2.2] > require './a.rb'
    // /Users/lopopolo/dev/artichoke/artichoke/a.rb
    // => true
    // [3.2.2] > require '../artichoke/a.rb'
    // => false
    // ```
    #[test]
    #[cfg(feature = "disk")]
    #[cfg_attr(
        not(windows),
        should_panic(expected = "duplicate feature inserted at src/../Cargo.toml")
    )]
    #[cfg_attr(
        windows,
        should_panic(expected = "duplicate feature inserted at src\\..\\Cargo.toml")
    )]
    fn duplicate_disk_insert_with_different_path_panics() {
        let mut features = LoadedFeatures::new();
        features.insert(disk_feature(
            Path::new(env!("CARGO_MANIFEST_PATH")),
            PathBuf::from("Cargo.toml"),
        ));

        // Same file on disk, reached through a different spelling of the path.
        let roundabout: PathBuf = [env!("CARGO_MANIFEST_DIR"), "src", "..", "Cargo.toml"]
            .iter()
            .collect();
        let recorded = roundabout
            .strip_prefix(env!("CARGO_MANIFEST_DIR"))
            .unwrap()
            .to_owned();
        features.insert(disk_feature(&roundabout, recorded));
    }

    #[test]
    #[cfg(feature = "disk")]
    fn insert_multiple_disk_features() {
        let manifest_dir = Path::new(env!("CARGO_MANIFEST_DIR"));

        let mut features = LoadedFeatures::new();
        features.insert(disk_feature(
            Path::new(env!("CARGO_MANIFEST_PATH")),
            PathBuf::from("Cargo.toml"),
        ));
        for name in ["LICENSE", "README.md"] {
            let on_disk = manifest_dir.join(name);
            let recorded = on_disk.strip_prefix(manifest_dir).unwrap().to_owned();
            features.insert(disk_feature(&on_disk, recorded));
        }

        assert_eq!(features.len(), 3);

        let paths: Vec<&Path> = features.iter().collect();
        assert_eq!(paths.len(), 3);

        let expected = ["Cargo.toml", "LICENSE", "README.md"].map(Path::new);
        assert_eq!(paths, expected);
    }

    #[test]
    fn iter_yields_paths_in_insertion_order() {
        let names = ["a.rb", "b.rb", "c.rb", "d.rb", "e.rb", "f.rb", "g.rb"];
        let features = in_memory_set(&names);
        assert_eq!(features.len(), 7);

        let paths: Vec<&Path> = features.iter().collect();
        assert_eq!(paths.len(), 7);
        assert_eq!(paths, names.map(Path::new));
    }

    #[test]
    fn features_iter_yields_all_features() {
        let names = ["a.rb", "b.rb", "c.rb", "d.rb", "e.rb", "f.rb", "g.rb"];
        let features = in_memory_set(&names);
        assert_eq!(features.len(), 7);

        // `features()` makes no ordering promise, so sort before comparing.
        let mut paths: Vec<&Path> = features.features().map(Feature::path).collect();
        assert_eq!(paths.len(), 7);

        paths.sort_unstable();
        assert_eq!(paths, names.map(Path::new));
    }
}