rustix/backend/linux_raw/fs/
dir.rs

1use crate::fd::{AsFd, BorrowedFd, OwnedFd};
2use crate::ffi::{CStr, CString};
3use crate::fs::{
4    fcntl_getfl, fstat, fstatfs, fstatvfs, openat, FileType, Mode, OFlags, Stat, StatFs, StatVfs,
5};
6use crate::io;
7#[cfg(feature = "process")]
8use crate::process::fchdir;
9use crate::utils::as_ptr;
10use alloc::borrow::ToOwned as _;
11use alloc::vec::Vec;
12use core::fmt;
13use core::mem::size_of;
14use linux_raw_sys::general::{linux_dirent64, SEEK_SET};
15
16/// `DIR*`
17pub struct Dir {
18    /// The `OwnedFd` that we read directory entries from.
19    fd: OwnedFd,
20
21    /// Have we seen any errors in this iteration?
22    any_errors: bool,
23
24    /// Should we rewind the stream on the next iteration?
25    rewind: bool,
26
27    /// The buffer for `linux_dirent64` entries.
28    buf: Vec<u8>,
29
30    /// Where we are in the buffer.
31    pos: usize,
32}
33
34impl Dir {
35    /// Take ownership of `fd` and construct a `Dir` that reads entries from
36    /// the given directory file descriptor.
37    #[inline]
38    pub fn new<Fd: Into<OwnedFd>>(fd: Fd) -> io::Result<Self> {
39        Self::_new(fd.into())
40    }
41
42    #[inline]
43    fn _new(fd: OwnedFd) -> io::Result<Self> {
44        Ok(Self {
45            fd,
46            any_errors: false,
47            rewind: false,
48            buf: Vec::new(),
49            pos: 0,
50        })
51    }
52
53    /// Borrow `fd` and construct a `Dir` that reads entries from the given
54    /// directory file descriptor.
55    #[inline]
56    pub fn read_from<Fd: AsFd>(fd: Fd) -> io::Result<Self> {
57        Self::_read_from(fd.as_fd())
58    }
59
60    #[inline]
61    fn _read_from(fd: BorrowedFd<'_>) -> io::Result<Self> {
62        let flags = fcntl_getfl(fd)?;
63        let fd_for_dir = openat(fd, cstr!("."), flags | OFlags::CLOEXEC, Mode::empty())?;
64
65        Ok(Self {
66            fd: fd_for_dir,
67            any_errors: false,
68            rewind: false,
69            buf: Vec::new(),
70            pos: 0,
71        })
72    }
73
74    /// `rewinddir(self)`
75    #[inline]
76    pub fn rewind(&mut self) {
77        self.any_errors = false;
78        self.rewind = true;
79        self.pos = self.buf.len();
80    }
81
82    /// `seekdir(self, offset)`
83    ///
84    /// This function is only available on 64-bit platforms because it's
85    /// implemented using [`libc::seekdir`] which only supports offsets that
86    /// fit in a `c_long`.
87    ///
88    /// [`libc::seekdir`]: https://docs.rs/libc/*/arm-unknown-linux-gnueabihf/libc/fn.seekdir.html
89    // In the linux_raw backend here, we don't use `libc::seekdir` and don't
90    // have this limitation, but it's a goal of rustix to support the same API
91    // on both the linux_raw and libc backends.
92    #[cfg(target_pointer_width = "64")]
93    #[cfg_attr(docsrs, doc(cfg(target_pointer_width = "64")))]
94    #[doc(alias = "seekdir")]
95    #[inline]
96    pub fn seek(&mut self, offset: i64) -> io::Result<()> {
97        self.any_errors = false;
98        self.rewind = false;
99        self.pos = self.buf.len();
100        match io::retry_on_intr(|| {
101            crate::backend::fs::syscalls::_seek(self.fd.as_fd(), offset, SEEK_SET)
102        }) {
103            Ok(_) => Ok(()),
104            Err(err) => {
105                self.any_errors = true;
106                Err(err)
107            }
108        }
109    }
110
111    /// `readdir(self)`, where `None` means the end of the directory.
112    pub fn read(&mut self) -> Option<io::Result<DirEntry>> {
113        // If we've seen errors, don't continue to try to read anything
114        // further.
115        if self.any_errors {
116            return None;
117        }
118
119        // If a rewind was requested, seek to the beginning.
120        if self.rewind {
121            self.rewind = false;
122            match io::retry_on_intr(|| {
123                crate::backend::fs::syscalls::_seek(self.fd.as_fd(), 0, SEEK_SET)
124            }) {
125                Ok(_) => (),
126                Err(err) => {
127                    self.any_errors = true;
128                    return Some(Err(err));
129                }
130            }
131        }
132
133        // Compute linux_dirent64 field offsets.
134        let z = linux_dirent64 {
135            d_ino: 0_u64,
136            d_off: 0_i64,
137            d_type: 0_u8,
138            d_reclen: 0_u16,
139            d_name: Default::default(),
140        };
141        let base = as_ptr(&z) as usize;
142        let offsetof_d_reclen = (as_ptr(&z.d_reclen) as usize) - base;
143        let offsetof_d_name = (as_ptr(&z.d_name) as usize) - base;
144        let offsetof_d_ino = (as_ptr(&z.d_ino) as usize) - base;
145        let offsetof_d_off = (as_ptr(&z.d_off) as usize) - base;
146        let offsetof_d_type = (as_ptr(&z.d_type) as usize) - base;
147
148        // Test if we need more entries, and if so, read more.
149        if self.buf.len() - self.pos < size_of::<linux_dirent64>() {
150            match self.read_more()? {
151                Ok(()) => (),
152                Err(err) => return Some(Err(err)),
153            }
154        }
155
156        // We successfully read an entry. Extract the fields.
157        let pos = self.pos;
158
159        // Do an unaligned u16 load.
160        let d_reclen = u16::from_ne_bytes([
161            self.buf[pos + offsetof_d_reclen],
162            self.buf[pos + offsetof_d_reclen + 1],
163        ]);
164        assert!(self.buf.len() - pos >= d_reclen as usize);
165        self.pos += d_reclen as usize;
166
167        // Read the NUL-terminated name from the `d_name` field. Without
168        // `unsafe`, we need to scan for the NUL twice: once to obtain a size
169        // for the slice, and then once within `CStr::from_bytes_with_nul`.
170        let name_start = pos + offsetof_d_name;
171        let name_len = self.buf[name_start..]
172            .iter()
173            .position(|x| *x == b'\0')
174            .unwrap();
175        let name = CStr::from_bytes_with_nul(&self.buf[name_start..][..=name_len]).unwrap();
176        let name = name.to_owned();
177        assert!(name.as_bytes().len() <= self.buf.len() - name_start);
178
179        // Do an unaligned `u64` load for `d_ino`.
180        let d_ino = u64::from_ne_bytes([
181            self.buf[pos + offsetof_d_ino],
182            self.buf[pos + offsetof_d_ino + 1],
183            self.buf[pos + offsetof_d_ino + 2],
184            self.buf[pos + offsetof_d_ino + 3],
185            self.buf[pos + offsetof_d_ino + 4],
186            self.buf[pos + offsetof_d_ino + 5],
187            self.buf[pos + offsetof_d_ino + 6],
188            self.buf[pos + offsetof_d_ino + 7],
189        ]);
190
191        // Do an unaligned `i64` load for `d_off`.
192        let d_off = i64::from_ne_bytes([
193            self.buf[pos + offsetof_d_off],
194            self.buf[pos + offsetof_d_off + 1],
195            self.buf[pos + offsetof_d_off + 2],
196            self.buf[pos + offsetof_d_off + 3],
197            self.buf[pos + offsetof_d_off + 4],
198            self.buf[pos + offsetof_d_off + 5],
199            self.buf[pos + offsetof_d_off + 6],
200            self.buf[pos + offsetof_d_off + 7],
201        ]);
202
203        let d_type = self.buf[pos + offsetof_d_type];
204
205        // Check that our types correspond to the `linux_dirent64` types.
206        let _ = linux_dirent64 {
207            d_ino,
208            d_off,
209            d_type,
210            d_reclen,
211            d_name: Default::default(),
212        };
213
214        Some(Ok(DirEntry {
215            d_ino,
216            d_off,
217            d_type,
218            name,
219        }))
220    }
221
222    #[must_use]
223    fn read_more(&mut self) -> Option<io::Result<()>> {
224        // The first few times we're called, we allocate a relatively small
225        // buffer, because many directories are small. If we're called more,
226        // use progressively larger allocations, up to a fixed maximum.
227        //
228        // The specific sizes and policy here have not been tuned in detail yet
229        // and may need to be adjusted. In doing so, we should be careful to
230        // avoid unbounded buffer growth. This buffer only exists to share the
231        // cost of a `getdents` call over many entries, so if it gets too big,
232        // cache and heap usage will outweigh the benefit. And ultimately,
233        // directories can contain more entries than we can allocate contiguous
234        // memory for, so we'll always need to cap the size at some point.
235        if self.buf.len() < 1024 * size_of::<linux_dirent64>() {
236            self.buf.reserve(32 * size_of::<linux_dirent64>());
237        }
238        self.buf.resize(self.buf.capacity(), 0);
239        let nread = match io::retry_on_intr(|| {
240            crate::backend::fs::syscalls::getdents(self.fd.as_fd(), &mut self.buf)
241        }) {
242            Ok(nread) => nread,
243            Err(io::Errno::NOENT) => {
244                self.any_errors = true;
245                return None;
246            }
247            Err(err) => {
248                self.any_errors = true;
249                return Some(Err(err));
250            }
251        };
252        self.buf.resize(nread, 0);
253        self.pos = 0;
254        if nread == 0 {
255            None
256        } else {
257            Some(Ok(()))
258        }
259    }
260
261    /// `fstat(self)`
262    #[inline]
263    pub fn stat(&self) -> io::Result<Stat> {
264        fstat(&self.fd)
265    }
266
267    /// `fstatfs(self)`
268    #[inline]
269    pub fn statfs(&self) -> io::Result<StatFs> {
270        fstatfs(&self.fd)
271    }
272
273    /// `fstatvfs(self)`
274    #[inline]
275    pub fn statvfs(&self) -> io::Result<StatVfs> {
276        fstatvfs(&self.fd)
277    }
278
279    /// `fchdir(self)`
280    #[cfg(feature = "process")]
281    #[cfg_attr(docsrs, doc(cfg(feature = "process")))]
282    #[inline]
283    pub fn chdir(&self) -> io::Result<()> {
284        fchdir(&self.fd)
285    }
286}
287
288impl Iterator for Dir {
289    type Item = io::Result<DirEntry>;
290
291    #[inline]
292    fn next(&mut self) -> Option<Self::Item> {
293        Self::read(self)
294    }
295}
296
297impl fmt::Debug for Dir {
298    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
299        f.debug_struct("Dir").field("fd", &self.fd).finish()
300    }
301}
302
303/// `struct dirent`
304#[derive(Debug)]
305pub struct DirEntry {
306    d_ino: u64,
307    d_type: u8,
308    d_off: i64,
309    name: CString,
310}
311
312impl DirEntry {
313    /// Returns the file name of this directory entry.
314    #[inline]
315    pub fn file_name(&self) -> &CStr {
316        &self.name
317    }
318
319    /// Returns the “offset” of this directory entry. This is not a true
320    /// numerical offset but an opaque cookie that identifies a position in the
321    /// given stream.
322    #[inline]
323    pub fn offset(&self) -> i64 {
324        self.d_off
325    }
326
327    /// Returns the type of this directory entry.
328    #[inline]
329    pub fn file_type(&self) -> FileType {
330        FileType::from_dirent_d_type(self.d_type)
331    }
332
333    /// Return the inode number of this directory entry.
334    #[inline]
335    pub fn ino(&self) -> u64 {
336        self.d_ino
337    }
338}
339
#[cfg(test)]
mod tests {
    use super::*;
    use crate::fs::{openat, Mode, OFlags, CWD};

    /// After `getdents64` fails, the iterator must report the error once and
    /// then end, rather than reporting it forever.
    #[test]
    fn dir_iterator_handles_io_errors() {
        // Create a temporary directory and open a descriptor for it.
        let tmp = tempfile::tempdir().unwrap();
        let dir_flags = OFlags::RDONLY | OFlags::CLOEXEC;
        let fd = openat(CWD, tmp.path(), dir_flags, Mode::empty()).unwrap();

        // Create a regular file inside it and keep its descriptor too.
        let file_path = tmp.path().join("test.txt");
        let file_flags = OFlags::WRONLY | OFlags::CREATE;
        let file_fd = openat(&fd, file_path, file_flags, Mode::RWXU).unwrap();

        let mut dir = Dir::read_from(&fd).unwrap();

        // Reach inside the `Dir` and replace its directory with a file, which
        // will cause the subsequent `getdents64` to fail.
        crate::io::dup2(&file_fd, &mut dir.fd).unwrap();

        let first = dir.next();
        assert!(matches!(first, Some(Err(_))));

        let second = dir.next();
        assert!(second.is_none());
    }
}
373}