tz/parse/
tz_file.rs

1//! Functions used for parsing a TZif file.
2
3use crate::error::parse::TzFileError;
4use crate::error::TzError;
5use crate::parse::tz_string::parse_posix_tz;
6use crate::parse::utils::{read_chunk_exact, read_exact, Cursor};
7use crate::timezone::{LeapSecond, LocalTimeType, TimeZone, Transition, TransitionRule};
8
9use alloc::vec::Vec;
10use core::iter;
11use core::str;
12
/// Version of the TZif format announced by a header
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum Version {
    /// First version of the format
    V1,
    /// Second version of the format
    V2,
    /// Third version of the format
    V3,
}
23
24/// TZif header
25#[derive(Debug)]
26struct Header {
27    /// TZif version
28    version: Version,
29    /// Number of UT/local indicators
30    ut_local_count: usize,
31    /// Number of standard/wall indicators
32    std_wall_count: usize,
33    /// Number of leap-second records
34    leap_count: usize,
35    /// Number of transition times
36    transition_count: usize,
37    /// Number of local time type records
38    type_count: usize,
39    /// Number of time zone designations bytes
40    char_count: usize,
41}
42
43/// Parse TZif header
44fn parse_header(cursor: &mut Cursor<'_>) -> Result<Header, TzFileError> {
45    let magic = read_exact(cursor, 4)?;
46    if magic != *b"TZif" {
47        return Err(TzFileError::InvalidMagicNumber);
48    }
49
50    let version = match read_exact(cursor, 1)? {
51        [0x00] => Version::V1,
52        [0x32] => Version::V2,
53        [0x33] => Version::V3,
54        _ => return Err(TzFileError::UnsupportedTzFileVersion),
55    };
56
57    read_exact(cursor, 15)?;
58
59    let ut_local_count = u32::from_be_bytes(*read_chunk_exact(cursor)?);
60    let std_wall_count = u32::from_be_bytes(*read_chunk_exact(cursor)?);
61    let leap_count = u32::from_be_bytes(*read_chunk_exact(cursor)?);
62    let transition_count = u32::from_be_bytes(*read_chunk_exact(cursor)?);
63    let type_count = u32::from_be_bytes(*read_chunk_exact(cursor)?);
64    let char_count = u32::from_be_bytes(*read_chunk_exact(cursor)?);
65
66    if !(type_count != 0 && char_count != 0 && (ut_local_count == 0 || ut_local_count == type_count) && (std_wall_count == 0 || std_wall_count == type_count)) {
67        return Err(TzFileError::InvalidHeader);
68    }
69
70    Ok(Header {
71        version,
72        ut_local_count: ut_local_count as usize,
73        std_wall_count: std_wall_count as usize,
74        leap_count: leap_count as usize,
75        transition_count: transition_count as usize,
76        type_count: type_count as usize,
77        char_count: char_count as usize,
78    })
79}
80
81/// Parse TZif footer
82fn parse_footer(footer: &[u8], use_string_extensions: bool) -> Result<Option<TransitionRule>, TzError> {
83    let footer = str::from_utf8(footer).map_err(TzFileError::from)?;
84    if !(footer.starts_with('\n') && footer.ends_with('\n')) {
85        return Err(TzError::TzFile(TzFileError::InvalidFooter));
86    }
87
88    let tz_string = footer.trim_matches(|c: char| c.is_ascii_whitespace());
89    if tz_string.starts_with(':') || tz_string.contains('\0') {
90        return Err(TzError::TzFile(TzFileError::InvalidFooter));
91    }
92
93    if !tz_string.is_empty() {
94        Ok(Some(parse_posix_tz(tz_string.as_bytes(), use_string_extensions)).transpose()?)
95    } else {
96        Ok(None)
97    }
98}
99
/// TZif data blocks
///
/// Each field borrows one raw, not yet decoded section of a data block.
/// `TIME_SIZE` is the width in bytes of every stored time value.
struct DataBlocks<'a, const TIME_SIZE: usize> {
    /// Transition times data block
    transition_times: &'a [u8],
    /// Transition types data block
    transition_types: &'a [u8],
    /// Local time types data block
    local_time_types: &'a [u8],
    /// Time zone designations data block
    time_zone_designations: &'a [u8],
    /// Leap seconds data block
    leap_seconds: &'a [u8],
    /// Standard/wall indicators data block
    std_walls: &'a [u8],
    /// UT/local indicators data block
    ut_locals: &'a [u8],
}
117
118/// Read TZif data blocks
119fn read_data_blocks<'a, const TIME_SIZE: usize>(cursor: &mut Cursor<'a>, header: &Header) -> Result<DataBlocks<'a, TIME_SIZE>, TzFileError> {
120    Ok(DataBlocks {
121        transition_times: read_exact(cursor, header.transition_count * TIME_SIZE)?,
122        transition_types: read_exact(cursor, header.transition_count)?,
123        local_time_types: read_exact(cursor, header.type_count * 6)?,
124        time_zone_designations: read_exact(cursor, header.char_count)?,
125        leap_seconds: read_exact(cursor, header.leap_count * (TIME_SIZE + 4))?,
126        std_walls: read_exact(cursor, header.std_wall_count)?,
127        ut_locals: read_exact(cursor, header.ut_local_count)?,
128    })
129}
130
/// Decoding of one encoded time value into an `i64`
trait ParseTime {
    /// Encoded form of a single time value
    type TimeData;

    /// Decode `data` into a 64-bit time value
    fn parse_time(&self, data: &Self::TimeData) -> i64;
}
136
137impl<'a> ParseTime for DataBlocks<'a, 4> {
138    type TimeData = [u8; 4];
139
140    fn parse_time(&self, data: &Self::TimeData) -> i64 {
141        i32::from_be_bytes(*data).into()
142    }
143}
144
145impl<'a> ParseTime for DataBlocks<'a, 8> {
146    type TimeData = [u8; 8];
147
148    fn parse_time(&self, data: &Self::TimeData) -> i64 {
149        i64::from_be_bytes(*data)
150    }
151}
152
153impl<'a, const TIME_SIZE: usize> DataBlocks<'a, TIME_SIZE>
154where
155    DataBlocks<'a, TIME_SIZE>: ParseTime<TimeData = [u8; TIME_SIZE]>,
156{
157    /// Parse time zone data
158    fn parse(&self, header: &Header, footer: Option<&[u8]>) -> Result<TimeZone, TzError> {
159        let mut transitions = Vec::with_capacity(header.transition_count);
160        for (time_data, &local_time_type_index) in self.transition_times.chunks_exact(TIME_SIZE).zip(self.transition_types) {
161            let time_data = time_data.first_chunk::<TIME_SIZE>().unwrap();
162
163            let unix_leap_time = self.parse_time(time_data);
164            let local_time_type_index = local_time_type_index as usize;
165            transitions.push(Transition::new(unix_leap_time, local_time_type_index));
166        }
167
168        let mut local_time_types = Vec::with_capacity(header.type_count);
169        for data in self.local_time_types.chunks_exact(6) {
170            let [d0, d1, d2, d3, d4, d5] = <[u8; 6]>::try_from(data).unwrap();
171
172            let ut_offset = i32::from_be_bytes([d0, d1, d2, d3]);
173
174            let is_dst = match d4 {
175                0 => false,
176                1 => true,
177                _ => return Err(TzError::TzFile(TzFileError::InvalidDstIndicator)),
178            };
179
180            let char_index = d5 as usize;
181            if char_index >= header.char_count {
182                return Err(TzError::TzFile(TzFileError::InvalidTimeZoneDesignationCharIndex));
183            }
184
185            let time_zone_designation = match self.time_zone_designations[char_index..].iter().position(|&c| c == b'\0') {
186                None => return Err(TzError::TzFile(TzFileError::InvalidTimeZoneDesignationCharIndex)),
187                Some(position) => {
188                    let time_zone_designation = &self.time_zone_designations[char_index..char_index + position];
189
190                    if !time_zone_designation.is_empty() {
191                        Some(time_zone_designation)
192                    } else {
193                        None
194                    }
195                }
196            };
197
198            local_time_types.push(LocalTimeType::new(ut_offset, is_dst, time_zone_designation)?);
199        }
200
201        let mut leap_seconds = Vec::with_capacity(header.leap_count);
202        for data in self.leap_seconds.chunks_exact(TIME_SIZE + 4) {
203            let (time_data, tail) = data.split_first_chunk::<TIME_SIZE>().unwrap();
204            let correction_data = tail.first_chunk::<4>().unwrap();
205
206            let unix_leap_time = self.parse_time(time_data);
207            let correction = i32::from_be_bytes(*correction_data);
208            leap_seconds.push(LeapSecond::new(unix_leap_time, correction));
209        }
210
211        let std_walls_iter = self.std_walls.iter().copied().chain(iter::repeat(0));
212        let ut_locals_iter = self.ut_locals.iter().copied().chain(iter::repeat(0));
213        for (std_wall, ut_local) in std_walls_iter.zip(ut_locals_iter).take(header.type_count) {
214            if !matches!((std_wall, ut_local), (0, 0) | (1, 0) | (1, 1)) {
215                return Err(TzError::TzFile(TzFileError::InvalidStdWallUtLocal));
216            }
217        }
218
219        let extra_rule = footer.and_then(|footer| parse_footer(footer, header.version == Version::V3).transpose()).transpose()?;
220
221        TimeZone::new(transitions, local_time_types, leap_seconds, extra_rule)
222    }
223}
224
225/// Parse TZif file as described in [RFC 8536](https://datatracker.ietf.org/doc/html/rfc8536)
226pub(crate) fn parse_tz_file(bytes: &[u8]) -> Result<TimeZone, TzError> {
227    let mut cursor = bytes;
228
229    let header = parse_header(&mut cursor)?;
230
231    match header.version {
232        Version::V1 => {
233            let data_blocks = read_data_blocks::<4>(&mut cursor, &header)?;
234
235            if !cursor.is_empty() {
236                return Err(TzError::TzFile(TzFileError::RemainingDataV1));
237            }
238
239            Ok(data_blocks.parse(&header, None)?)
240        }
241        Version::V2 | Version::V3 => {
242            // Skip v1 data block
243            read_data_blocks::<4>(&mut cursor, &header)?;
244
245            let header = parse_header(&mut cursor)?;
246            let data_blocks = read_data_blocks::<8>(&mut cursor, &header)?;
247            let footer = cursor;
248
249            Ok(data_blocks.parse(&header, Some(footer))?)
250        }
251    }
252}
253
#[cfg(test)]
mod tests {
    use super::*;
    use crate::timezone::{AlternateTime, MonthWeekDay, RuleDay, TimeZone};

    use alloc::vec;

    /// A version 1 file with one local time type, no transitions and 27 leap-second records
    #[test]
    fn test_v1_file_with_leap_seconds() -> Result<(), TzError> {
        let bytes = b"TZif\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\x01\0\0\0\x1b\0\0\0\0\0\0\0\x01\0\0\0\x04\0\0\0\0\0\0UTC\0\x04\xb2\x58\0\0\0\0\x01\x05\xa4\xec\x01\0\0\0\x02\x07\x86\x1f\x82\0\0\0\x03\x09\x67\x53\x03\0\0\0\x04\x0b\x48\x86\x84\0\0\0\x05\x0d\x2b\x0b\x85\0\0\0\x06\x0f\x0c\x3f\x06\0\0\0\x07\x10\xed\x72\x87\0\0\0\x08\x12\xce\xa6\x08\0\0\0\x09\x15\x9f\xca\x89\0\0\0\x0a\x17\x80\xfe\x0a\0\0\0\x0b\x19\x62\x31\x8b\0\0\0\x0c\x1d\x25\xea\x0c\0\0\0\x0d\x21\xda\xe5\x0d\0\0\0\x0e\x25\x9e\x9d\x8e\0\0\0\x0f\x27\x7f\xd1\x0f\0\0\0\x10\x2a\x50\xf5\x90\0\0\0\x11\x2c\x32\x29\x11\0\0\0\x12\x2e\x13\x5c\x92\0\0\0\x13\x30\xe7\x24\x13\0\0\0\x14\x33\xb8\x48\x94\0\0\0\x15\x36\x8c\x10\x15\0\0\0\x16\x43\xb7\x1b\x96\0\0\0\x17\x49\x5c\x07\x97\0\0\0\x18\x4f\xef\x93\x18\0\0\0\x19\x55\x93\x2d\x99\0\0\0\x1a\x58\x68\x46\x9a\0\0\0\x1b\0\0";

        let parsed = parse_tz_file(bytes)?;

        let expected = TimeZone::new(
            vec![],
            vec![LocalTimeType::new(0, false, Some(b"UTC"))?],
            vec![
                LeapSecond::new(78796800, 1),
                LeapSecond::new(94694401, 2),
                LeapSecond::new(126230402, 3),
                LeapSecond::new(157766403, 4),
                LeapSecond::new(189302404, 5),
                LeapSecond::new(220924805, 6),
                LeapSecond::new(252460806, 7),
                LeapSecond::new(283996807, 8),
                LeapSecond::new(315532808, 9),
                LeapSecond::new(362793609, 10),
                LeapSecond::new(394329610, 11),
                LeapSecond::new(425865611, 12),
                LeapSecond::new(489024012, 13),
                LeapSecond::new(567993613, 14),
                LeapSecond::new(631152014, 15),
                LeapSecond::new(662688015, 16),
                LeapSecond::new(709948816, 17),
                LeapSecond::new(741484817, 18),
                LeapSecond::new(773020818, 19),
                LeapSecond::new(820454419, 20),
                LeapSecond::new(867715220, 21),
                LeapSecond::new(915148821, 22),
                LeapSecond::new(1136073622, 23),
                LeapSecond::new(1230768023, 24),
                LeapSecond::new(1341100824, 25),
                LeapSecond::new(1435708825, 26),
                LeapSecond::new(1483228826, 27),
            ],
            None,
        )?;

        assert_eq!(parsed, expected);

        Ok(())
    }

    /// A version 2 file: the second data block and the `HST10` footer are what gets parsed
    #[test]
    fn test_v2_file() -> Result<(), TzError> {
        let bytes = b"TZif2\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x06\0\0\0\x06\0\0\0\0\0\0\0\x07\0\0\0\x06\0\0\0\x14\x80\0\0\0\xbb\x05\x43\x48\xbb\x21\x71\x58\xcb\x89\x3d\xc8\xd2\x23\xf4\x70\xd2\x61\x49\x38\xd5\x8d\x73\x48\x01\x02\x01\x03\x04\x01\x05\xff\xff\x6c\x02\0\0\xff\xff\x6c\x58\0\x04\xff\xff\x7a\x68\x01\x08\xff\xff\x7a\x68\x01\x0c\xff\xff\x7a\x68\x01\x10\xff\xff\x73\x60\0\x04LMT\0HST\0HDT\0HWT\0HPT\0\0\0\0\0\x01\0\0\0\0\0\x01\0TZif2\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x06\0\0\0\x06\0\0\0\0\0\0\0\x07\0\0\0\x06\0\0\0\x14\xff\xff\xff\xff\x74\xe0\x70\xbe\xff\xff\xff\xff\xbb\x05\x43\x48\xff\xff\xff\xff\xbb\x21\x71\x58\xff\xff\xff\xff\xcb\x89\x3d\xc8\xff\xff\xff\xff\xd2\x23\xf4\x70\xff\xff\xff\xff\xd2\x61\x49\x38\xff\xff\xff\xff\xd5\x8d\x73\x48\x01\x02\x01\x03\x04\x01\x05\xff\xff\x6c\x02\0\0\xff\xff\x6c\x58\0\x04\xff\xff\x7a\x68\x01\x08\xff\xff\x7a\x68\x01\x0c\xff\xff\x7a\x68\x01\x10\xff\xff\x73\x60\0\x04LMT\0HST\0HDT\0HWT\0HPT\0\0\0\0\0\x01\0\0\0\0\0\x01\0\x0aHST10\x0a";

        let parsed = parse_tz_file(bytes)?;

        let expected = TimeZone::new(
            vec![
                Transition::new(-2334101314, 1),
                Transition::new(-1157283000, 2),
                Transition::new(-1155436200, 1),
                Transition::new(-880198200, 3),
                Transition::new(-769395600, 4),
                Transition::new(-765376200, 1),
                Transition::new(-712150200, 5),
            ],
            vec![
                LocalTimeType::new(-37886, false, Some(b"LMT"))?,
                LocalTimeType::new(-37800, false, Some(b"HST"))?,
                LocalTimeType::new(-34200, true, Some(b"HDT"))?,
                LocalTimeType::new(-34200, true, Some(b"HWT"))?,
                LocalTimeType::new(-34200, true, Some(b"HPT"))?,
                LocalTimeType::new(-36000, false, Some(b"HST"))?,
            ],
            vec![],
            Some(TransitionRule::Fixed(LocalTimeType::new(-36000, false, Some(b"HST"))?)),
        )?;

        assert_eq!(parsed, expected);

        // Spot-check lookups: one inside the transition table, one long after its last entry
        assert_eq!(*parsed.find_local_time_type(-1156939200)?, LocalTimeType::new(-34200, true, Some(b"HDT"))?);
        assert_eq!(*parsed.find_local_time_type(1546300800)?, LocalTimeType::new(-36000, false, Some(b"HST"))?);

        Ok(())
    }

    /// A version 3 file whose footer uses an hour offset (`/26`) beyond the 24-hour range
    #[test]
    fn test_v3_file() -> Result<(), TzError> {
        let bytes = b"TZif3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\x04\0\0\x1c\x20\0\0IST\0TZif3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\x01\0\0\0\0\0\0\0\x01\0\0\0\x01\0\0\0\x04\0\0\0\0\x7f\xe8\x17\x80\0\0\0\x1c\x20\0\0IST\0\x01\x01\x0aIST-2IDT,M3.4.4/26,M10.5.0\x0a";

        let parsed = parse_tz_file(bytes)?;

        let expected = TimeZone::new(
            vec![Transition::new(2145916800, 0)],
            vec![LocalTimeType::new(7200, false, Some(b"IST"))?],
            vec![],
            Some(TransitionRule::Alternate(AlternateTime::new(
                LocalTimeType::new(7200, false, Some(b"IST"))?,
                LocalTimeType::new(10800, true, Some(b"IDT"))?,
                RuleDay::MonthWeekDay(MonthWeekDay::new(3, 4, 4)?),
                93600,
                RuleDay::MonthWeekDay(MonthWeekDay::new(10, 5, 0)?),
                7200,
            )?)),
        )?;

        assert_eq!(parsed, expected);

        Ok(())
    }
}