chrono/offset/local/tz_info/
parser.rs

1use std::io::{self, ErrorKind};
2use std::iter;
3use std::num::ParseIntError;
4use std::str::{self, FromStr};
5
6use super::rule::TransitionRule;
7use super::timezone::{LeapSecond, LocalTimeType, TimeZone, Transition};
8use super::Error;
9
10pub(super) fn parse(bytes: &[u8]) -> Result<TimeZone, Error> {
11    let mut cursor = Cursor::new(bytes);
12    let state = State::new(&mut cursor, true)?;
13    let (state, footer) = match state.header.version {
14        Version::V1 => match cursor.is_empty() {
15            true => (state, None),
16            false => {
17                return Err(Error::InvalidTzFile("remaining data after end of TZif v1 data block"))
18            }
19        },
20        Version::V2 | Version::V3 => {
21            let state = State::new(&mut cursor, false)?;
22            (state, Some(cursor.remaining()))
23        }
24    };
25
26    let mut transitions = Vec::with_capacity(state.header.transition_count);
27    for (arr_time, &local_time_type_index) in
28        state.transition_times.chunks_exact(state.time_size).zip(state.transition_types)
29    {
30        let unix_leap_time =
31            state.parse_time(&arr_time[0..state.time_size], state.header.version)?;
32        let local_time_type_index = local_time_type_index as usize;
33        transitions.push(Transition::new(unix_leap_time, local_time_type_index));
34    }
35
36    let mut local_time_types = Vec::with_capacity(state.header.type_count);
37    for arr in state.local_time_types.chunks_exact(6) {
38        let ut_offset = read_be_i32(&arr[..4])?;
39
40        let is_dst = match arr[4] {
41            0 => false,
42            1 => true,
43            _ => return Err(Error::InvalidTzFile("invalid DST indicator")),
44        };
45
46        let char_index = arr[5] as usize;
47        if char_index >= state.header.char_count {
48            return Err(Error::InvalidTzFile("invalid time zone name char index"));
49        }
50
51        let position = match state.names[char_index..].iter().position(|&c| c == b'\0') {
52            Some(position) => position,
53            None => return Err(Error::InvalidTzFile("invalid time zone name char index")),
54        };
55
56        let name = &state.names[char_index..char_index + position];
57        let name = if !name.is_empty() { Some(name) } else { None };
58        local_time_types.push(LocalTimeType::new(ut_offset, is_dst, name)?);
59    }
60
61    let mut leap_seconds = Vec::with_capacity(state.header.leap_count);
62    for arr in state.leap_seconds.chunks_exact(state.time_size + 4) {
63        let unix_leap_time = state.parse_time(&arr[0..state.time_size], state.header.version)?;
64        let correction = read_be_i32(&arr[state.time_size..state.time_size + 4])?;
65        leap_seconds.push(LeapSecond::new(unix_leap_time, correction));
66    }
67
68    let std_walls_iter = state.std_walls.iter().copied().chain(iter::repeat(0));
69    let ut_locals_iter = state.ut_locals.iter().copied().chain(iter::repeat(0));
70    if std_walls_iter.zip(ut_locals_iter).take(state.header.type_count).any(|pair| pair == (0, 1)) {
71        return Err(Error::InvalidTzFile(
72            "invalid couple of standard/wall and UT/local indicators",
73        ));
74    }
75
76    let extra_rule = match footer {
77        Some(footer) => {
78            let footer = str::from_utf8(footer)?;
79            if !(footer.starts_with('\n') && footer.ends_with('\n')) {
80                return Err(Error::InvalidTzFile("invalid footer"));
81            }
82
83            let tz_string = footer.trim_matches(|c: char| c.is_ascii_whitespace());
84            if tz_string.starts_with(':') || tz_string.contains('\0') {
85                return Err(Error::InvalidTzFile("invalid footer"));
86            }
87
88            match tz_string.is_empty() {
89                true => None,
90                false => Some(TransitionRule::from_tz_string(
91                    tz_string.as_bytes(),
92                    state.header.version == Version::V3,
93                )?),
94            }
95        }
96        None => None,
97    };
98
99    TimeZone::new(transitions, local_time_types, leap_seconds, extra_rule)
100}
101
102/// TZif data blocks
103struct State<'a> {
104    header: Header,
105    /// Time size in bytes
106    time_size: usize,
107    /// Transition times data block
108    transition_times: &'a [u8],
109    /// Transition types data block
110    transition_types: &'a [u8],
111    /// Local time types data block
112    local_time_types: &'a [u8],
113    /// Time zone names data block
114    names: &'a [u8],
115    /// Leap seconds data block
116    leap_seconds: &'a [u8],
117    /// UT/local indicators data block
118    std_walls: &'a [u8],
119    /// Standard/wall indicators data block
120    ut_locals: &'a [u8],
121}
122
123impl<'a> State<'a> {
124    /// Read TZif data blocks
125    fn new(cursor: &mut Cursor<'a>, first: bool) -> Result<Self, Error> {
126        let header = Header::new(cursor)?;
127        let time_size = match first {
128            true => 4, // We always parse V1 first
129            false => 8,
130        };
131
132        Ok(Self {
133            time_size,
134            transition_times: cursor.read_exact(header.transition_count * time_size)?,
135            transition_types: cursor.read_exact(header.transition_count)?,
136            local_time_types: cursor.read_exact(header.type_count * 6)?,
137            names: cursor.read_exact(header.char_count)?,
138            leap_seconds: cursor.read_exact(header.leap_count * (time_size + 4))?,
139            std_walls: cursor.read_exact(header.std_wall_count)?,
140            ut_locals: cursor.read_exact(header.ut_local_count)?,
141            header,
142        })
143    }
144
145    /// Parse time values
146    fn parse_time(&self, arr: &[u8], version: Version) -> Result<i64, Error> {
147        match version {
148            Version::V1 => Ok(read_be_i32(&arr[..4])?.into()),
149            Version::V2 | Version::V3 => read_be_i64(arr),
150        }
151    }
152}
153
154/// TZif header
155#[derive(Debug)]
156struct Header {
157    /// TZif version
158    version: Version,
159    /// Number of UT/local indicators
160    ut_local_count: usize,
161    /// Number of standard/wall indicators
162    std_wall_count: usize,
163    /// Number of leap-second records
164    leap_count: usize,
165    /// Number of transition times
166    transition_count: usize,
167    /// Number of local time type records
168    type_count: usize,
169    /// Number of time zone names bytes
170    char_count: usize,
171}
172
173impl Header {
174    fn new(cursor: &mut Cursor) -> Result<Self, Error> {
175        let magic = cursor.read_exact(4)?;
176        if magic != *b"TZif" {
177            return Err(Error::InvalidTzFile("invalid magic number"));
178        }
179
180        let version = match cursor.read_exact(1)? {
181            [0x00] => Version::V1,
182            [0x32] => Version::V2,
183            [0x33] => Version::V3,
184            _ => return Err(Error::UnsupportedTzFile("unsupported TZif version")),
185        };
186
187        cursor.read_exact(15)?;
188        let ut_local_count = cursor.read_be_u32()?;
189        let std_wall_count = cursor.read_be_u32()?;
190        let leap_count = cursor.read_be_u32()?;
191        let transition_count = cursor.read_be_u32()?;
192        let type_count = cursor.read_be_u32()?;
193        let char_count = cursor.read_be_u32()?;
194
195        if !(type_count != 0
196            && char_count != 0
197            && (ut_local_count == 0 || ut_local_count == type_count)
198            && (std_wall_count == 0 || std_wall_count == type_count))
199        {
200            return Err(Error::InvalidTzFile("invalid header"));
201        }
202
203        Ok(Self {
204            version,
205            ut_local_count: ut_local_count as usize,
206            std_wall_count: std_wall_count as usize,
207            leap_count: leap_count as usize,
208            transition_count: transition_count as usize,
209            type_count: type_count as usize,
210            char_count: char_count as usize,
211        })
212    }
213}
214
215/// A `Cursor` contains a slice of a buffer and a read count.
216#[derive(Debug, Eq, PartialEq)]
217pub(crate) struct Cursor<'a> {
218    /// Slice representing the remaining data to be read
219    remaining: &'a [u8],
220    /// Number of already read bytes
221    read_count: usize,
222}
223
224impl<'a> Cursor<'a> {
225    /// Construct a new `Cursor` from remaining data
226    pub(crate) const fn new(remaining: &'a [u8]) -> Self {
227        Self { remaining, read_count: 0 }
228    }
229
230    pub(crate) fn peek(&self) -> Option<&u8> {
231        self.remaining().first()
232    }
233
234    /// Returns remaining data
235    pub(crate) const fn remaining(&self) -> &'a [u8] {
236        self.remaining
237    }
238
239    /// Returns `true` if data is remaining
240    pub(crate) const fn is_empty(&self) -> bool {
241        self.remaining.is_empty()
242    }
243
244    pub(crate) fn read_be_u32(&mut self) -> Result<u32, Error> {
245        let mut buf = [0; 4];
246        buf.copy_from_slice(self.read_exact(4)?);
247        Ok(u32::from_be_bytes(buf))
248    }
249
250    /// Read exactly `count` bytes, reducing remaining data and incrementing read count
251    pub(crate) fn read_exact(&mut self, count: usize) -> Result<&'a [u8], io::Error> {
252        match (self.remaining.get(..count), self.remaining.get(count..)) {
253            (Some(result), Some(remaining)) => {
254                self.remaining = remaining;
255                self.read_count += count;
256                Ok(result)
257            }
258            _ => Err(io::Error::from(ErrorKind::UnexpectedEof)),
259        }
260    }
261
262    /// Read bytes and compare them to the provided tag
263    pub(crate) fn read_tag(&mut self, tag: &[u8]) -> Result<(), io::Error> {
264        if self.read_exact(tag.len())? == tag {
265            Ok(())
266        } else {
267            Err(io::Error::from(ErrorKind::InvalidData))
268        }
269    }
270
271    /// Read bytes if the remaining data is prefixed by the provided tag
272    pub(crate) fn read_optional_tag(&mut self, tag: &[u8]) -> Result<bool, io::Error> {
273        if self.remaining.starts_with(tag) {
274            self.read_exact(tag.len())?;
275            Ok(true)
276        } else {
277            Ok(false)
278        }
279    }
280
281    /// Read bytes as long as the provided predicate is true
282    pub(crate) fn read_while<F: Fn(&u8) -> bool>(&mut self, f: F) -> Result<&'a [u8], io::Error> {
283        match self.remaining.iter().position(|x| !f(x)) {
284            None => self.read_exact(self.remaining.len()),
285            Some(position) => self.read_exact(position),
286        }
287    }
288
289    // Parse an integer out of the ASCII digits
290    pub(crate) fn read_int<T: FromStr<Err = ParseIntError>>(&mut self) -> Result<T, Error> {
291        let bytes = self.read_while(u8::is_ascii_digit)?;
292        Ok(str::from_utf8(bytes)?.parse()?)
293    }
294
295    /// Read bytes until the provided predicate is true
296    pub(crate) fn read_until<F: Fn(&u8) -> bool>(&mut self, f: F) -> Result<&'a [u8], io::Error> {
297        match self.remaining.iter().position(f) {
298            None => self.read_exact(self.remaining.len()),
299            Some(position) => self.read_exact(position),
300        }
301    }
302}
303
304pub(crate) fn read_be_i32(bytes: &[u8]) -> Result<i32, Error> {
305    if bytes.len() != 4 {
306        return Err(Error::InvalidSlice("too short for i32"));
307    }
308
309    let mut buf = [0; 4];
310    buf.copy_from_slice(bytes);
311    Ok(i32::from_be_bytes(buf))
312}
313
314pub(crate) fn read_be_i64(bytes: &[u8]) -> Result<i64, Error> {
315    if bytes.len() != 8 {
316        return Err(Error::InvalidSlice("too short for i64"));
317    }
318
319    let mut buf = [0; 8];
320    buf.copy_from_slice(bytes);
321    Ok(i64::from_be_bytes(buf))
322}
323
/// TZif version
///
/// Taken from the version byte of a TZif header; `Header::new` maps `0x00`, `0x32` and
/// `0x33` to the three variants and rejects every other value.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
enum Version {
    /// Version 1 (version byte `0x00`)
    V1,
    /// Version 2 (version byte `0x32`, ASCII `'2'`)
    V2,
    /// Version 3 (version byte `0x33`, ASCII `'3'`)
    V3,
}