rust_icu_udat/
lib.rs

1// Copyright 2019 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//      http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Contains implementations of functions from ICU's `udat.h`.
16//!
17//! All functions that take `ustring::UChar` instead of a rust string reference do so for
18//! efficiency.  The encoding of `ustring::UChar` is uniform (in contrast to say UTF-8), so
19//! repeated manipulation of that string does not waste CPU cycles.
20//!
21//! For detailed instructions for date and time formatting please refer to the [original Unicode
22//! project documentation for date and time formatting](http://userguide.icu-project.org/formatparse/datetime)
23
24use {
25    rust_icu_common as common, rust_icu_sys as sys, rust_icu_sys::versioned_function,
26    rust_icu_ucal as ucal, rust_icu_uloc as uloc, rust_icu_ustring as ustring,
27};
28use std::convert::{TryFrom, TryInto};
29
/// Implements `UDateTimePatternGenerator`. Since 0.5.1.
///
/// Owns a handle to an ICU date-time pattern generator.  Use
/// `get_best_pattern` to turn a skeleton (for example `"MMMMj"`) into a
/// concrete pattern for the locale the generator was opened with.
#[derive(Debug)]
pub struct UDatePatternGenerator {
    // Pointer to the ICU `UDateTimePatternGenerator` object wrapped by this type.
    // Typed as `NonNull`, so it must never hold a null pointer.
    rep: std::ptr::NonNull<sys::UDateTimePatternGenerator>,
}
35
36impl std::clone::Clone for UDatePatternGenerator {
37    /// Implements `udatpg_clone`. Since 0.5.1.
38    fn clone(&self) -> Self {
39        let mut status = common::Error::OK_CODE;
40        let rep = unsafe {
41            let cloned = versioned_function!(udatpg_clone)(self.rep.as_ptr(), &mut status);
42            std::ptr::NonNull::new_unchecked(cloned)
43        };
44        UDatePatternGenerator{ rep }
45    }
46}
47
// Implements `udatpg_close`.
//
// NOTE(review): `simple_drop_impl!` is defined in `rust_icu_common`; it is
// expected to generate the `Drop` impl that hands `rep` to `udatpg_close` --
// confirm against the macro definition.
common::simple_drop_impl!(UDatePatternGenerator, udatpg_close);
50
51impl UDatePatternGenerator {
52    /// Implements `udatpg_open`. Since 0.5.1.
53    pub fn new(loc: &uloc::ULoc) -> Result<Self, common::Error> {
54        let mut status = common::Error::OK_CODE;
55        let asciiz = loc.as_c_str();
56
57        let rep = unsafe {
58            assert!(common::Error::is_ok(status));
59            let ptr = versioned_function!(udatpg_open)(
60                asciiz.as_ptr(),
61                &mut status,
62                );
63            std::ptr::NonNull::new_unchecked(ptr)
64        };
65        common::Error::ok_or_warning(status)?;
66        Ok(UDatePatternGenerator{ rep })
67    }
68
69    /// Implements `udatpg_getBestPattern`. Since 0.5.1.
70    pub fn get_best_pattern(&self, skeleton: &str) -> Result<String, common::Error> {
71        let skeleton = ustring::UChar::try_from(skeleton)?;
72        let result = self.get_best_pattern_ustring(&skeleton)?;
73        String::try_from(&result)
74    }
75
76    /// Implements `udatpg_getBestPattern`. Since 0.5.1.
77    pub fn get_best_pattern_ustring(&self, skeleton: &ustring::UChar) -> Result<ustring::UChar, common::Error> {
78        const BUFFER_CAPACITY: usize = 180;
79        ustring::buffered_uchar_method_with_retry!(
80            get_best_pattern_impl,
81            BUFFER_CAPACITY,
82            [f: *mut sys::UDateTimePatternGenerator, skel: *const sys::UChar, skel_len: i32,],
83            []
84        );
85        get_best_pattern_impl(
86            versioned_function!(udatpg_getBestPattern),
87            self.rep.as_ptr(),
88            skeleton.as_c_ptr(), skeleton.len() as i32,
89            )
90    }
91}
92
/// Implements `UDateFormat`
///
/// A date-time formatter and parser.  Construct it with `new_with_styles` or
/// `new_with_pattern`; the wrapped ICU object is released with `udat_close`
/// when the value is dropped.
#[derive(Debug)]
pub struct UDateFormat {
    // Internal C representation of UDateFormat.  It is owned by this type and
    // must be dropped by calling `udat_close`.
    rep: *mut sys::UDateFormat,
}
100
101impl Drop for UDateFormat {
102    /// Implements `udat_close`
103    fn drop(&mut self) {
104        unsafe {
105            versioned_function!(udat_close)(self.rep);
106        }
107    }
108}
109
110/// Parsed contains output of the call to `UDateFormat::parse_from_position`.
111pub struct Parsed {
112    /// The point in time parsed out of the date-time string.
113    date: sys::UDate,
114
115    /// The position in the input string at which the parsing ended.
116    end_position: usize,
117}
118
119impl Parsed {
120    /// Returns the date resulting from a call to `UDateFormat::parse_from_position`.
121    pub fn date(&self) -> sys::UDate {
122        self.date
123    }
124    /// Returns the end position resulting from a call to `UDateFormat::parse_from_position`.
125    pub fn end_position(&self) -> usize {
126        self.end_position
127    }
128}
129
130impl UDateFormat {
131    /// Creates a new `UDateFormat` based on the provided styles.
132    ///
133    /// Neither time_style nor date_style may be `UDAT_PATTERN`.  If you need
134    /// formatting with a pattern, use instead `new_with_pattern`.
135    /// Implements `udat_open`
136    pub fn new_with_styles(
137        time_style: sys::UDateFormatStyle,
138        date_style: sys::UDateFormatStyle,
139        loc: &uloc::ULoc,
140        tz_id: &ustring::UChar,
141    ) -> Result<Self, common::Error> {
142        assert_ne!(
143            time_style,
144            sys::UDateFormatStyle::UDAT_PATTERN,
145            "programmer error: time_style may not be UDAT_PATTERN"
146        );
147        assert_ne!(
148            date_style,
149            sys::UDateFormatStyle::UDAT_PATTERN,
150            "programmer error: date_style may not be UDAT_PATTERN"
151        );
152        // pattern is ignored if time_style or date_style aren't equal to pattern.
153        let pattern = ustring::UChar::try_from("").expect("pattern created");
154
155        Self::new_internal(time_style, date_style, loc, tz_id, &pattern)
156    }
157
158    /// Creates a new `UDateFormat` based on the provided pattern.
159    ///
160    /// One example pattern is: "yyyy-MM-dd'T'HH:mm:ssXX".
161    ///
162    /// Implements `udat_open`
163    pub fn new_with_pattern(
164        loc: &uloc::ULoc,
165        tz_id: &ustring::UChar,
166        pattern: &ustring::UChar,
167    ) -> Result<Self, common::Error> {
168        Self::new_internal(
169            /*timestyle=*/ sys::UDateFormatStyle::UDAT_PATTERN,
170            /*datestyle=*/ sys::UDateFormatStyle::UDAT_PATTERN,
171            loc,
172            tz_id,
173            pattern,
174        )
175    }
176
177    // Generalized constructor based on `udat_open`.  It is hidden from public eye because its
178    // input parameters are not orthogonal.
179    //
180    // Implements `udat_open`
181    fn new_internal(
182        time_style: sys::UDateFormatStyle,
183        date_style: sys::UDateFormatStyle,
184        loc: &uloc::ULoc,
185        tz_id: &ustring::UChar,
186        pattern: &ustring::UChar,
187    ) -> Result<Self, common::Error> {
188        let mut status = common::Error::OK_CODE;
189        let asciiz = loc.as_c_str();
190
191        // If the timezone is empty, short-circuit it to default.
192        let (tz_id_ptr, tz_id_len): (*const rust_icu_sys::UChar, i32) = if tz_id.len() == 0 {
193            (std::ptr::null(), 0i32)
194        } else {
195            (tz_id.as_c_ptr(), tz_id.len() as i32)
196        };
197
198        // Requires that all pointers be valid. Should be guaranteed by all
199        // objects passed into this function.
200        let date_format = unsafe {
201            assert!(common::Error::is_ok(status));
202            versioned_function!(udat_open)(
203                time_style,
204                date_style,
205                asciiz.as_ptr(),
206                tz_id_ptr,
207                tz_id_len,
208                pattern.as_c_ptr(),
209                pattern.len() as i32,
210                &mut status,
211            )
212        };
213        common::Error::ok_or_warning(status)?;
214        Ok(UDateFormat { rep: date_format })
215    }
216
217    /// Implements `udat_setCalendar`
218    pub fn set_calendar(&mut self, calendar: &ucal::UCalendar) {
219        unsafe {
220            versioned_function!(udat_setCalendar)(self.rep, calendar.as_c_calendar());
221        };
222    }
223
224    /// Parses a date-time given as a string into a `sys::UDate` timestamp.
225    ///
226    /// This version of date parsing does not allow reuse of the input parameters so it is less
227    /// useful for purposes that are not one-shot. See somewhat more detailed `parse_from_position`
228    /// instead.
229    ///
230    /// Implements `udat_parse`
231    pub fn parse(&self, datetime: &str) -> Result<sys::UDate, common::Error> {
232        let datetime_uc = ustring::UChar::try_from(datetime)?;
233        self.parse_from_position(&datetime_uc, 0).map(|r| r.date)
234    }
235
236    /// Parses a date-time given as a string into a `sys::UDate` timestamp and a position
237    /// indicating the first index into `datetime` that was not consumed in parsing.  The
238    /// `position` parameter indicates the index into `datetime` that parsing should start from.
239    ///
240    /// Implements `udat_parse`
241    pub fn parse_from_position(
242        &self,
243        datetime: &ustring::UChar,
244        position: usize,
245    ) -> Result<Parsed, common::Error> {
246        let mut status = common::Error::OK_CODE;
247        let mut _unused_pos: i32 = 0;
248
249        // We do not expect positions that exceed the range of i32.
250        let mut end_position: i32 = position as i32;
251        // Requires that self.rep, and datetime are valid values.  Ensured by
252        // the guaranteses of UDateFormat and ustring::UChar.
253        let date = unsafe {
254            versioned_function!(udat_parse)(
255                self.rep,
256                datetime.as_c_ptr(),
257                datetime.len() as i32,
258                &mut end_position,
259                &mut status,
260            )
261        };
262        common::Error::ok_or_warning(status)?;
263        Ok(Parsed {
264            date,
265            end_position: end_position as usize,
266        })
267    }
268
269    /// Formats a date using this formatter.
270    ///
271    /// Implements `udat_format`
272    pub fn format(&self, date_to_format: sys::UDate) -> Result<String, common::Error> {
273        // This approach follows the recommended practice for unicode conversions: adopt a
274        // resonably-sized buffer, then repeat the conversion if it fails the first time around.
275        const CAP: usize = 1024;
276        let mut status = common::Error::OK_CODE;
277        let mut result = ustring::UChar::new_with_capacity(CAP);
278
279        let mut field_position_unused = sys::UFieldPosition {
280            field: 0,
281            beginIndex: 0,
282            endIndex: 0,
283        };
284
285        // Requires that result is a buffer at least as long as CAP and that
286        // self.rep is a valid pointer to a `sys::UDateFormat` structure.
287        let total_size = unsafe {
288            assert!(common::Error::is_ok(status));
289            versioned_function!(udat_format)(
290                self.rep,
291                date_to_format,
292                result.as_mut_c_ptr(),
293                CAP as i32,
294                &mut field_position_unused,
295                &mut status,
296            )
297        } as usize;
298        common::Error::ok_or_warning(status)?;
299        result.resize(total_size as usize);
300        if total_size > CAP {
301            // Requires that result is a buffer that has length and capacity of
302            // exactly total_size, and that self.rep is a valid pointer to
303            // a `UDateFormat`.
304            unsafe {
305                assert!(common::Error::is_ok(status));
306                versioned_function!(udat_format)(
307                    self.rep,
308                    date_to_format,
309                    result.as_mut_c_ptr(),
310                    total_size as i32,
311                    &mut field_position_unused,
312                    &mut status,
313                );
314            };
315            common::Error::ok_or_warning(status)?;
316        }
317        String::try_from(&result)
318    }
319}
320
321#[cfg(test)]
322mod tests {
323    use super::*;
324
    /// Restores the timezone once its scope ends.
    ///
    /// Create it with `RestoreTimezone::new` and keep the value alive for as
    /// long as the temporary default time zone should stay in effect.
    struct RestoreTimezone {
        // Identifier of the default time zone that was active before the switch;
        // it is set back as the default when this value is dropped.
        timezone_to_restore: String,
    }
329
330    impl Drop for RestoreTimezone {
331        fn drop(&mut self) {
332            RestoreTimezone::set_default_time_zone(&self.timezone_to_restore).unwrap();
333        }
334    }
335
336    impl RestoreTimezone {
337        /// Set the timezone to the requested one, and restores to whatever the timezone
338        /// was before the set once the struct goes out of scope.
339        fn new(set_timezone: &str) -> Self {
340            let timezone_to_restore =
341                RestoreTimezone::get_default_time_zone().expect("could get old time zone");
342            RestoreTimezone::set_default_time_zone(set_timezone).expect("set timezone");
343            RestoreTimezone {
344                timezone_to_restore,
345            }
346        }
347
348        // The two methods below are lifted from `rust_icu_ucal` to not introduce
349        // a circular dependency.
350
351        fn set_default_time_zone(zone_id: &str) -> Result<(), common::Error> {
352            let mut status = common::Error::OK_CODE;
353            let mut zone_id_uchar = ustring::UChar::try_from(zone_id)?;
354            zone_id_uchar.make_z();
355            // Requires zone_id_uchar to be a valid pointer until the function returns.
356            unsafe {
357                assert!(common::Error::is_ok(status));
358                versioned_function!(ucal_setDefaultTimeZone)(zone_id_uchar.as_c_ptr(), &mut status);
359            };
360            common::Error::ok_or_warning(status)
361        }
362
363        fn get_default_time_zone() -> Result<String, common::Error> {
364            let mut status = common::Error::OK_CODE;
365
366            // Preflight the time zone first.
367            let time_zone_length = unsafe {
368                assert!(common::Error::is_ok(status));
369                versioned_function!(ucal_getDefaultTimeZone)(std::ptr::null_mut(), 0, &mut status)
370            } as usize;
371            common::Error::ok_preflight(status)?;
372
373            // Should this capacity include the terminating \u{0}?
374            let mut status = common::Error::OK_CODE;
375            let mut uchar = ustring::UChar::new_with_capacity(time_zone_length);
376
377            // Requires that uchar is a valid buffer.  Should be guaranteed by the constructor above.
378            unsafe {
379                assert!(common::Error::is_ok(status));
380                versioned_function!(ucal_getDefaultTimeZone)(
381                    uchar.as_mut_c_ptr(),
382                    time_zone_length as i32,
383                    &mut status,
384                )
385            };
386            common::Error::ok_or_warning(status)?;
387            String::try_from(&uchar)
388        }
389    }
390
391    #[test]
392    fn test_format_default_calendar() -> Result<(), common::Error> {
393        #[derive(Debug)]
394        struct Test {
395            _name: &'static str,
396            locale: &'static str,
397            timezone: &'static str,
398            date: sys::UDate,
399            expected: &'static str,
400            calendar: Option<ucal::UCalendar>,
401        }
402        let tests = vec![
403            Test {
404                _name: "French default",
405                locale: "fr-FR",
406                timezone: "America/Los_Angeles",
407                date: 100.0,
408                expected:
409                    "mercredi 31 décembre 1969 à 16:00:00 heure normale du Pacifique nord-américain",
410                calendar: None,
411            },
412            Test {
413                _name: "French default, a few hours later",
414                locale: "fr-FR",
415                timezone: "America/Los_Angeles",
416                date: 100000.0,
417                expected:
418                    "mercredi 31 décembre 1969 à 16:01:40 heure normale du Pacifique nord-américain",
419                calendar: None,
420            },
421            Test {
422                _name: "Serbian default",
423                locale: "sr-RS",
424                timezone: "America/Los_Angeles",
425                date: 100000.0,
426                expected:
427                    "среда, 31. децембар 1969. 16:01:40 Северноамеричко пацифичко стандардно време",
428                calendar: None,
429            },
430            // TODO: The Dutch time zones regressed: https://unicode-org.atlassian.net/browse/CLDR-17389
431            //Test {
432                //_name: "Dutch default",
433                //locale: "nl-NL",
434                //timezone: "America/Los_Angeles",
435                //date: 100000.0,
436                //expected: "woensdag 31 december 1969 om 16:01:40 Pacific-standaardtijd",
437                //calendar: None,
438            //},
439            //Test {
440                //_name: "Dutch islamic overrides locale calendar and timezone",
441                //locale: "nl-NL-u-ca-gregorian",
442                //timezone: "America/Los_Angeles",
443                //date: 100000.0,
444                //expected: "woensdag 22 Sjawal 1389 om 16:01:40 Pacific-standaardtijd",
445                //calendar: Some(
446                    //ucal::UCalendar::new(
447                        //"America/Los_Angeles",
448                        //"und-u-ca-islamic",
449                        //sys::UCalendarType::UCAL_DEFAULT,
450                    //)
451                    //.expect("created calendar"),
452                //),
453            //},
454            //Test {
455                //_name: "Dutch islamic take from locale",
456                //locale: "nl-NL-u-ca-islamic",
457                //timezone: "America/Los_Angeles",
458                //date: 200000.0,
459                //expected: "woensdag 22 Sjawal 1389 AH om 16:03:20 Pacific-standaardtijd",
460                //calendar: None,
461            //},
462            //Test {
463                //_name: "Dutch islamic take from locale",
464                //locale: "nl-NL-u-ca-islamic",
465                //timezone: "America/Los_Angeles",
466                //date: 200000.0,
467                //expected: "woensdag 22 Sjawal 1389 AH om 16:03:20 Pacific-standaardtijd",
468                //calendar: None,
469            //},
470        ];
471
472        let _restore_timezone = RestoreTimezone::new("UTC");
473        for t in tests {
474            let loc = uloc::ULoc::try_from(t.locale)?;
475            let tz_id = ustring::UChar::try_from(t.timezone)?;
476
477            let mut fmt = super::UDateFormat::new_with_styles(
478                sys::UDateFormatStyle::UDAT_FULL,
479                sys::UDateFormatStyle::UDAT_FULL,
480                &loc,
481                &tz_id,
482            )?;
483            if let Some(ref cal) = t.calendar {
484                fmt.set_calendar(&cal);
485            }
486
487            let fmt = fmt;
488            let actual = fmt.format(t.date)?;
489            assert_eq!(
490                actual, t.expected,
491                "(left==actual; right==expected)\n\ttest: {:?}",
492                t
493            );
494        }
495        Ok(())
496    }
497
498    #[test]
499    fn test_format_pattern() -> Result<(), common::Error> {
500        #[derive(Debug)]
501        struct Test {
502            date: sys::UDate,
503            pattern: &'static str,
504            expected: &'static str,
505        }
506        let tests = vec![
507            Test {
508                date: 100.0,
509                pattern: "yyyy-MM-dd'T'HH:mm:ssXX",
510                expected: "1969-12-31T19:00:00-0500",
511            },
512            Test {
513                date: 100000.0,
514                pattern: "yyyy-MM-dd'T'HH",
515                expected: "1969-12-31T19",
516            },
517            Test {
518                date: 100000.0,
519                pattern: "V",
520                expected: "usnyc",
521            },
522        ];
523        let loc = uloc::ULoc::try_from("en-US")?;
524        let tz_id = ustring::UChar::try_from("America/New_York")?;
525        for t in tests {
526            let pattern = ustring::UChar::try_from(t.pattern)?;
527            let fmt = super::UDateFormat::new_with_pattern(&loc, &tz_id, &pattern)?;
528            let actual = fmt.format(t.date)?;
529            assert_eq!(
530                actual, t.expected,
531                "want: {:?}, got: {:?}",
532                t.expected, actual
533            );
534        }
535        Ok(())
536    }
537
538    #[test]
539    fn parse_utf8() -> Result<(), common::Error> {
540        #[derive(Debug)]
541        struct Test {
542            input: &'static str,
543            pattern: &'static str,
544            expected: sys::UDate,
545        }
546        let tests: Vec<Test> = vec![
547            Test {
548                input: "2018-10-30T15:30:00-07:00",
549                pattern: "yyyy-MM-dd'T'HH:mm:ssXX",
550                expected: 1540938600000.0 as sys::UDate,
551            },
552            Test {
553                input: "2018-10-30T15:30:00-07:00",
554                // The entire "time" portion of this string is not used.
555                pattern: "yyyy-MM-dd",
556                expected: 1540872000000.0 as sys::UDate,
557            },
558        ];
559
560        let loc = uloc::ULoc::try_from("en-US")?;
561        let tz_id = ustring::UChar::try_from("America/New_York")?;
562
563        for test in tests {
564            let pattern = ustring::UChar::try_from(test.pattern)?;
565            let format = super::UDateFormat::new_with_pattern(&loc, &tz_id, &pattern)?;
566            let actual = format.parse(test.input)?;
567            assert_eq!(
568                actual, test.expected,
569                "want: {:?}, got: {:?}",
570                test.expected, actual
571            )
572        }
573        Ok(())
574    }
575
576    #[test]
577    fn best_pattern() -> Result<(), common::Error> {
578        #[derive(Debug)]
579        struct Test {
580            locale: &'static str,
581            skeleton: &'static str,
582            expected: &'static str,
583        }
584        let tests: Vec<Test> = vec![
585            Test {
586                locale: "sr-RS",
587                skeleton: "MMMMj",
588                expected: "LLLL HH",
589            },
590            Test {
591                locale: "en-US",
592                skeleton: "EEEE yyy LLL d H m s",
593                expected: "EEEE, MMM d, yyy, HH:mm:ss",
594            },
595        ];
596        for test in tests {
597            let locale = uloc::ULoc::try_from(test.locale)?;
598            let gen = UDatePatternGenerator::new(&locale)?.clone();
599            let actual = gen.get_best_pattern(&test.skeleton)?;
600            assert_eq!(actual, test.expected, "for test: {:?}", &test);
601        }
602        Ok(())
603    }
604}