icu_data/
lib.rs

1// Copyright 2019 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5//! Initializers for ICU data files.
6//!
7//! Use the library by instantiating a `Loader` and keeping a reference to it for as long as you
8//! need access to timezone data.  You can do this in your program as many times as needed, and the
9//! loader will make sure that the data is loaded before it is first used, and that it is unloaded
10//! once no more loaders are live.
11//!
12//! It is also possible to clone a loader in case you need to pass it along to ensure that timezone
13//! data is available.
14//!
15//! Example use:
16//!
17//! ```
18//! fn basic() {
19//!     let _loader = Loader::new().expect("loader is constructed with success");
20//!     let _loader2 = Loader::new().expect("second initialization is a no-operation");
21//!     let _loader3 = _loader2.clone();  // It is OK to clone a loader and keep it around.
22//! }
23//! ```
24
25use std::path::PathBuf;
26
27use anyhow::{format_err, Context};
28use lazy_static::lazy_static;
29use std::borrow::Cow;
30use std::sync::{Arc, Mutex, Weak};
31use std::{env, fs, io};
32use thiserror::Error;
33use {rust_icu_common as icu, rust_icu_ucal as ucal, rust_icu_udata as udata};
34
lazy_static! {
    // The storage for the loaded ICU data.  At most one may be loaded at any given time.
    //
    // Only a `Weak` reference is kept here: the strong references are owned by the `Loader`
    // values handed out to callers, so upgrading this reference succeeds only while at least
    // one `Loader` is still alive.  The `Mutex` serializes loader construction.
    static ref REFCOUNT: Mutex<Weak<PathBuf>> = Mutex::new(Weak::new());
}
39
// The default directory in which to find the ICU data.
// The data file name (icudtl.dat) is deliberately omitted, so that only the
// directory is handed to ICU; this conforms to the loading rules described at:
// https://unicode-org.github.io/icu/userguide/icu_data/#how-data-loading-works
const ICU_DATA_PATH_DEFAULT: &str = "/pkg/data";
45
/// Minimum expected length, in bytes, of a time zone revision ID (e.g. "2019c").
const MIN_TZ_REVISION_ID_LENGTH: usize = 5;
/// Maximum expected length, in bytes, of a time zone revision ID.
const MAX_TZ_REVISION_ID_LENGTH: usize = 15;
50
/// Error type returned by `icu_data`. The individual enum values encode
/// classes of possible errors returned.
#[derive(Error, Debug)]
pub enum Error {
    /// The operation failed for the reason described by the wrapped `anyhow::Error`.
    #[error("[icu_data]: {}", _0)]
    Fail(anyhow::Error),
    /// The operation failed with a Zircon status, optionally accompanied by a
    /// human-readable detail message.
    #[error("[icu_data]: generic error: {}, details: {:?}", _0, _1)]
    Status(zx::Status, Option<Cow<'static, str>>),
    /// The operation failed due to an IO error.
    #[error("[icu_data]: IO error: {}", _0)]
    IO(io::Error),
    /// The operation failed due to an ICU library error.
    #[error("[icu_data]: ICU error: {}", _0)]
    ICU(icu::Error),
}
67impl From<zx::Status> for Error {
68    fn from(status: zx::Status) -> Self {
69        Error::Status(status, None)
70    }
71}
72impl From<io::Error> for Error {
73    fn from(err: io::Error) -> Self {
74        Error::IO(err)
75    }
76}
77impl From<anyhow::Error> for Error {
78    fn from(err: anyhow::Error) -> Self {
79        Error::Fail(err)
80    }
81}
82impl From<icu::Error> for Error {
83    fn from(err: icu::Error) -> Self {
84        Error::ICU(err)
85    }
86}
87
/// Manages the lifecycle of the loaded ICU data.
///
/// `Loader` can be created using `Loader::new` and can be cloned.  For as long as any Loader
/// remains in scope, the ICU data will not be unloaded.
///
/// Cloning is cheap: every clone shares the same reference-counted handle.
#[derive(Debug, Clone)]
pub struct Loader {
    // The reference here holds the ICU data in memory. It should be held live
    // until the end of the program.
    _refs: Arc<PathBuf>,
}

// `Loader` is OK to be sent to, and shared between, threads.  No `unsafe impl` is needed for
// that: its only field is an `Arc<PathBuf>`, which is `Send + Sync`, so the compiler derives
// both auto traits.  The function below is never called; it only makes the build fail if a
// future field silently takes that guarantee away.
fn _assert_loader_is_send_and_sync() {
    fn check<T: Send + Sync>() {}
    check::<Loader>();
}
100
101impl Loader {
102    /// Initializes the ICU dynamic timezone data, based on the default resource directory.
103    ///
104    /// The caller should create a `Loader` very early on in the lifetime of the program, and keep
105    /// instances of `Loader` alive until the ICU data is needed.  You can make as many `Loader`
106    /// objects as you need.  The data will be unloaded only after the last of them leaves scope.
107    pub fn new() -> Result<Self, Error> {
108        Self::new_with_optional_tz_resources(None, None)
109    }
110
111    /// Initializes ICU data, loading time zone resources from the supplied `path`.
112    ///
113    /// See documentation for `new` for calling constraints.
114    pub fn new_with_tz_resource_path(tzdata_dir_path: &str) -> Result<Self, Error> {
115        Self::new_with_optional_tz_resources(Some(tzdata_dir_path), None)
116    }
117
118    /// Initializes ICU data, loading time zone resources from the supplied `path` and validating
119    /// the time zone revision ID against the ID contained in the file at `revision_file_path`.
120    ///
121    /// See documentation for `new` for calling constraints.
122    pub fn new_with_tz_resources_and_validation(
123        tzdata_dir_path: &str,
124        tz_revision_file_path: &str,
125    ) -> Result<Self, Error> {
126        Self::new_with_optional_tz_resources(Some(tzdata_dir_path), Some(tz_revision_file_path))
127    }
128
129    // Ensures that all calls to create a `Loader` go through the same code path.
130    fn new_with_optional_tz_resources(
131        tzdata_dir_path: Option<&str>,
132        tz_revision_file_path: Option<&str>,
133    ) -> Result<Self, Error> {
134        // The lock contention should not be an issue.  Only a few calls (single digits) to this
135        // function are expected.  So we take a write lock immmediately.
136        let mut l = REFCOUNT.lock().expect("refcount lock acquired");
137        match l.upgrade() {
138            Some(_refs) => Ok(Loader { _refs }),
139            None => {
140                // Load up the TZ files directory.
141                if let Some(p) = tzdata_dir_path {
142                    let for_path = fs::File::open(p)
143                        .map_err(|e| Error::Fail(format_err!("io error: {}", e)))
144                        .with_context(|| format!("error while opening: {:?}", &tzdata_dir_path))?;
145                    let meta = for_path
146                        .metadata()
147                        .with_context(|| format!("while getting metadata for: {:?}", &p))?;
148                    if !meta.is_dir() {
149                        return Err(Error::Fail(format_err!("not a directory: {}", p)));
150                    }
151                    // This is the default API used to configure the ICU library, so we are
152                    // just using it here.  Even though it is not a preferred way to configure
153                    // Fuchsia programs.
154                    // Further, we want to load the same ICU data for all programs that need this
155                    // file.
156                    env::set_var("ICU_TIMEZONE_FILES_DIR", p);
157                }
158
159                // Read ICU data file from the filesystem. The ICU library should
160                // take care to load only the needed parts, since the entire common
161                // data file is fairly large.
162                let path = PathBuf::from(ICU_DATA_PATH_DEFAULT);
163                udata::set_data_directory(&path);
164                let _refs = Arc::new(path);
165                Self::validate_revision(tz_revision_file_path)?;
166                (*l) = Arc::downgrade(&_refs);
167                Ok(Loader { _refs })
168            }
169        }
170    }
171
172    fn validate_revision(tz_revision_file_path: Option<&str>) -> Result<(), Error> {
173        match tz_revision_file_path {
174            None => Ok(()),
175            Some(tz_revision_file_path) => {
176                let expected_revision_id = std::fs::read_to_string(tz_revision_file_path)
177                    .with_context(|| {
178                        format!("could not read file: {:?}", &tz_revision_file_path)
179                    })?;
180                if !(MIN_TZ_REVISION_ID_LENGTH..=MAX_TZ_REVISION_ID_LENGTH)
181                    .contains(&expected_revision_id.len())
182                {
183                    return Err(Error::Status(
184                        zx::Status::IO_DATA_INTEGRITY,
185                        Some(
186                            format!(
187                                "invalid revision ID in {}: {}",
188                                tz_revision_file_path, expected_revision_id
189                            )
190                            .into(),
191                        ),
192                    ));
193                }
194
195                let actual_revision_id = ucal::get_tz_data_version().with_context(|| {
196                    format!("while getting data version from: {:?}", &tz_revision_file_path)
197                })?;
198                if expected_revision_id != actual_revision_id {
199                    return Err(Error::Status(
200                        zx::Status::IO_DATA_INTEGRITY,
201                        Some(
202                            format!(
203                                "expected revision ID {} but got {}",
204                                expected_revision_id, actual_revision_id
205                            )
206                            .into(),
207                        ),
208                    ));
209                }
210
211                Ok(())
212            }
213        }
214    }
215}
216
// Tests for `Loader`.  They need real ICU data and time zone resources at the hard-coded
// `/pkg/data/...` paths used below, so they are only meaningful in an environment that
// provides those files.
//
// NOTE(review): all tests share the process-global `REFCOUNT`.  If the harness runs them
// concurrently in one process, a test may find another test's loader alive and take the
// early-return path in `new_with_optional_tz_resources`, which skips revision validation.
// The two `*_with_validation_*` tests presumably rely on not being run that way; confirm.
#[cfg(test)]
mod tests {
    use super::*;
    use assert_matches::assert_matches;
    use rust_icu_uenum as uenum;

    // [START loader_example]
    #[test]
    fn initialization() {
        let _loader = Loader::new().expect("loader is constructed with success");
        let _loader2 = Loader::new().expect("loader is just fine with a second initialization");
        let tz: String = uenum::open_time_zones().unwrap().take(1).map(|e| e.unwrap()).collect();
        assert_eq!(tz, "ACT");
        // The library will be cleaned up after the last of the loaders goes out of scope.
    }

    #[test]
    fn you_can_also_clone_loaders() {
        let _loader = Loader::new().expect("loader is constructed with success");
        let _loader2 = Loader::new().expect("loader is just fine with a second initialization");
        let _loader3 = _loader2.clone();
        let tz: String = uenum::open_time_zones().unwrap().take(1).map(|e| e.unwrap()).collect();
        assert_eq!(tz, "ACT");
    }

    #[test]
    fn two_initializations_in_a_row() {
        {
            let _loader = Loader::new().expect("loader is constructed with success");
            let tz: String =
                uenum::open_time_zones().unwrap().take(1).map(|e| e.unwrap()).collect();
            assert_eq!(tz, "ACT");
        }
        {
            let _loader2 = Loader::new().expect("loader is just fine with a second initialization");
            let tz: String =
                uenum::open_time_zones().unwrap().take(1).map(|e| e.unwrap()).collect();
            assert_eq!(tz, "ACT");
        }
    }
    // [END loader_example]

    // Loads with the default settings (no explicit time zone directory, no revision file) and
    // checks that the first enumerated time zone ID is "ACT".
    #[test]
    fn test_tz_res_loading_without_validation() -> Result<(), Error> {
        let _loader = Loader::new().expect("loader is constructed with success");
        let tz: String = uenum::open_time_zones()?.take(1).map(|e| e.unwrap()).collect();
        assert_eq!(tz, "ACT");
        Ok(())
    }

    // Loads time zone resources from an explicit directory together with a revision file;
    // construction is expected to succeed.
    #[test]
    fn test_tz_res_loading_with_validation_valid() -> Result<(), Error> {
        let _loader = Loader::new_with_tz_resources_and_validation(
            "/pkg/data/tzdata/icu/44/le",
            "/pkg/data/tzdata/revision.txt",
        )
        .expect("loader is constructed successfully");
        let tz: String = uenum::open_time_zones()?.take(1).map(|e| e.unwrap()).collect();
        assert_eq!(tz, "ACT");
        Ok(())
    }

    // Same as above, but with a revision file that is expected to be rejected: construction
    // must fail with `IO_DATA_INTEGRITY` and a detail message.
    #[test]
    fn test_tz_res_loading_with_validation_invalid() -> Result<(), Error> {
        let result = Loader::new_with_tz_resources_and_validation(
            "/pkg/data/tzdata/icu/44/le",
            "/pkg/data/test_inconsistent_revision.txt",
        );
        let err = result.unwrap_err();
        assert_matches!(err, Error::Status(zx::Status::IO_DATA_INTEGRITY, Some(_)));
        Ok(())
    }
}