icu_data/
lib.rs

1// Copyright 2019 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5//! Initializers for ICU data files.
6//!
7//! Use the library by instantiating a `Loader` and keeping a reference to it for as long as you
8//! need access to timezone data.  You can do this in your program as many times as needed, and the
9//! loader will make sure that the data is loaded before it is first used, and that it is unloaded
10//! once no more loaders are live.
11//!
12//! It is also possible to clone a loader in case you need to pass it along to ensure that timezone
13//! data is available.
14//!
15//! Example use:
16//!
17//! ```
18//! fn basic() {
19//!     let _loader = Loader::new().expect("loader is constructed with success");
20//!     let _loader2 = Loader::new().expect("second initialization is a no-operation");
21//!     let _loader3 = _loader2.clone();  // It is OK to clone a loader and keep it around.
22//! }
23//! ```
24
25use std::path::PathBuf;
26
27use anyhow::{Context, format_err};
28use fuchsia_sync::Mutex;
29use std::borrow::Cow;
30use std::sync::{Arc, LazyLock, Weak};
31use std::{env, fs, io};
32use thiserror::Error;
33use {rust_icu_common as icu, rust_icu_ucal as ucal, rust_icu_udata as udata};
34
// Process-wide weak handle to the ICU data directory path shared by all live `Loader`s.
// Creating a `Loader` first tries to upgrade this handle so that an existing load is reused;
// once every `Loader` has been dropped the handle no longer upgrades, and the next `Loader`
// goes through initialization again.  At most one load is tracked at any given time.
static REFCOUNT: LazyLock<Mutex<Weak<PathBuf>>> = LazyLock::new(|| Mutex::new(Weak::new()));

// The default location at which to find the ICU data.
// The file name (icudtl.dat) is deliberately omitted to conform to the loading
// rules described at:
// https://unicode-org.github.io/icu/userguide/icu_data/#how-data-loading-works
const ICU_DATA_PATH_DEFAULT: &str = "/pkg/data";

/// Minimum expected length, in bytes, of a time zone revision ID (e.g. "2019c").
const MIN_TZ_REVISION_ID_LENGTH: usize = 5;
/// Maximum expected length, in bytes, of a time zone revision ID.
const MAX_TZ_REVISION_ID_LENGTH: usize = 15;
48
/// Error type returned by `icu_data`. The individual enum values encode
/// classes of possible errors returned.
#[derive(Error, Debug)]
pub enum Error {
    /// The operation failed for a reason described only by the wrapped `anyhow::Error`.
    #[error("[icu_data]: {}", _0)]
    Fail(anyhow::Error),
    /// The operation failed due to an underlying Zircon error.  The optional second field
    /// carries a human-readable description of what went wrong.
    #[error("[icu_data]: generic error: {}, details: {:?}", _0, _1)]
    Status(zx::Status, Option<Cow<'static, str>>),
    /// The operation failed due to an IO error.
    #[error("[icu_data]: IO error: {}", _0)]
    IO(io::Error),
    /// The operation failed due to an ICU library error.
    #[error("[icu_data]: ICU error: {}", _0)]
    ICU(icu::Error),
}
65impl From<zx::Status> for Error {
66    fn from(status: zx::Status) -> Self {
67        Error::Status(status, None)
68    }
69}
70impl From<io::Error> for Error {
71    fn from(err: io::Error) -> Self {
72        Error::IO(err)
73    }
74}
75impl From<anyhow::Error> for Error {
76    fn from(err: anyhow::Error) -> Self {
77        Error::Fail(err)
78    }
79}
80impl From<icu::Error> for Error {
81    fn from(err: icu::Error) -> Self {
82        Error::ICU(err)
83    }
84}
85
/// Manages the lifecycle of the loaded ICU data.
///
/// `Loader` can be created using `Loader::new` and can be cloned.  For as long as any Loader
/// remains in scope, the ICU data will not be unloaded.
///
/// `Loader` is `Send` and `Sync`, so it can be moved to or shared between threads.
#[derive(Debug, Clone)]
pub struct Loader {
    // Shared handle that marks the ICU data as in use.  Every `Loader` created while another
    // one is alive holds a clone of the same `Arc`; the field is never read, it only needs to
    // stay alive for as long as the ICU data is needed.
    _refs: Arc<PathBuf>,
}

// `Loader` only wraps an `Arc<PathBuf>`, so the compiler already implements `Send` and `Sync`
// for it and no `unsafe impl` is required.  This assertion fails the build if a future field
// change would silently drop either auto trait.
const _: () = {
    const fn assert_send_sync<T: Send + Sync>() {}
    assert_send_sync::<Loader>();
};
98
99impl Loader {
100    /// Initializes the ICU dynamic timezone data, based on the default resource directory.
101    ///
102    /// The caller should create a `Loader` very early on in the lifetime of the program, and keep
103    /// instances of `Loader` alive until the ICU data is needed.  You can make as many `Loader`
104    /// objects as you need.  The data will be unloaded only after the last of them leaves scope.
105    pub fn new() -> Result<Self, Error> {
106        Self::new_with_optional_tz_resources(None, None)
107    }
108
109    /// Initializes ICU data, loading time zone resources from the supplied `path`.
110    ///
111    /// See documentation for `new` for calling constraints.
112    pub fn new_with_tz_resource_path(tzdata_dir_path: &str) -> Result<Self, Error> {
113        Self::new_with_optional_tz_resources(Some(tzdata_dir_path), None)
114    }
115
116    /// Initializes ICU data, loading time zone resources from the supplied `path` and validating
117    /// the time zone revision ID against the ID contained in the file at `revision_file_path`.
118    ///
119    /// See documentation for `new` for calling constraints.
120    pub fn new_with_tz_resources_and_validation(
121        tzdata_dir_path: &str,
122        tz_revision_file_path: &str,
123    ) -> Result<Self, Error> {
124        Self::new_with_optional_tz_resources(Some(tzdata_dir_path), Some(tz_revision_file_path))
125    }
126
127    // Ensures that all calls to create a `Loader` go through the same code path.
128    fn new_with_optional_tz_resources(
129        tzdata_dir_path: Option<&str>,
130        tz_revision_file_path: Option<&str>,
131    ) -> Result<Self, Error> {
132        // The lock contention should not be an issue.  Only a few calls (single digits) to this
133        // function are expected.  So we take a write lock immmediately.
134        let mut l = REFCOUNT.lock();
135        match l.upgrade() {
136            Some(_refs) => Ok(Loader { _refs }),
137            None => {
138                // Load up the TZ files directory.
139                if let Some(p) = tzdata_dir_path {
140                    let for_path = fs::File::open(p)
141                        .map_err(|e| Error::Fail(format_err!("io error: {}", e)))
142                        .with_context(|| format!("error while opening: {:?}", &tzdata_dir_path))?;
143                    let meta = for_path
144                        .metadata()
145                        .with_context(|| format!("while getting metadata for: {:?}", &p))?;
146                    if !meta.is_dir() {
147                        return Err(Error::Fail(format_err!("not a directory: {}", p)));
148                    }
149                    // This is the default API used to configure the ICU library, so we are
150                    // just using it here.  Even though it is not a preferred way to configure
151                    // Fuchsia programs.
152                    // Further, we want to load the same ICU data for all programs that need this
153                    // file.
154                    unsafe { env::set_var("ICU_TIMEZONE_FILES_DIR", p) };
155                }
156
157                // Read ICU data file from the filesystem. The ICU library should
158                // take care to load only the needed parts, since the entire common
159                // data file is fairly large.
160                let path = PathBuf::from(ICU_DATA_PATH_DEFAULT);
161                udata::set_data_directory(&path);
162                let _refs = Arc::new(path);
163                Self::validate_revision(tz_revision_file_path)?;
164                (*l) = Arc::downgrade(&_refs);
165                Ok(Loader { _refs })
166            }
167        }
168    }
169
170    fn validate_revision(tz_revision_file_path: Option<&str>) -> Result<(), Error> {
171        match tz_revision_file_path {
172            None => Ok(()),
173            Some(tz_revision_file_path) => {
174                let expected_revision_id = std::fs::read_to_string(tz_revision_file_path)
175                    .with_context(|| {
176                        format!("could not read file: {:?}", &tz_revision_file_path)
177                    })?;
178                if !(MIN_TZ_REVISION_ID_LENGTH..=MAX_TZ_REVISION_ID_LENGTH)
179                    .contains(&expected_revision_id.len())
180                {
181                    return Err(Error::Status(
182                        zx::Status::IO_DATA_INTEGRITY,
183                        Some(
184                            format!(
185                                "invalid revision ID in {}: {}",
186                                tz_revision_file_path, expected_revision_id
187                            )
188                            .into(),
189                        ),
190                    ));
191                }
192
193                let actual_revision_id = ucal::get_tz_data_version().with_context(|| {
194                    format!("while getting data version from: {:?}", &tz_revision_file_path)
195                })?;
196                if expected_revision_id != actual_revision_id {
197                    return Err(Error::Status(
198                        zx::Status::IO_DATA_INTEGRITY,
199                        Some(
200                            format!(
201                                "expected revision ID {} but got {}",
202                                expected_revision_id, actual_revision_id
203                            )
204                            .into(),
205                        ),
206                    ));
207                }
208
209                Ok(())
210            }
211        }
212    }
213}
214
// Tests for `Loader`.  Each test constructs a loader and then checks that ICU can enumerate
// time zones, expecting the first enumerated zone to be "ACT".
// NOTE(review): these tests read ICU data and tzdata files under `/pkg/data`; presumably the
// test package ships them -- confirm.
#[cfg(test)]
mod tests {
    use super::*;
    use assert_matches::assert_matches;
    use rust_icu_uenum as uenum;

    // [START loader_example]
    // Two loaders created back to back can coexist.
    #[test]
    fn initialization() {
        let _loader = Loader::new().expect("loader is constructed with success");
        let _loader2 = Loader::new().expect("loader is just fine with a second initialization");
        let tz: String = uenum::open_time_zones().unwrap().take(1).map(|e| e.unwrap()).collect();
        assert_eq!(tz, "ACT");
        // The library will be cleaned up after the last of the loaders goes out of scope.
    }

    // A clone of a loader can be kept alongside the loaders made with `Loader::new`.
    #[test]
    fn you_can_also_clone_loaders() {
        let _loader = Loader::new().expect("loader is constructed with success");
        let _loader2 = Loader::new().expect("loader is just fine with a second initialization");
        let _loader3 = _loader2.clone();
        let tz: String = uenum::open_time_zones().unwrap().take(1).map(|e| e.unwrap()).collect();
        assert_eq!(tz, "ACT");
    }

    // A loader can be created again after an earlier loader has gone out of scope.
    #[test]
    fn two_initializations_in_a_row() {
        {
            let _loader = Loader::new().expect("loader is constructed with success");
            let tz: String =
                uenum::open_time_zones().unwrap().take(1).map(|e| e.unwrap()).collect();
            assert_eq!(tz, "ACT");
        }
        {
            let _loader2 = Loader::new().expect("loader is just fine with a second initialization");
            let tz: String =
                uenum::open_time_zones().unwrap().take(1).map(|e| e.unwrap()).collect();
            assert_eq!(tz, "ACT");
        }
    }
    // [END loader_example]

    // NOTE(review): despite its name, this test calls `Loader::new()` and therefore does not
    // pass a time zone resource path -- confirm whether `new_with_tz_resource_path` was
    // intended.
    #[test]
    fn test_tz_res_loading_without_validation() -> Result<(), Error> {
        let _loader = Loader::new().expect("loader is constructed with success");
        let tz: String = uenum::open_time_zones()?.take(1).map(|e| e.unwrap()).collect();
        assert_eq!(tz, "ACT");
        Ok(())
    }

    // Construction succeeds when the revision file is expected to match the loaded tzdata.
    #[test]
    fn test_tz_res_loading_with_validation_valid() -> Result<(), Error> {
        let _loader = Loader::new_with_tz_resources_and_validation(
            "/pkg/data/tzdata/icu/44/le",
            "/pkg/data/tzdata/revision.txt",
        )
        .expect("loader is constructed successfully");
        let tz: String = uenum::open_time_zones()?.take(1).map(|e| e.unwrap()).collect();
        assert_eq!(tz, "ACT");
        Ok(())
    }

    // Construction must fail with `IO_DATA_INTEGRITY` when the revision file is inconsistent.
    // NOTE(review): if a `Loader` created by another test is still alive when this runs, the
    // arguments are ignored and construction succeeds, which would make `unwrap_err` panic;
    // presumably the test runner isolates these tests -- confirm.
    #[test]
    fn test_tz_res_loading_with_validation_invalid() -> Result<(), Error> {
        let result = Loader::new_with_tz_resources_and_validation(
            "/pkg/data/tzdata/icu/44/le",
            "/pkg/data/test_inconsistent_revision.txt",
        );
        let err = result.unwrap_err();
        assert_matches!(err, Error::Status(zx::Status::IO_DATA_INTEGRITY, Some(_)));
        Ok(())
    }
}