icu_data/
lib.rs

1// Copyright 2019 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5//! Initializers for ICU data files.
6//!
7//! Use the library by instantiating a `Loader` and keeping a reference to it for as long as you
8//! need access to timezone data.  You can do this in your program as many times as needed, and the
9//! loader will make sure that the data is loaded before it is first used, and that it is unloaded
10//! once no more loaders are live.
11//!
12//! It is also possible to clone a loader in case you need to pass it along to ensure that timezone
13//! data is available.
14//!
15//! Example use:
16//!
17//! ```
18//! fn basic() {
19//!     let _loader = Loader::new().expect("loader is constructed with success");
20//!     let _loader2 = Loader::new().expect("second initialization is a no-operation");
21//!     let _loader3 = _loader2.clone();  // It is OK to clone a loader and keep it around.
22//! }
23//! ```
24
25use std::path::PathBuf;
26
27use anyhow::{Context, format_err};
28use std::borrow::Cow;
29use std::sync::{Arc, LazyLock, Mutex, Weak};
30use std::{env, fs, io};
31use thiserror::Error;
32use {rust_icu_common as icu, rust_icu_ucal as ucal, rust_icu_udata as udata};
33
// Process-wide bookkeeping for the loaded ICU data.  At most one data set is tracked at any
// given time.  Only a weak handle is kept here: it starts out empty and can be upgraded only
// while some strong reference (held by a `Loader`) is still alive.
static REFCOUNT: LazyLock<Mutex<Weak<PathBuf>>> = LazyLock::new(Mutex::default);
36
// The default directory in which to find the ICU data.  This value is handed to
// `udata::set_data_directory` when the first `Loader` is created.
// The file name (icudtl.dat) is deliberately omitted to conform to the loading
// rules described at:
// https://unicode-org.github.io/icu/userguide/icu_data/#how-data-loading-works
const ICU_DATA_PATH_DEFAULT: &str = "/pkg/data";
42
/// Minimum expected length of a time zone revision ID (e.g. "2019c").
///
/// The bound is inclusive and is compared against the byte length of the revision file contents.
const MIN_TZ_REVISION_ID_LENGTH: usize = 5;
/// Maximum expected length of a time zone revision ID.
///
/// The bound is inclusive and is compared against the byte length of the revision file contents.
const MAX_TZ_REVISION_ID_LENGTH: usize = 15;
47
/// Error type returned by `icu_data`. The individual enum values encode
/// classes of possible errors returned.
///
/// Every variant renders with an `[icu_data]:` prefix so that the origin of the
/// message is recognizable in logs.
#[derive(Error, Debug)]
pub enum Error {
    /// The operation failed for a reason described only by the wrapped
    /// `anyhow::Error` (for example a contextualized lower-level failure).
    #[error("[icu_data]: {}", _0)]
    Fail(anyhow::Error),
    /// The operation failed due to an underlying Zircon error.
    ///
    /// The second field optionally carries a human-readable detail message.
    #[error("[icu_data]: generic error: {}, details: {:?}", _0, _1)]
    Status(zx::Status, Option<Cow<'static, str>>),
    /// The operation failed due to an IO error.
    #[error("[icu_data]: IO error: {}", _0)]
    IO(io::Error),
    /// The operation failed due to an ICU library error.
    #[error("[icu_data]: ICU error: {}", _0)]
    ICU(icu::Error),
}
64impl From<zx::Status> for Error {
65    fn from(status: zx::Status) -> Self {
66        Error::Status(status, None)
67    }
68}
69impl From<io::Error> for Error {
70    fn from(err: io::Error) -> Self {
71        Error::IO(err)
72    }
73}
74impl From<anyhow::Error> for Error {
75    fn from(err: anyhow::Error) -> Self {
76        Error::Fail(err)
77    }
78}
79impl From<icu::Error> for Error {
80    fn from(err: icu::Error) -> Self {
81        Error::ICU(err)
82    }
83}
84
/// Manages the lifecycle of the loaded ICU data.
///
/// `Loader` can be created using `Loader::new` and can be cloned.  For as long as any Loader
/// remains in scope, the ICU data will not be unloaded.
///
/// `Loader` is both `Send` and `Sync`, so it can be moved to, or shared with, other threads.
#[derive(Debug, Clone)]
pub struct Loader {
    // The reference here holds the ICU data in memory. It should be held live
    // until the end of the program.
    _refs: Arc<PathBuf>,
}

// `Loader` only contains an `Arc<PathBuf>`, so the compiler derives `Send` and `Sync` for it
// automatically; no `unsafe impl` is needed.  The assertion below is checked at compile time and
// fails the build if a future field ever takes those guarantees away.
const _: () = {
    const fn assert_send_and_sync<T: Send + Sync>() {}
    assert_send_and_sync::<Loader>();
};
97
98impl Loader {
99    /// Initializes the ICU dynamic timezone data, based on the default resource directory.
100    ///
101    /// The caller should create a `Loader` very early on in the lifetime of the program, and keep
102    /// instances of `Loader` alive until the ICU data is needed.  You can make as many `Loader`
103    /// objects as you need.  The data will be unloaded only after the last of them leaves scope.
104    pub fn new() -> Result<Self, Error> {
105        Self::new_with_optional_tz_resources(None, None)
106    }
107
108    /// Initializes ICU data, loading time zone resources from the supplied `path`.
109    ///
110    /// See documentation for `new` for calling constraints.
111    pub fn new_with_tz_resource_path(tzdata_dir_path: &str) -> Result<Self, Error> {
112        Self::new_with_optional_tz_resources(Some(tzdata_dir_path), None)
113    }
114
115    /// Initializes ICU data, loading time zone resources from the supplied `path` and validating
116    /// the time zone revision ID against the ID contained in the file at `revision_file_path`.
117    ///
118    /// See documentation for `new` for calling constraints.
119    pub fn new_with_tz_resources_and_validation(
120        tzdata_dir_path: &str,
121        tz_revision_file_path: &str,
122    ) -> Result<Self, Error> {
123        Self::new_with_optional_tz_resources(Some(tzdata_dir_path), Some(tz_revision_file_path))
124    }
125
126    // Ensures that all calls to create a `Loader` go through the same code path.
127    fn new_with_optional_tz_resources(
128        tzdata_dir_path: Option<&str>,
129        tz_revision_file_path: Option<&str>,
130    ) -> Result<Self, Error> {
131        // The lock contention should not be an issue.  Only a few calls (single digits) to this
132        // function are expected.  So we take a write lock immmediately.
133        let mut l = REFCOUNT.lock().expect("refcount lock acquired");
134        match l.upgrade() {
135            Some(_refs) => Ok(Loader { _refs }),
136            None => {
137                // Load up the TZ files directory.
138                if let Some(p) = tzdata_dir_path {
139                    let for_path = fs::File::open(p)
140                        .map_err(|e| Error::Fail(format_err!("io error: {}", e)))
141                        .with_context(|| format!("error while opening: {:?}", &tzdata_dir_path))?;
142                    let meta = for_path
143                        .metadata()
144                        .with_context(|| format!("while getting metadata for: {:?}", &p))?;
145                    if !meta.is_dir() {
146                        return Err(Error::Fail(format_err!("not a directory: {}", p)));
147                    }
148                    // This is the default API used to configure the ICU library, so we are
149                    // just using it here.  Even though it is not a preferred way to configure
150                    // Fuchsia programs.
151                    // Further, we want to load the same ICU data for all programs that need this
152                    // file.
153                    env::set_var("ICU_TIMEZONE_FILES_DIR", p);
154                }
155
156                // Read ICU data file from the filesystem. The ICU library should
157                // take care to load only the needed parts, since the entire common
158                // data file is fairly large.
159                let path = PathBuf::from(ICU_DATA_PATH_DEFAULT);
160                udata::set_data_directory(&path);
161                let _refs = Arc::new(path);
162                Self::validate_revision(tz_revision_file_path)?;
163                (*l) = Arc::downgrade(&_refs);
164                Ok(Loader { _refs })
165            }
166        }
167    }
168
169    fn validate_revision(tz_revision_file_path: Option<&str>) -> Result<(), Error> {
170        match tz_revision_file_path {
171            None => Ok(()),
172            Some(tz_revision_file_path) => {
173                let expected_revision_id = std::fs::read_to_string(tz_revision_file_path)
174                    .with_context(|| {
175                        format!("could not read file: {:?}", &tz_revision_file_path)
176                    })?;
177                if !(MIN_TZ_REVISION_ID_LENGTH..=MAX_TZ_REVISION_ID_LENGTH)
178                    .contains(&expected_revision_id.len())
179                {
180                    return Err(Error::Status(
181                        zx::Status::IO_DATA_INTEGRITY,
182                        Some(
183                            format!(
184                                "invalid revision ID in {}: {}",
185                                tz_revision_file_path, expected_revision_id
186                            )
187                            .into(),
188                        ),
189                    ));
190                }
191
192                let actual_revision_id = ucal::get_tz_data_version().with_context(|| {
193                    format!("while getting data version from: {:?}", &tz_revision_file_path)
194                })?;
195                if expected_revision_id != actual_revision_id {
196                    return Err(Error::Status(
197                        zx::Status::IO_DATA_INTEGRITY,
198                        Some(
199                            format!(
200                                "expected revision ID {} but got {}",
201                                expected_revision_id, actual_revision_id
202                            )
203                            .into(),
204                        ),
205                    ));
206                }
207
208                Ok(())
209            }
210        }
211    }
212}
213
// Tests for `Loader`.  They read the ICU data and the time zone resources from the absolute
// `/pkg/data/...` paths spelled out below, so those files must be present where the tests run.
#[cfg(test)]
mod tests {
    use super::*;
    use assert_matches::assert_matches;
    use rust_icu_uenum as uenum;

    // NOTE(review): the START/END markers below look like anchors for snippet extraction by
    // external documentation - confirm before renaming them or restructuring the code in between.
    // [START loader_example]
    #[test]
    fn initialization() {
        let _loader = Loader::new().expect("loader is constructed with success");
        let _loader2 = Loader::new().expect("loader is just fine with a second initialization");
        // Take the first time zone ID reported by ICU to check that the data is readable.
        let tz: String = uenum::open_time_zones().unwrap().take(1).map(|e| e.unwrap()).collect();
        assert_eq!(tz, "ACT");
        // The library will be cleaned up after the last of the loaders goes out of scope.
    }

    #[test]
    fn you_can_also_clone_loaders() {
        let _loader = Loader::new().expect("loader is constructed with success");
        let _loader2 = Loader::new().expect("loader is just fine with a second initialization");
        let _loader3 = _loader2.clone();
        let tz: String = uenum::open_time_zones().unwrap().take(1).map(|e| e.unwrap()).collect();
        assert_eq!(tz, "ACT");
    }

    #[test]
    fn two_initializations_in_a_row() {
        // Each scope drops its loader before the next one is created, so the second scope goes
        // through a fresh initialization.
        {
            let _loader = Loader::new().expect("loader is constructed with success");
            let tz: String =
                uenum::open_time_zones().unwrap().take(1).map(|e| e.unwrap()).collect();
            assert_eq!(tz, "ACT");
        }
        {
            let _loader2 = Loader::new().expect("loader is just fine with a second initialization");
            let tz: String =
                uenum::open_time_zones().unwrap().take(1).map(|e| e.unwrap()).collect();
            assert_eq!(tz, "ACT");
        }
    }
    // [END loader_example]

    // Uses the default loader: no time zone resource directory and no revision file are passed.
    #[test]
    fn test_tz_res_loading_without_validation() -> Result<(), Error> {
        let _loader = Loader::new().expect("loader is constructed with success");
        let tz: String = uenum::open_time_zones()?.take(1).map(|e| e.unwrap()).collect();
        assert_eq!(tz, "ACT");
        Ok(())
    }

    // Loads time zone resources from an explicit directory and expects the revision file to
    // match the version reported by the ICU library.
    #[test]
    fn test_tz_res_loading_with_validation_valid() -> Result<(), Error> {
        let _loader = Loader::new_with_tz_resources_and_validation(
            "/pkg/data/tzdata/icu/44/le",
            "/pkg/data/tzdata/revision.txt",
        )
        .expect("loader is constructed successfully");
        let tz: String = uenum::open_time_zones()?.take(1).map(|e| e.unwrap()).collect();
        assert_eq!(tz, "ACT");
        Ok(())
    }

    // Expects construction to fail with an `IO_DATA_INTEGRITY` status carrying a detail message.
    // NOTE(review): validation is skipped whenever another `Loader` is alive in the same process,
    // so this test presumably relies on not sharing a process with a live loader - confirm how
    // the test runner isolates test cases.
    #[test]
    fn test_tz_res_loading_with_validation_invalid() -> Result<(), Error> {
        let result = Loader::new_with_tz_resources_and_validation(
            "/pkg/data/tzdata/icu/44/le",
            "/pkg/data/test_inconsistent_revision.txt",
        );
        let err = result.unwrap_err();
        assert_matches!(err, Error::Status(zx::Status::IO_DATA_INTEGRITY, Some(_)));
        Ok(())
    }
}