_intl_lookup_rustc_static/
lib.rs

1// Copyright 2020 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use anyhow::{Context, Result};
6use log::error;
7use std::collections::BTreeMap;
8use std::{ffi, fs, io, mem, str};
9use {
10    intl_model as model, rust_icu_common as ucommon, rust_icu_sys as usys, rust_icu_uloc as uloc,
11};
12
/// The directory where localized resources are kept.
///
/// Each immediate subdirectory is named after a locale (for example
/// `.../assets/locales/en-US`) and holds the message files for that locale.
pub(crate) const ASSETS_DIR: &str = "/pkg/data/assets/locales";
15
/// Status codes reported by the lookup API.
///
/// The numeric value of each variant is what the FFI entry points write into
/// their `status` out-parameter, so the discriminants must stay stable.
#[repr(i8)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LookupStatus {
    /// No error.
    OK = 0,
    /// The value requested is not available.
    Unavailable = 1,
    /// The argument passed in by the user is not valid.
    ArgumentError = 2,
    /// Some internal error happened. Consult logs for details.
    Internal = 111,
}
28
/// The C API supported by the Lookup module.
///
/// The C API is used for FFI interfacing with other languages that support
/// C ABI FFI.  Implementors hand out borrowed, NUL-terminated strings so that
/// the FFI layer can pass the underlying pointer to the caller.
trait CApi {
    /// Looks a message up by its unique `message_id`.
    ///
    /// Returns the message as a borrowed C string on success.  If the message
    /// is not found, an error [LookupStatus] is returned instead; the FFI
    /// layer reports it to the caller as a nonzero status.
    fn string(&self, message_id: u64) -> Result<&ffi::CStr, LookupStatus>;
}
38
39impl From<str::Utf8Error> for LookupStatus {
40    fn from(e: str::Utf8Error) -> Self {
41        error!("intl: utf-8: {:?}", e);
42        LookupStatus::Unavailable
43    }
44}
45
46impl From<anyhow::Error> for LookupStatus {
47    fn from(e: anyhow::Error) -> Self {
48        error!("intl: general: {:?}", e);
49        LookupStatus::Internal
50    }
51}
52
53impl From<ucommon::Error> for LookupStatus {
54    fn from(e: ucommon::Error) -> Self {
55        error!("intl: icu: {:?}", e);
56        LookupStatus::Internal
57    }
58}
59
/// Instantiates a fake Lookup instance, which is useful for tests that don't
/// want to make a full end-to-end localization setup.
///
/// The fake is simplistic and it is the intention that it provides you with
/// some default fake behaviors.  The behaviors are as follows at the moment,
/// and more could be added if needed.
///
/// - If `locale_ids` contains the string `en-US`, the constructor function
///   in the FFI layer will return [LookupStatus::Unavailable].
/// - If the message ID passed to `Lookup::String()` is exactly 1, the fake
///   returns `Hello {person}!`, so that you can test 1-parameter formatting.
/// - Otherwise, for an even message ID it returns "Hello world!", or for
///   an odd message ID returns [LookupStatus::Unavailable].
///
/// The implementation of the fake itself is done in rust behind a FFI ABI,
/// see the package //src/lib/intl/lookup/rust for details.
pub struct FakeLookup {
    // Canned reply for even message IDs.
    hello: ffi::CString,
    // Canned reply for message ID 1; carries a single `{person}` placeholder.
    hello_person: ffi::CString,
}
80
81impl FakeLookup {
82    /// Create a new `FakeLookup`.
83    pub fn new() -> FakeLookup {
84        let hello =
85            ffi::CString::new("Hello world!").expect("CString from known value should never fail");
86        let hello_person = ffi::CString::new("Hello {person}!")
87            .expect("CString from known value should never fail");
88        FakeLookup { hello, hello_person }
89    }
90}
91
92impl CApi for FakeLookup {
93    /// A fake implementation of `string` for testing.
94    ///
95    /// Returns "Hello world" if passed an even `message_id`, and `LookupStatus::UNAVAILABLE` when
96    /// passed an odd message_id. Used to test the FFI.
97    fn string(&self, message_id: u64) -> Result<&ffi::CStr, LookupStatus> {
98        if message_id == 1 {
99            return Ok(self.hello_person.as_c_str());
100        }
101        match message_id % 2 == 0 {
102            true => Ok(self.hello.as_c_str()),
103            false => Err(LookupStatus::Unavailable),
104        }
105    }
106}
107
108#[allow(clippy::missing_safety_doc)] // TODO(https://fxbug.dev/42181460)
109#[no_mangle]
110pub unsafe extern "C" fn intl_lookup_new_fake_for_test(
111    len: libc::size_t,
112    array: *mut *const libc::c_char,
113    status: *mut i8,
114) -> *const FakeLookup {
115    *status = LookupStatus::OK as i8;
116    let rsize = len as usize;
117    let input: Vec<*const libc::c_char> = Vec::from_raw_parts(array, rsize, rsize);
118    // Do not drop the vector we don't own.
119    let input = mem::ManuallyDrop::new(input);
120
121    for raw in input.iter() {
122        let cstr = ffi::CStr::from_ptr(*raw).to_str().expect("not a valid UTF-8");
123        if cstr == "en-US" {
124            *status = LookupStatus::Unavailable as i8;
125            return std::ptr::null::<FakeLookup>();
126        }
127    }
128    Box::into_raw(Box::new(FakeLookup::new()))
129}
/// Destroys a [FakeLookup] by handing `this` to `generic_delete`, which
/// reconstitutes the owning `Box` and drops it.
///
/// `this` is expected to be a non-null pointer that was returned by
/// `intl_lookup_new_fake_for_test` and has not been deleted yet; it must not
/// be used after this call.
#[allow(clippy::missing_safety_doc)] // TODO(https://fxbug.dev/42181460)
#[no_mangle]
pub unsafe extern "C" fn intl_lookup_delete_fake_for_test(this: *mut FakeLookup) {
    generic_delete(this);
}
135
136#[allow(clippy::missing_safety_doc)] // TODO(https://fxbug.dev/42181460)
137#[no_mangle]
138pub unsafe extern "C" fn intl_lookup_new(
139    len: libc::size_t,
140    array: *mut *const libc::c_char,
141    status: *mut i8,
142) -> *const Lookup {
143    *status = LookupStatus::OK as i8;
144    let rsize = len as usize;
145    let input: Vec<*const libc::c_char> = Vec::from_raw_parts(array, rsize, rsize);
146    // Do not drop the vector we don't own.
147    let input = mem::ManuallyDrop::new(input);
148
149    let mut locales = vec![];
150    for raw in input.iter() {
151        let cstr = ffi::CStr::from_ptr(*raw).to_str();
152        match cstr {
153            Err(e) => {
154                error!("intl::intl_lookup_new::c_str: {:?}", &e);
155                let ls: LookupStatus = e.into();
156                *status = ls as i8;
157                return std::ptr::null::<Lookup>();
158            }
159            Ok(s) => {
160                locales.push(s);
161            }
162        }
163    }
164    let data = icu_data::Loader::new().expect("icu data loaded");
165    let lookup_or = Lookup::new(data, &locales[..]);
166    match lookup_or {
167        Ok(lookup) => Box::into_raw(Box::new(lookup)),
168        Err(e) => {
169            error!("intl::intl_lookup_new: {:?}", &e);
170            let ls: LookupStatus = e.into();
171            *status = ls as i8;
172            std::ptr::null::<Lookup>()
173        }
174    }
175}
176
/// Destroys a [Lookup] by handing `instance` to `generic_delete`, which
/// reconstitutes the owning `Box` and drops it.
///
/// `instance` is expected to be a non-null pointer that was returned by
/// `intl_lookup_new` and has not been deleted yet; it must not be used after
/// this call.
#[allow(clippy::missing_safety_doc)] // TODO(https://fxbug.dev/42181460)
#[no_mangle]
pub unsafe extern "C" fn intl_lookup_delete(instance: *mut Lookup) {
    generic_delete(instance);
}
182
/// Looks up message `id` in a [FakeLookup] through the C ABI.
///
/// Forwards to `generic_string`, which writes the outcome into `*status` and
/// returns either a pointer to the message or null on failure.  The returned
/// pointer borrows from the [FakeLookup] behind `this`.
#[allow(clippy::missing_safety_doc)] // TODO(https://fxbug.dev/42181460)
#[no_mangle]
pub unsafe extern "C" fn intl_lookup_string_fake_for_test(
    this: *const FakeLookup,
    id: u64,
    status: *mut i8,
) -> *const libc::c_char {
    generic_string(this, id, status)
}
192
193unsafe fn generic_string<T: CApi>(this: *const T, id: u64, status: *mut i8) -> *const libc::c_char {
194    *status = LookupStatus::OK as i8;
195    match this.as_ref().unwrap().string(id) {
196        Err(e) => {
197            *status = e as i8;
198            std::ptr::null()
199        }
200        Ok(s) => s.as_ptr() as *const libc::c_char,
201    }
202}
203
/// Destroys an object that was previously leaked to C via `Box::into_raw`.
///
/// Passing a null pointer is a no-op, mirroring `free(NULL)`; the
/// constructors in this file return null on failure, so callers may end up
/// "deleting" a handle that was never created.
///
/// # Safety
///
/// `instance` must be null, or a pointer obtained from `Box::into_raw` for a
/// `Box<T>` that has not been freed yet.  It must not be used after this call.
unsafe fn generic_delete<T>(instance: *mut T) {
    if instance.is_null() {
        return;
    }
    // SAFETY: `instance` is non-null, and the caller guarantees that it came
    // from `Box::into_raw` and is still live.
    drop(Box::from_raw(instance));
}
207
208#[allow(clippy::missing_safety_doc)] // TODO(https://fxbug.dev/42181460)
209#[no_mangle]
210pub unsafe extern "C" fn intl_lookup_string(
211    this: *const Lookup,
212    id: u64,
213    status: *mut i8,
214) -> *const libc::c_char {
215    *status = LookupStatus::OK as i8;
216    match this.as_ref().unwrap().string(id) {
217        Err(e) => {
218            *status = e as i8;
219            std::ptr::null()
220        }
221        Ok(s) => s.as_ptr() as *const libc::c_char,
222    }
223}
224
/// Contains the message catalog ready for external consumption.  Specifically,
/// provides C-style (NUL-terminated) messages.
pub struct Catalog {
    // Maps a locale identifier to that locale's messages, keyed by message ID.
    locale_to_message: BTreeMap<String, BTreeMap<u64, ffi::CString>>,
}
230
231impl Catalog {
232    fn new() -> Catalog {
233        let locale_to_message = BTreeMap::new();
234        Catalog { locale_to_message }
235    }
236
237    fn add(&mut self, model: model::Model) -> Result<()> {
238        let locale_id = model.locale();
239        let mut messages: BTreeMap<u64, ffi::CString> = BTreeMap::new();
240        for (id, msg) in model.messages() {
241            let c_msg = ffi::CString::new(msg.clone())
242                .with_context(|| format!("interior NUL in  {:?}", msg))?;
243            messages.insert(*id, c_msg);
244        }
245        self.locale_to_message.insert(locale_id.to_string(), messages);
246        Ok(())
247    }
248
249    fn get(&self, locale: &str, id: u64) -> Option<&ffi::CStr> {
250        self.locale_to_message
251            .get(locale)
252            .map(|messages| messages.get(&id))
253            .flatten()
254            .map(|cstring| cstring.as_c_str())
255    }
256}
257
/// Implements localized string lookup.
///
/// Requires that the ICU data loader is configured and that the ICU data are registered in the
/// program's package.  See the [rust
/// documentation](https://fuchsia.dev/fuchsia-src/development/internationalization/icu_data#rust_example)
/// for detailed instructions.
///
/// ```ignore
/// use intl_lookup::Lookup;
/// let icu_data = icu_data::Loader::new().expect("icu data loaded");
/// let l = Lookup::new(icu_data, &vec!["es"])?;
/// assert_eq!("el stringo", l.str(ftest::MessageIds::StringName as u64)?);
/// ```
pub struct Lookup {
    // The supported locales that were accepted for the caller's requested
    // locales, in request order.  Lookups try them in this order.
    requested: Vec<uloc::ULoc>,
    // The messages for every locale that was loaded.
    catalog: Catalog,
    // The loader is required to ensure that the unicode locale data is kept
    // in memory while this Lookup is in use.
    #[allow(dead_code)]
    icu_data: icu_data::Loader,
}
279
280impl Lookup {
281    /// Creates a new instance of Lookup, with the default ways to look up the
282    /// data.
283    pub fn new(icu_data: icu_data::Loader, requested: &[&str]) -> Result<Lookup> {
284        let supported_locales =
285            Lookup::get_available_locales().with_context(|| "while creating Lookup")?;
286        // Load all available locale maps.
287        let catalog = Lookup::load_locales(&supported_locales[..])
288            .with_context(|| "while loading locales")?;
289        Lookup::new_internal(icu_data, requested, &supported_locales, catalog)
290    }
291
292    // Loads all supported locales from disk.
293    fn load_locales(supported: &[impl AsRef<str>]) -> Result<Catalog> {
294        let mut catalog = Catalog::new();
295
296        // In the future we may decide to load only the locales we actually need.
297        for locale in supported {
298            // Directory names look like: ".../assets/locales/en-US".
299            let mut locale_dir_path = std::path::PathBuf::from(ASSETS_DIR);
300            locale_dir_path.push(locale.as_ref());
301
302            let locale_dir = std::fs::read_dir(&locale_dir_path)
303                .with_context(|| format!("while reading {:?}", &locale_dir_path))?;
304            for entry in locale_dir {
305                let path = entry?.path();
306                let file = fs::File::open(&path)
307                    .with_context(|| format!("while trying to open {:?}", &path))?;
308                let file = io::BufReader::new(file);
309                let model = model::Model::from_json_reader(file)
310                    .with_context(|| format!("while reading {:?}", &path))?;
311                catalog.add(model)?;
312            }
313        }
314        Ok(catalog)
315    }
316
317    /// Create a new [Lookup] from parts.  Only to be used in tests.
318    #[cfg(test)]
319    pub fn new_from_parts(
320        icu_data: icu_data::Loader,
321        requested: &[&str],
322        supported: &Vec<String>,
323        catalog: Catalog,
324    ) -> Result<Lookup> {
325        Lookup::new_internal(icu_data, requested, supported, catalog)
326    }
327
328    fn new_internal(
329        icu_data: icu_data::Loader,
330        requested: &[&str],
331        supported: &Vec<String>,
332        catalog: Catalog,
333    ) -> Result<Lookup> {
334        let mut supported_locales = supported
335            .iter()
336            .map(|s: &String| uloc::ULoc::try_from(s.as_str()))
337            .collect::<Result<Vec<_>, _>>()
338            .with_context(|| "while determining supported locales")?;
339
340        // Work around a locale fallback resolution bug
341        // https://unicode-org.atlassian.net/browse/ICU-20931 which has been fixed in ICU 67.  This
342        // has to stay in place until Fuchsia starts using ICU version 67.
343        supported_locales.push(uloc::ULoc::try_from("und-und")?);
344        let supported_locales = supported_locales;
345
346        // Compute a fallback for each requested locale, and fail if none is available.
347        let mut requested_locales = vec![];
348        for locale in requested.iter() {
349            let (maybe_accepted_locale, accept_result) = uloc::accept_language(
350                vec![uloc::ULoc::try_from(*locale)
351                    .with_context(|| format!("could not parse as locale: {:}", &locale))?],
352                supported_locales.clone(),
353            )?;
354            match accept_result {
355                usys::UAcceptResult::ULOC_ACCEPT_FAILED => {
356                    // This may happen if the locale is not at all part of the
357                    // set of supported locales.
358                }
359                _ => match maybe_accepted_locale {
360                    None => {
361                        return Err(anyhow::anyhow!(
362                            "no matching locale found for: requested: {:?}, supported: {:?}",
363                            &locale,
364                            &supported_locales
365                        ));
366                    }
367                    Some(loc) => {
368                        requested_locales.push(loc);
369                    }
370                },
371            }
372        }
373        // We failed to find locales to request from the list of supported locales.
374        if requested_locales.is_empty() {
375            return Err(anyhow::anyhow!(
376                "no matching locale found for: requested: {:?}, supported: {:?}",
377                &requested,
378                &supported_locales
379            ));
380        }
381        Ok(Lookup { requested: requested_locales, catalog, icu_data })
382    }
383
384    #[cfg(test)]
385    fn get_available_locales_for_test() -> Result<Vec<String>> {
386        Lookup::get_available_locales()
387    }
388
389    // Returns the list of locales for which there are resources present in
390    // the locale assets directory.  Errors are returned if the locale assets
391    // directory is malformed: since it is prepared at compile time, such an
392    // occurrence means that the program is corrupted.
393    fn get_available_locales() -> Result<Vec<String>> {
394        let locale_dirs = std::fs::read_dir(ASSETS_DIR)
395            .with_context(|| format!("while reading {}", ASSETS_DIR))?;
396        let mut available_locales: Vec<String> = vec![];
397        for entry_or in locale_dirs {
398            let entry =
399                entry_or.with_context(|| format!("while reading entries in {}", ASSETS_DIR))?;
400            // We only ever expect directories corresponding to locale names
401            // to be UTF-8 encoded, so this conversion will normally always
402            // succeed for directories in `ASSETS_DIR`.
403            let name = entry.file_name().into_string().map_err(|os_string| {
404                anyhow::anyhow!("OS path not convertible to UTF-8: {:?}", os_string)
405            })?;
406            let entry_type = entry
407                .file_type()
408                .with_context(|| format!("while looking up file type for: {:?}", name))?;
409            if entry_type.is_dir() {
410                available_locales.push(name);
411            }
412        }
413        Ok(available_locales)
414    }
415
416    /// Looks up the message by its key, a rust API version of [API::string].
417    pub fn str(&self, id: u64) -> Result<&str, LookupStatus> {
418        Ok(self
419            .string(id)?
420            .to_str()
421            .with_context(|| format!("str(): while looking up id: {}", &id))?)
422    }
423}
424
425impl CApi for Lookup {
426    /// See the documentation for `API` for details.
427    fn string(&self, id: u64) -> Result<&ffi::CStr, LookupStatus> {
428        for locale in self.requested.iter() {
429            if let Some(s) = self.catalog.get(&locale.to_language_tag(false)?, id) {
430                return Ok(s);
431            }
432        }
433        Err(LookupStatus::Unavailable)
434    }
435}
436
#[cfg(test)]
mod tests {
    use super::*;
    use fidl_fuchsia_intl_test as ftest;
    use std::collections::HashSet;

    /// Builds a [Lookup] for `requested` on top of freshly loaded ICU data and
    /// the locale resources packaged with the test.
    fn new_lookup(requested: &[&str]) -> Result<Lookup> {
        let icu_data = icu_data::Loader::new().expect("icu data loaded");
        Lookup::new(icu_data, requested)
    }

    #[test]
    fn lookup_en() -> Result<(), LookupStatus> {
        let l = new_lookup(&["en"])?;
        assert_eq!("text_string", l.string(ftest::MessageIds::StringName as u64)?.to_str()?);
        assert_eq!("text_string_2", l.string(ftest::MessageIds::StringName2 as u64)?.to_str()?);
        Ok(())
    }

    #[test]
    fn lookup_fr() -> Result<(), LookupStatus> {
        let l = new_lookup(&["fr"])?;
        assert_eq!("le string", l.string(ftest::MessageIds::StringName as u64)?.to_str()?);
        assert_eq!("le string 2", l.string(ftest::MessageIds::StringName2 as u64)?.to_str()?);
        Ok(())
    }

    #[test]
    fn lookup_es() -> Result<(), LookupStatus> {
        let l = new_lookup(&["es"])?;
        assert_eq!("el stringo", l.string(ftest::MessageIds::StringName as u64)?.to_str()?);
        assert_eq!("el stringo 2", l.string(ftest::MessageIds::StringName2 as u64)?.to_str()?);
        Ok(())
    }

    // When "es" is preferred, use it.
    #[test]
    fn lookup_es_en() -> Result<(), LookupStatus> {
        let l = new_lookup(&["es", "en"])?;
        assert_eq!("el stringo", l.string(ftest::MessageIds::StringName as u64)?.to_str()?);
        assert_eq!("el stringo 2", l.string(ftest::MessageIds::StringName2 as u64)?.to_str()?);
        Ok(())
    }

    #[test]
    fn lookup_es_419_fallback() -> Result<(), LookupStatus> {
        let l = new_lookup(&["es-419-u-ca-gregorian"]).expect("locale exists");
        assert_eq!("el stringo", l.string(ftest::MessageIds::StringName as u64)?.to_str()?);
        assert_eq!("el stringo 2", l.string(ftest::MessageIds::StringName2 as u64)?.to_str()?);
        Ok(())
    }

    #[test]
    fn nonexistent_locale_rejected() {
        assert!(
            new_lookup(&["nonexistent-locale"]).is_err(),
            "unexpectedly accepted nonexistent locale"
        );
    }

    #[test]
    fn locale_fallback_accounted_for() -> Result<()> {
        let icu_data = icu_data::Loader::new().expect("icu data loaded");
        // These locales are directly supported by the tests.
        Lookup::new(icu_data.clone(), &["en"])?;
        Lookup::new(icu_data.clone(), &["fr"])?;
        Lookup::new(icu_data.clone(), &["es"])?;

        // The following languages are not directly supported.  Instead they
        // are supported via the locale fallback mechanism.

        // Falls back to "en".
        Lookup::new(icu_data.clone(), &["en-US"])?;
        // Falls back to "es".
        Lookup::new(icu_data.clone(), &["es-ES"])?;
        // Falls back to "es", too.
        Lookup::new(icu_data.clone(), &["es-419"])?;
        Ok(())
    }

    // Exercises the fake behaviors which are part of the fake spec.  The fake
    // behaviors may evolve in the future, but this test gives out the ones that
    // currently exist.
    #[test]
    fn test_fake_lookup() -> Result<(), LookupStatus> {
        let l = FakeLookup::new();
        assert_eq!("Hello {person}!", l.string(1)?.to_str()?);
        // Fake lookups return "Hello world!" for even message IDs, that's a
        // FakeLookup feature.
        assert_eq!("Hello world!", l.string(10)?.to_str()?);
        assert_eq!("Hello world!", l.string(12)?.to_str()?);
        assert_eq!(LookupStatus::Unavailable, l.string(11).unwrap_err());
        assert_eq!(LookupStatus::Unavailable, l.string(41).unwrap_err());
        Ok(())
    }

    #[test]
    fn test_real_lookup() -> Result<(), LookupStatus> {
        let l = new_lookup(&["es"])?;
        assert_eq!("el stringo", l.str(ftest::MessageIds::StringName as u64)?);
        Ok(())
    }

    /// Locales have been made part of the resources of the test package.
    #[test]
    fn test_available_locales() -> Result<()> {
        // Iteration order is not deterministic.
        let expected: HashSet<String> = ["es", "en", "fr"].iter().map(|s| s.to_string()).collect();
        assert_eq!(expected, Lookup::get_available_locales_for_test()?.into_iter().collect());
        Ok(())
    }

    /// If an unsupported locale has been requested, ignore it in the list and
    /// fall back to something else.
    #[test]
    fn ignore_unavailable_locales() {
        let l = new_lookup(&["sr", "es"]).expect("Lookup::new success");
        assert_eq!(
            "el stringo",
            l.str(ftest::MessageIds::StringName as u64).expect("Lookup::str success")
        );
    }

    /// If there is nothing to fall back to, report an error.
    #[test]
    fn report_unavailable_locales_without_alternative() {
        assert!(new_lookup(&["sr"]).is_err());
    }
}