rust_icu_common/
lib.rs

1// Copyright 2019 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//      http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! # Commonly used functionality adapters.
16//!
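//! At the moment, this crate contains the declarations of the common error type, helper
//! macros for binding ICU functions, and a few utilities for passing data into FFI code.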
18
19use {
20    anyhow::anyhow,
21    rust_icu_sys as sys,
22    std::{ffi, os, convert::TryInto},
23    thiserror::Error,
24};
25
26/// Represents a Unicode error, resulting from operations of low-level ICU libraries.
27///
28/// This is modeled after absl::Status in the Abseil library, which provides ways
29/// for users to avoid dealing with all the numerous error codes directly.
30#[derive(Error, Debug)]
31pub enum Error {
32    /// The error originating in the underlying sys library.
33    ///
34    /// At the moment it is possible to produce an Error which has a zero error code (i.e. no
35    /// error), because it makes it unnecessary for users to deal with error codes directly.  It
36    /// does make for a bit weird API, so we may turn it around a bit.  Ideally, it should not be
37    /// possible to have an Error that isn't really an error.
38    #[error("ICU error code: {}", _0)]
39    Sys(sys::UErrorCode),
40
41    /// Errors originating from the wrapper code.  For example when pre-converting input into
42    /// UTF8 for input that happens to be malformed.
43    #[error(transparent)]
44    Wrapper(#[from] anyhow::Error),
45}
46
47impl Error {
48    /// The error code denoting no error has happened.
49    pub const OK_CODE: sys::UErrorCode = sys::UErrorCode::U_ZERO_ERROR;
50
51    /// Returns true if this error code corresponds to no error.
52    pub fn is_ok(code: sys::UErrorCode) -> bool {
53        code == Self::OK_CODE
54    }
55
56    /// Creates a new error from the supplied status.  Ok is returned if the error code does not
57    /// correspond to an error code (as opposed to OK or a warning code).
58    pub fn ok_or_warning(status: sys::UErrorCode) -> Result<(), Self> {
59        if Self::is_ok(status) || status < Self::OK_CODE {
60            Ok(())
61        } else {
62            Err(Error::Sys(status))
63        }
64    }
65
66    /// Creates a new error from the supplied status.  Ok is returned if the
67    /// error code does not constitute an error in preflight mode.
68    ///
69    /// This error check explicitly ignores the buffer overflow error when reporting whether it
70    /// contains an error condition.
71    ///
72    /// Preflight calls to ICU libraries do a read-only scan of the input to determine the buffer
73    /// sizes required on the output in case of conversion calls such as `ucal_strFromUTF8`.  The
74    /// way this call is made is to offer a zero-capacity buffer (which could be pointed to by a
75    /// `NULL` pointer), and then call the respective function.  The function will compute the
76    /// buffer size, but will also return a bogus buffer overflow error.
77    pub fn ok_preflight(status: sys::UErrorCode) -> Result<(), Self> {
78        if status > Self::OK_CODE && status != sys::UErrorCode::U_BUFFER_OVERFLOW_ERROR {
79            Err(Error::Sys(status))
80        } else {
81            Ok(())
82        }
83    }
84
85    /// Returns true if this error has the supplied `code`.
86    pub fn is_code(&self, code: sys::UErrorCode) -> bool {
87        if let Error::Sys(c) = self {
88            return *c == code;
89        }
90        false
91    }
92
93    /// Returns true if the error is an error, not a warning.
94    ///
95    /// The ICU4C library has error codes for errors and warnings.
96    pub fn is_err(&self) -> bool {
97        match self {
98            Error::Sys(code) => *code > sys::UErrorCode::U_ZERO_ERROR,
99            Error::Wrapper(_) => true,
100        }
101    }
102
103    /// Return true if there was an error in a preflight call.
104    ///
105    /// This error check explicitly ignores the buffer overflow error when reporting whether it
106    /// contains an error condition.
107    ///
108    /// Preflight calls to ICU libraries do a read-only scan of the input to determine the buffer
109    /// sizes required on the output in case of conversion calls such as `ucal_strFromUTF8`.  The
110    /// way this call is made is to offer a zero-capacity buffer (which could be pointed to by a
111    /// `NULL` pointer), and then call the respective function.  The function will compute the
112    /// buffer size, but will also return a bogus buffer overflow error.
113    pub fn is_preflight_err(&self) -> bool {
114        // We may expand the set of error codes that are exempt from error checks in preflight.
115        self.is_err() && !self.is_code(sys::UErrorCode::U_BUFFER_OVERFLOW_ERROR)
116    }
117
118    /// Returns true if the error is, in fact, a warning (nonfatal).
119    pub fn is_warn(&self) -> bool {
120        match self {
121            Error::Sys(c) => *c < sys::UErrorCode::U_ZERO_ERROR,
122            _ => false,
123        }
124    }
125
126    pub fn wrapper(source: impl Into<anyhow::Error>) -> Self {
127        Self::Wrapper(source.into())
128    }
129}
130
131impl From<ffi::NulError> for Error {
132    fn from(e: ffi::NulError) -> Self {
133        Self::wrapper(e)
134    }
135}
136
137impl From<std::str::Utf8Error> for Error {
138    fn from(e: std::str::Utf8Error) -> Self {
139        Self::wrapper(e)
140    }
141}
142
143impl From<std::string::FromUtf8Error> for Error {
144    fn from(e: std::string::FromUtf8Error) -> Self {
145        Self::wrapper(e)
146    }
147}
148
149impl Into<std::fmt::Error> for Error {
150    fn into(self) -> std::fmt::Error {
151        // It is not possible to transfer any info into std::fmt::Error, so we log instead.
152        eprintln!("error while formatting: {:?}", &self);
153        std::fmt::Error {}
154    }
155}
156
/// Implements `Drop` for a type by delegating to a deallocation function.
///
/// The macro takes two arguments:
///
/// - the type to implement drop for.  It is assumed that the type has an internal
///   representation in a field named `rep`, wrapped in a [std::ptr::NonNull].
/// - the name of the function that implements memory deallocation.  The function is
///   called with the raw pointer taken from `rep`.
///
/// Example:
///
/// ```rust ignore
/// pub struct UNumberFormatter {
///   rep: std::ptr::NonNull<Foo>,
/// }
/// //...
/// simple_drop_impl!(UNumberFormatter, unumf_close);
/// ```
#[macro_export]
macro_rules! simple_drop_impl {
    ($wrapper_type:ty, $close_function:ident) => {
        impl $crate::__private_do_not_use::Drop for $wrapper_type {
            #[doc = concat!("Implements `", stringify!($close_function), "`.")]
            fn drop(&mut self) {
                let raw = self.rep.as_ptr();
                unsafe {
                    $crate::__private_do_not_use::versioned_function!($close_function)(raw);
                }
            }
        }
    };
}
185
186/// Helper for calling ICU4C `uloc` methods that require a resizable output string buffer.
187pub fn buffered_string_method_with_retry<F>(
188    mut method_to_call: F,
189    buffer_capacity: usize,
190) -> Result<String, Error>
191where
192    F: FnMut(*mut os::raw::c_char, i32, *mut sys::UErrorCode) -> i32,
193{
194    let mut status = Error::OK_CODE;
195    let mut buf: Vec<u8> = vec![0; buffer_capacity];
196
197    // Requires that any pointers that are passed in are valid.
198    let full_len: i32 = {
199        assert!(Error::is_ok(status));
200        method_to_call(
201            buf.as_mut_ptr() as *mut os::raw::c_char,
202            buffer_capacity as i32,
203            &mut status,
204        )
205    };
206
207    // ICU methods are inconsistent in whether they silently truncate the output or treat
208    // the overflow as an error, so we need to check both cases.
209    if status == sys::UErrorCode::U_BUFFER_OVERFLOW_ERROR ||
210       (Error::is_ok(status) &&
211            full_len > buffer_capacity
212                .try_into()
213                .map_err(|e| Error::wrapper(e))?) {
214
215        status = Error::OK_CODE;
216        assert!(full_len > 0);
217        let full_len: usize = full_len
218            .try_into()
219            .map_err(|e| Error::wrapper(e))?;
220        buf.resize(full_len, 0);
221
222        // Same unsafe requirements as above, plus full_len must be exactly the output
223        // buffer size.
224        {
225            assert!(Error::is_ok(status));
226            method_to_call(
227                buf.as_mut_ptr() as *mut os::raw::c_char,
228                full_len as i32,
229                &mut status,
230            )
231        };
232    }
233
234    Error::ok_or_warning(status)?;
235
236    // Adjust the size of the buffer here.
237    if full_len >= 0 {
238        let full_len: usize = full_len
239            .try_into()
240            .map_err(|e| Error::wrapper(e))?;
241        buf.resize(full_len, 0);
242    }
243    String::from_utf8(buf).map_err(|e| e.utf8_error().into())
244}
245
/// Generates a pair of formatting methods for one numeric input type.
///
/// There is a slew of near-identical method calls which differ in the type of
/// the input argument and the name of the function to invoke.
///
/// The invocation:
///
/// ```rust ignore
/// impl ... {
///   // ...
///   format_ustring_for_type!(format_f64, unum_formatDouble, f64);
/// }
/// ```
///
/// allows us to bind the function:
///
/// ```c++ ignore
/// int32_t unum_formatDouble(
///     const UNumberFormat* fmt,
///     double number,
///     UChar* result,
///     int32_t result_length,
///     UFieldPosition* pos,
///     UErrorCode *status)
/// ```
///
/// as the two methods:
///
/// ```rust ignore
/// impl ... {
///   format_f64(&self /* format */, number: f64) -> Result<String, common::Error>;
///   format_f64_ustring(&self /* format */, param: f64) -> Result<ustring::UChar, common::Error>;
/// }
/// ```
///
/// The expansion refers to `common`, `ustring`, `sys`, `paste`, `versioned_function!` and
/// `buffered_uchar_method_with_retry!` by their unqualified names and reads `self.rep`, so
/// all of these must be available at the place where the macro is invoked.
#[macro_export]
macro_rules! format_ustring_for_type{
    ($method_name:ident, $function_name:ident, $type_decl:ty) => (
        #[doc = concat!("Implements `", stringify!($function_name), "`.")]
        pub fn $method_name(&self, number: $type_decl) -> Result<String, common::Error> {
            let result = paste::item! {
                self. [< $method_name _ustring>] (number)?
            };
            String::try_from(&result)
        }

        // Should be able to use https://github.com/google/rust_icu/pull/144 to
        // make this even shorter.
        paste::item! {
            #[doc = concat!("Implements `", stringify!($function_name), "`.")]
            pub fn [<$method_name _ustring>] (&self, param: $type_decl) -> Result<ustring::UChar, common::Error> {
                const CAPACITY: usize = 200;
                buffered_uchar_method_with_retry!(
                    [< $method_name _ustring_impl >],
                    CAPACITY,
                    [ rep: *const sys::UNumberFormat, param: $type_decl, ],
                    [ field: *mut sys::UFieldPosition, ]
                    );

                [<$method_name _ustring_impl>](
                    versioned_function!($function_name),
                    self.rep.as_ptr(),
                    param,
                    // The field position is unused for now, so a null pointer is passed.
                    ::std::ptr::null_mut::<sys::UFieldPosition>(),
                    )
            }
        }
    )
}
312
/// Expands into a getter method that forwards all its arguments to the underlying function
/// and returns, as a fallible value, whatever that function returned.
///
/// The invocation:
///
/// ```rust ignore
/// impl _ {
///     generalized_fallible_getter!(
///         get_context,
///         unum_getContext,
///         [context_type: sys::UDisplayContextType, ],
///         sys::UDisplayContext
///     );
/// }
/// ```
///
/// allows us to bind the function:
///
/// ```c++ ignore
/// UDisplayContext unum_getContext(
///     const SOMETYPE* t,
///     UDisplayContextType type,
///     UErrorCode* status
/// );
/// ```
///
/// which then becomes:
///
/// ```rust ignore
/// impl _ {
///   fn get_context(&self, context_type: sys::UDisplayContextType) -> Result<sys::UDisplayContext, common::Error>;
/// }
/// ```
/// where `Self` has an internal representation named exactly `Self::rep`.
///
/// Every argument in the bracketed list must be followed by a comma.  The returned value is
/// `Ok` when the status left behind by the call is OK or a warning.
#[macro_export]
macro_rules! generalized_fallible_getter{
    ($method:ident, $icu_function:ident, [ $( $param:ident: $param_type:ty ,)* ],  $output:ty) => (
        #[doc = concat!("Implements `", stringify!($icu_function), "`.")]
        pub fn $method(&self, $( $param: $param_type, )* ) -> Result<$output, common::Error> {
            let mut status = common::Error::OK_CODE;
            assert!(common::Error::is_ok(status));
            let value: $output = unsafe {
                versioned_function!($icu_function)(self.rep.as_ptr(), $( $param, )* &mut status)
            };
            common::Error::ok_or_warning(status)?;
            Ok(value)
        }
    )
}
362
/// Expands into a setter method that forwards all its arguments between []'s and returns a
/// `Result<(), common::Error>`.
///
/// The invocation:
///
/// ```rust ignore
/// impl _ {
///     generalized_fallible_setter!(
///         set_context,
///         unum_setContext,
///         [value: sys::UDisplayContext, ]
///     );
/// }
/// ```
///
/// allows us to bind the function:
///
/// ```c++ ignore
/// void unum_setContext(
///     SOMETYPE* t,
///     UDisplayContext value,
///     UErrorCode* status
/// );
/// ```
///
/// which then becomes:
///
/// ```rust ignore
/// impl _ {
///   fn set_context(&self, value: sys::UDisplayContext) -> Result<(), common::Error>;
/// }
/// ```
/// where `Self` has an internal representation named exactly `Self::rep`.
///
/// The setter is generated by `generalized_fallible_getter!` with `()` as the return type,
/// so the bound function must not return a value.
#[macro_export]
macro_rules! generalized_fallible_setter{
    ($top_level_method_name:ident, $impl_name:ident, [ $( $arg:ident : $arg_type:ty, )* ]) => (
        // Go through `$crate` so that the expansion works even when the caller has imported
        // only this macro and not `generalized_fallible_getter!`.
        $crate::generalized_fallible_getter!(
            $top_level_method_name,
            $impl_name,
            [ $( $arg: $arg_type, )* ],
            ());
    )
}
406
407/// Used to simulate an array of C-style strings.
408#[derive(Debug)]
409pub struct CStringVec {
410    // The internal representation of the vector of C strings.
411    rep: Vec<ffi::CString>,
412    // Same as rep, but converted into C pointers.
413    c_rep: Vec<*const os::raw::c_char>,
414}
415
416impl CStringVec {
417    /// Creates a new C string vector from the provided rust strings.
418    ///
419    /// C strings are continuous byte regions that end in `\0` and do not
420    /// contain `\0` anywhere else.
421    ///
422    /// Use `as_c_array` to get an unowned raw pointer to the array, to pass
423    /// into FFI C code.
424    pub fn new(strings: &[&str]) -> Result<Self, Error> {
425        let mut rep = Vec::with_capacity(strings.len());
426        // Convert all to asciiz strings and insert into the vector.
427        for elem in strings {
428            let asciiz = ffi::CString::new(*elem)?;
429            rep.push(asciiz);
430        }
431        let c_rep = rep.iter().map(|e| e.as_ptr()).collect();
432        Ok(CStringVec { rep, c_rep })
433    }
434
435    /// Returns the underlying array of C strings as a C array pointer.  The
436    /// array must not change after construction to ensure that this pointer
437    /// remains valid.
438    pub fn as_c_array(&self) -> *const *const os::raw::c_char {
439        self.c_rep.as_ptr() as *const *const os::raw::c_char
440    }
441
442    /// Returns the number of elements in the vector.
443    pub fn len(&self) -> usize {
444        self.rep.len()
445    }
446
447    /// Returns whether the vector is empty.
448    pub fn is_empty(&self) -> bool {
449        self.rep.is_empty()
450    }
451}
452
453// Items used by macros. Unstable private API; do not use.
454#[doc(hidden)]
455pub mod __private_do_not_use {
456    pub use Drop;
457    pub use rust_icu_sys::versioned_function;
458}
459
#[cfg(test)]
mod tests {
    use super::*;

    // A buffer overflow is a regular error, but it is exempt from the preflight checks.
    #[test]
    fn test_error_code() {
        let error =
            Error::ok_or_warning(sys::UErrorCode::U_BUFFER_OVERFLOW_ERROR).unwrap_err();
        assert!(error.is_code(sys::UErrorCode::U_BUFFER_OVERFLOW_ERROR));
        assert!(!error.is_preflight_err());
        assert!(!error.is_code(sys::UErrorCode::U_ZERO_ERROR));
    }

    #[test]
    fn test_into_char_array() {
        let values = ["eenie", "meenie", "minie", "moe"];
        let c_array = CStringVec::new(&values).expect("success");
        assert_eq!(c_array.len(), 4);
    }

    // A string with a NUL byte in the middle can not be represented as a C string.
    #[test]
    fn test_with_embedded_nul_byte() {
        let values = ["hell\0o"];
        let _error = CStringVec::new(&values).expect_err("should fail");
    }

    // Zero and negative positions do not denote a parse error.
    #[test]
    fn test_parser_error_ok() {
        let tests = [
            NO_PARSE_ERROR,
            sys::UParseError {
                line: -1,
                ..NO_PARSE_ERROR
            },
            sys::UParseError {
                offset: -1,
                ..NO_PARSE_ERROR
            },
        ];
        for test in tests.iter() {
            assert!(parse_ok(*test).is_ok(), "for test: {:?}", test);
        }
    }

    // A positive line or a positive offset denotes a parse error.
    #[test]
    fn test_parser_error_not_ok() {
        let tests = [
            sys::UParseError {
                line: 1,
                ..NO_PARSE_ERROR
            },
            sys::UParseError {
                offset: 1,
                ..NO_PARSE_ERROR
            },
            sys::UParseError {
                line: -1,
                offset: 1,
                ..NO_PARSE_ERROR
            },
        ];
        for test in tests.iter() {
            assert!(parse_ok(*test).is_err(), "for test: {:?}", test);
        }
    }
}
541
542/// A zero-value parse error, used to initialize types that get passed into FFI code.
543pub static NO_PARSE_ERROR: sys::UParseError = sys::UParseError {
544    line: 0,
545    offset: 0,
546    preContext: [0; 16usize],
547    postContext: [0; 16usize],
548};
549
550/// Converts a parse error to a Result.
551///
552/// A parse error is an error if line or offset are positive, apparently.
553pub fn parse_ok(e: sys::UParseError) -> Result<(), crate::Error> {
554    if e.line > 0 || e.offset > 0 {
555        return Err(Error::Wrapper(anyhow!(
556            "parse error: line: {}, offset: {}",
557            e.line,
558            e.offset
559        )));
560    }
561    Ok(())
562}
563