rust_icu_common/lib.rs
1// Copyright 2019 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! # Commonly used functionality adapters.
16//!
//! At the moment, this crate contains the declaration of various errors, as well as helper
//! macros and small utilities for calling into the ICU C API.
18
19use {
20 anyhow::anyhow,
21 rust_icu_sys as sys,
22 std::{ffi, os, convert::TryInto},
23 thiserror::Error,
24};
25
/// Represents a Unicode error, resulting from operations of low-level ICU libraries.
///
/// This is modeled after absl::Status in the Abseil library, which provides ways
/// for users to avoid dealing with all the numerous error codes directly.
///
/// There are two variants: [`Error::Sys`] carries a raw status code reported by the
/// underlying `sys` library, while [`Error::Wrapper`] carries an error produced by the Rust
/// wrapper code itself.
#[derive(Error, Debug)]
pub enum Error {
    /// The error originating in the underlying sys library.
    ///
    /// At the moment it is possible to produce an Error which has a zero error code (i.e. no
    /// error), because it makes it unnecessary for users to deal with error codes directly. It
    /// does make for a bit weird API, so we may turn it around a bit. Ideally, it should not be
    /// possible to have an Error that isn't really an error.
    // Rendered as `ICU error code: <code>`.
    #[error("ICU error code: {}", _0)]
    Sys(sys::UErrorCode),

    /// Errors originating from the wrapper code. For example when pre-converting input into
    /// UTF8 for input that happens to be malformed.
    // `transparent` delegates the message to the wrapped `anyhow::Error`; `#[from]` also
    // derives `From<anyhow::Error> for Error`, so `?` works on `anyhow` results.
    #[error(transparent)]
    Wrapper(#[from] anyhow::Error),
}
46
47impl Error {
48 /// The error code denoting no error has happened.
49 pub const OK_CODE: sys::UErrorCode = sys::UErrorCode::U_ZERO_ERROR;
50
51 /// Returns true if this error code corresponds to no error.
52 pub fn is_ok(code: sys::UErrorCode) -> bool {
53 code == Self::OK_CODE
54 }
55
56 /// Creates a new error from the supplied status. Ok is returned if the error code does not
57 /// correspond to an error code (as opposed to OK or a warning code).
58 pub fn ok_or_warning(status: sys::UErrorCode) -> Result<(), Self> {
59 if Self::is_ok(status) || status < Self::OK_CODE {
60 Ok(())
61 } else {
62 Err(Error::Sys(status))
63 }
64 }
65
66 /// Creates a new error from the supplied status. Ok is returned if the
67 /// error code does not constitute an error in preflight mode.
68 ///
69 /// This error check explicitly ignores the buffer overflow error when reporting whether it
70 /// contains an error condition.
71 ///
72 /// Preflight calls to ICU libraries do a read-only scan of the input to determine the buffer
73 /// sizes required on the output in case of conversion calls such as `ucal_strFromUTF8`. The
74 /// way this call is made is to offer a zero-capacity buffer (which could be pointed to by a
75 /// `NULL` pointer), and then call the respective function. The function will compute the
76 /// buffer size, but will also return a bogus buffer overflow error.
77 pub fn ok_preflight(status: sys::UErrorCode) -> Result<(), Self> {
78 if status > Self::OK_CODE && status != sys::UErrorCode::U_BUFFER_OVERFLOW_ERROR {
79 Err(Error::Sys(status))
80 } else {
81 Ok(())
82 }
83 }
84
85 /// Returns true if this error has the supplied `code`.
86 pub fn is_code(&self, code: sys::UErrorCode) -> bool {
87 if let Error::Sys(c) = self {
88 return *c == code;
89 }
90 false
91 }
92
93 /// Returns true if the error is an error, not a warning.
94 ///
95 /// The ICU4C library has error codes for errors and warnings.
96 pub fn is_err(&self) -> bool {
97 match self {
98 Error::Sys(code) => *code > sys::UErrorCode::U_ZERO_ERROR,
99 Error::Wrapper(_) => true,
100 }
101 }
102
103 /// Return true if there was an error in a preflight call.
104 ///
105 /// This error check explicitly ignores the buffer overflow error when reporting whether it
106 /// contains an error condition.
107 ///
108 /// Preflight calls to ICU libraries do a read-only scan of the input to determine the buffer
109 /// sizes required on the output in case of conversion calls such as `ucal_strFromUTF8`. The
110 /// way this call is made is to offer a zero-capacity buffer (which could be pointed to by a
111 /// `NULL` pointer), and then call the respective function. The function will compute the
112 /// buffer size, but will also return a bogus buffer overflow error.
113 pub fn is_preflight_err(&self) -> bool {
114 // We may expand the set of error codes that are exempt from error checks in preflight.
115 self.is_err() && !self.is_code(sys::UErrorCode::U_BUFFER_OVERFLOW_ERROR)
116 }
117
118 /// Returns true if the error is, in fact, a warning (nonfatal).
119 pub fn is_warn(&self) -> bool {
120 match self {
121 Error::Sys(c) => *c < sys::UErrorCode::U_ZERO_ERROR,
122 _ => false,
123 }
124 }
125
126 pub fn wrapper(source: impl Into<anyhow::Error>) -> Self {
127 Self::Wrapper(source.into())
128 }
129}
130
131impl From<ffi::NulError> for Error {
132 fn from(e: ffi::NulError) -> Self {
133 Self::wrapper(e)
134 }
135}
136
137impl From<std::str::Utf8Error> for Error {
138 fn from(e: std::str::Utf8Error) -> Self {
139 Self::wrapper(e)
140 }
141}
142
143impl From<std::string::FromUtf8Error> for Error {
144 fn from(e: std::string::FromUtf8Error) -> Self {
145 Self::wrapper(e)
146 }
147}
148
149impl Into<std::fmt::Error> for Error {
150 fn into(self) -> std::fmt::Error {
151 // It is not possible to transfer any info into std::fmt::Error, so we log instead.
152 eprintln!("error while formatting: {:?}", &self);
153 std::fmt::Error {}
154 }
155}
156
/// Implements `Drop` for a type by calling an ICU deallocation function on its
/// internal representation.
///
/// * `type_name` is the type to implement drop for.
/// * `impl_function_name` is the name of the function that implements memory
///   deallocation.
///
/// It is assumed that the type has a field named `rep` holding the internal
/// representation wrapped in a [std::ptr::NonNull].
///
/// Example:
///
/// ```rust ignore
/// pub struct UNumberFormatter {
///   rep: std::ptr::NonNull<Foo>,
/// }
/// //...
/// simple_drop_impl!(UNumberFormatter, unumf_close);
/// ```
#[macro_export]
macro_rules! simple_drop_impl {
    ($type_name:ty, $impl_function_name:ident) => {
        impl $crate::__private_do_not_use::Drop for $type_name {
            #[doc = concat!("Implements `", stringify!($impl_function_name), "`.")]
            fn drop(&mut self) {
                let raw = self.rep.as_ptr();
                // SAFETY: relies on `rep` pointing at a live object that this value owns,
                // as the macro documentation requires of the implementing type.
                unsafe {
                    $crate::__private_do_not_use::versioned_function!($impl_function_name)(raw);
                }
            }
        }
    };
}
185
186/// Helper for calling ICU4C `uloc` methods that require a resizable output string buffer.
187pub fn buffered_string_method_with_retry<F>(
188 mut method_to_call: F,
189 buffer_capacity: usize,
190) -> Result<String, Error>
191where
192 F: FnMut(*mut os::raw::c_char, i32, *mut sys::UErrorCode) -> i32,
193{
194 let mut status = Error::OK_CODE;
195 let mut buf: Vec<u8> = vec![0; buffer_capacity];
196
197 // Requires that any pointers that are passed in are valid.
198 let full_len: i32 = {
199 assert!(Error::is_ok(status));
200 method_to_call(
201 buf.as_mut_ptr() as *mut os::raw::c_char,
202 buffer_capacity as i32,
203 &mut status,
204 )
205 };
206
207 // ICU methods are inconsistent in whether they silently truncate the output or treat
208 // the overflow as an error, so we need to check both cases.
209 if status == sys::UErrorCode::U_BUFFER_OVERFLOW_ERROR ||
210 (Error::is_ok(status) &&
211 full_len > buffer_capacity
212 .try_into()
213 .map_err(|e| Error::wrapper(e))?) {
214
215 status = Error::OK_CODE;
216 assert!(full_len > 0);
217 let full_len: usize = full_len
218 .try_into()
219 .map_err(|e| Error::wrapper(e))?;
220 buf.resize(full_len, 0);
221
222 // Same unsafe requirements as above, plus full_len must be exactly the output
223 // buffer size.
224 {
225 assert!(Error::is_ok(status));
226 method_to_call(
227 buf.as_mut_ptr() as *mut os::raw::c_char,
228 full_len as i32,
229 &mut status,
230 )
231 };
232 }
233
234 Error::ok_or_warning(status)?;
235
236 // Adjust the size of the buffer here.
237 if full_len >= 0 {
238 let full_len: usize = full_len
239 .try_into()
240 .map_err(|e| Error::wrapper(e))?;
241 buf.resize(full_len, 0);
242 }
243 String::from_utf8(buf).map_err(|e| e.utf8_error().into())
244}
245
/// There is a slew of near-identical method calls which differ in the type of
/// the input argument and the name of the function to invoke.
///
/// The invocation:
///
/// ```rust ignore
/// impl ... {
///   // ...
///   format_ustring_for_type!(format_f64, unum_formatDouble, f64);
/// }
/// ```
///
/// allows us to bind the function:
///
/// ```c++ ignore
/// int32_t unum_formatDouble(
///     const UNumberFormat* fmt,
///     double number,
///     UChar* result,
///     int32_t result_length,
///     UFieldPosition* pos,
///     UErrorCode *status)
/// ```
///
/// as a pair of methods, one returning a Rust `String` and one (with the suffix `_ustring`)
/// returning the UTF-16 string:
///
/// ```rust ignore
/// impl ... {
///   pub fn format_f64(&self /* format */, number: f64) -> Result<String, common::Error>;
///   pub fn format_f64_ustring(&self /* format */, param: f64) -> Result<ustring::UChar, common::Error>;
/// }
/// ```
///
/// The expansion refers to `common`, `ustring`, `sys`, `paste`, `versioned_function!` and
/// `buffered_uchar_method_with_retry!` by their unqualified names, so all of them must be in
/// scope where the macro is invoked. `Self` must have an internal representation named
/// exactly `rep`.
#[macro_export]
macro_rules! format_ustring_for_type{
    ($method_name:ident, $function_name:ident, $type_decl:ty) => (
        #[doc = concat!("Implements `", stringify!($function_name), "`.")]
        pub fn $method_name(&self, number: $type_decl) -> Result<String, common::Error> {
            // Delegate to the `_ustring` sibling generated below, then convert its result.
            let result = paste::item! {
                self. [< $method_name _ustring>] (number)?
            };
            String::try_from(&result)
        }

        // Should be able to use https://github.com/google/rust_icu/pull/144 to
        // make this even shorter.
        paste::item! {
            #[doc = concat!("Implements `", stringify!($function_name), "`.")]
            pub fn [<$method_name _ustring>] (&self, param: $type_decl) -> Result<ustring::UChar, common::Error> {
                // Presumably the initial output buffer capacity -- TODO confirm against
                // `buffered_uchar_method_with_retry!`.
                const CAPACITY: usize = 200;
                // Presumably defines the local helper `<method_name>_ustring_impl` that is
                // called right below; the macro itself is not defined in this file.
                buffered_uchar_method_with_retry!(
                    [< $method_name _ustring_impl >],
                    CAPACITY,
                    [ rep: *const sys::UNumberFormat, param: $type_decl, ],
                    [ field: *mut sys::UFieldPosition, ]
                );

                [<$method_name _ustring_impl>](
                    versioned_function!($function_name),
                    self.rep.as_ptr(),
                    param,
                    // The field position is unused for now.
                    0 as *mut sys::UFieldPosition,
                )
            }
        }
    )
}
312
/// Expands into a getter method that forwards all its arguments to the underlying function
/// and returns, as a fallible value, whatever that function returned.
///
/// The invocation:
///
/// ```rust ignore
/// impl _ {
///     generalized_fallible_getter!(
///         get_context,
///         unum_getContext,
///         [context_type: sys::UDisplayContextType, ],
///         sys::UDisplayContext
///     );
/// }
/// ```
///
/// allows us to bind the function:
///
/// ```c++ ignore
/// UDisplayContext unum_getContext(
///     const SOMETYPE* t,
///     UDisplayContextType type,
///     UErrorCode* status
/// );
/// ```
///
/// which then becomes:
///
/// ```rust ignore
/// impl _ {
///   pub fn get_context(&self, context_type: sys::UDisplayContextType) -> Result<sys::UDisplayContext, common::Error>;
/// }
/// ```
/// where `Self` has an internal representation named exactly `Self::rep`.
///
/// The generated method yields `Err` only if the status written by the underlying function is
/// an error; warnings are accepted. The expansion refers to `common` and
/// `versioned_function!` by their unqualified names, so both must be in scope at the
/// invocation site.
#[macro_export]
macro_rules! generalized_fallible_getter {
    (
        $top_level_method_name:ident,
        $impl_name:ident,
        [ $( $arg:ident: $arg_type:ty ,)* ],
        $ret_type:ty
    ) => (
        #[doc = concat!("Implements `", stringify!($impl_name), "`.")]
        pub fn $top_level_method_name(&self, $( $arg: $arg_type, )* ) -> Result<$ret_type, common::Error> {
            let mut status = common::Error::OK_CODE;
            assert!(common::Error::is_ok(status));
            let value: $ret_type = unsafe {
                versioned_function!($impl_name)(self.rep.as_ptr(), $( $arg, )* &mut status)
            };
            common::Error::ok_or_warning(status).map(|_| value)
        }
    )
}
362
/// Expands into a setter method that forwards all its arguments between `[]`s and returns a
/// `Result<(), common::Error>`.
///
/// The invocation:
///
/// ```rust ignore
/// impl _ {
///     generalized_fallible_setter!(
///         set_context,
///         unum_setContext,
///         [value: sys::UDisplayContext, ]
///     );
/// }
/// ```
///
/// allows us to bind the function:
///
/// ```c++ ignore
/// void unum_setContext(
///     SOMETYPE* t,
///     UDisplayContext value,
///     UErrorCode* status
/// );
/// ```
///
/// which then becomes:
///
/// ```rust ignore
/// impl _ {
///   pub fn set_context(&self, value: sys::UDisplayContext) -> Result<(), common::Error>;
/// }
/// ```
/// where `Self` has an internal representation named exactly `Self::rep`.
///
/// This is a thin wrapper around [`generalized_fallible_getter!`] with `()` as the return
/// type. The getter macro is reached through `$crate`, so it does not itself need to be in
/// scope where the setter macro is invoked.
#[macro_export]
macro_rules! generalized_fallible_setter {
    ($top_level_method_name:ident, $impl_name:ident, [ $( $arg:ident : $arg_type:ty, )* ]) => (
        $crate::generalized_fallible_getter!(
            $top_level_method_name,
            $impl_name,
            [ $( $arg: $arg_type, )* ],
            ()
        );
    )
}
406
407/// Used to simulate an array of C-style strings.
408#[derive(Debug)]
409pub struct CStringVec {
410 // The internal representation of the vector of C strings.
411 rep: Vec<ffi::CString>,
412 // Same as rep, but converted into C pointers.
413 c_rep: Vec<*const os::raw::c_char>,
414}
415
416impl CStringVec {
417 /// Creates a new C string vector from the provided rust strings.
418 ///
419 /// C strings are continuous byte regions that end in `\0` and do not
420 /// contain `\0` anywhere else.
421 ///
422 /// Use `as_c_array` to get an unowned raw pointer to the array, to pass
423 /// into FFI C code.
424 pub fn new(strings: &[&str]) -> Result<Self, Error> {
425 let mut rep = Vec::with_capacity(strings.len());
426 // Convert all to asciiz strings and insert into the vector.
427 for elem in strings {
428 let asciiz = ffi::CString::new(*elem)?;
429 rep.push(asciiz);
430 }
431 let c_rep = rep.iter().map(|e| e.as_ptr()).collect();
432 Ok(CStringVec { rep, c_rep })
433 }
434
435 /// Returns the underlying array of C strings as a C array pointer. The
436 /// array must not change after construction to ensure that this pointer
437 /// remains valid.
438 pub fn as_c_array(&self) -> *const *const os::raw::c_char {
439 self.c_rep.as_ptr() as *const *const os::raw::c_char
440 }
441
442 /// Returns the number of elements in the vector.
443 pub fn len(&self) -> usize {
444 self.rep.len()
445 }
446
447 /// Returns whether the vector is empty.
448 pub fn is_empty(&self) -> bool {
449 self.rep.is_empty()
450 }
451}
452
453// Items used by macros. Unstable private API; do not use.
454#[doc(hidden)]
455pub mod __private_do_not_use {
456 pub use Drop;
457 pub use rust_icu_sys::versioned_function;
458}
459
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a parse error reporting the given position, with empty context buffers.
    fn parse_error_at(line: i32, offset: i32) -> sys::UParseError {
        sys::UParseError {
            line,
            offset,
            preContext: [0; 16usize],
            postContext: [0; 16usize],
        }
    }

    /// A buffer overflow is an error with its own code, but is exempt in preflight mode.
    #[test]
    fn test_error_code() {
        let error = Error::ok_or_warning(sys::UErrorCode::U_BUFFER_OVERFLOW_ERROR)
            .err()
            .unwrap();
        assert!(error.is_code(sys::UErrorCode::U_BUFFER_OVERFLOW_ERROR));
        assert!(!error.is_preflight_err());
        assert!(!error.is_code(sys::UErrorCode::U_ZERO_ERROR));
    }

    /// Well-formed strings convert one-to-one.
    #[test]
    fn test_into_char_array() {
        let values = ["eenie", "meenie", "minie", "moe"];
        let c_array = CStringVec::new(&values).expect("success");
        assert_eq!(c_array.len(), 4);
    }

    /// A string with an interior NUL byte can not be represented as a C string.
    #[test]
    fn test_with_embedded_nul_byte() {
        // A single NUL byte in the middle; the previous literal `\0x00` spelled a NUL
        // followed by the characters `x00`.
        let values = ["hell\x00o"];
        let _c_array = CStringVec::new(&values).expect_err("should fail");
    }

    /// Neither a zero nor a negative line/offset constitutes a parse error.
    #[test]
    fn test_parser_error_ok() {
        for &(line, offset) in &[(0, 0), (-1, 0), (0, -1)] {
            let test = parse_error_at(line, offset);
            assert!(parse_ok(test).is_ok(), "for test: {:?}", test);
        }
    }

    /// A positive line or a positive offset constitutes a parse error.
    #[test]
    fn test_parser_error_not_ok() {
        for &(line, offset) in &[(1, 0), (0, 1), (-1, 1)] {
            let test = parse_error_at(line, offset);
            assert!(parse_ok(test).is_err(), "for test: {:?}", test);
        }
    }
}
541
542/// A zero-value parse error, used to initialize types that get passed into FFI code.
543pub static NO_PARSE_ERROR: sys::UParseError = sys::UParseError {
544 line: 0,
545 offset: 0,
546 preContext: [0; 16usize],
547 postContext: [0; 16usize],
548};
549
550/// Converts a parse error to a Result.
551///
552/// A parse error is an error if line or offset are positive, apparently.
553pub fn parse_ok(e: sys::UParseError) -> Result<(), crate::Error> {
554 if e.line > 0 || e.offset > 0 {
555 return Err(Error::Wrapper(anyhow!(
556 "parse error: line: {}, offset: {}",
557 e.line,
558 e.offset
559 )));
560 }
561 Ok(())
562}
563