rust_icu_unorm2/
lib.rs

1// Copyright 2021 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//      http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Contains implementations of functions from ICU's `unorm2.h`.
16
17use {
18    rust_icu_common as common,
19    rust_icu_sys as sys,
20    rust_icu_sys::versioned_function,
21    rust_icu_ustring as ustring,
22    rust_icu_ustring::buffered_uchar_method_with_retry,
23};
24use std::convert::{TryFrom, TryInto};
25
26#[derive(Debug)]
27pub struct UNormalizer {
28    rep: std::ptr::NonNull<sys::UNormalizer2>,
29    owned: bool,
30}
31
32impl Drop for UNormalizer {
33    /// Implements `unorm2_close`
34    fn drop(&mut self) {
35        // Close the normalizer only if we own it.
36        if !self.owned {
37            return
38        }
39        unsafe {
40            versioned_function!(unorm2_close)(self.rep.as_ptr())
41        }
42    }
43}
44
45impl UNormalizer {
46    /// Implements `unorm2_getNFCInstance`.
47    pub fn new_nfc() -> Result<Self, common::Error> {
48        unsafe { UNormalizer::new_normalizer_unowned(versioned_function!(unorm2_getNFCInstance)) }
49    }
50
51    /// Implements `unorm2_getNFDInstance`.
52    pub fn new_nfd() -> Result<Self, common::Error> {
53        unsafe { UNormalizer::new_normalizer_unowned(versioned_function!(unorm2_getNFDInstance)) }
54    }
55
56    /// Implements `unorm2_getNFKCInstance`.
57    pub fn new_nfkc() -> Result<Self, common::Error> {
58        unsafe { UNormalizer::new_normalizer_unowned(versioned_function!(unorm2_getNFKCInstance)) }
59    }
60
61    /// Implements `unorm2_getNFKDInstance`.
62    pub fn new_nfkd() -> Result<Self, common::Error> {
63        unsafe { UNormalizer::new_normalizer_unowned(versioned_function!(unorm2_getNFKDInstance)) }
64    }
65
66    /// Implements `unorm2_getNFKCCasefoldInstance`.
67    pub fn new_nfkc_casefold() -> Result<Self, common::Error> {
68        unsafe { UNormalizer::new_normalizer_unowned(versioned_function!(unorm2_getNFKCCasefoldInstance)) }
69    }
70
71    unsafe fn new_normalizer_unowned(
72        constrfn: unsafe extern "C" fn(*mut sys::UErrorCode) -> *const sys::UNormalizer2) -> Result<Self, common::Error> {
73        let mut status = common::Error::OK_CODE;
74        let rep = {
75            assert!(common::Error::is_ok(status));
76            let ptr = constrfn(&mut status) as *mut sys::UNormalizer2;
77            std::ptr::NonNull::new_unchecked(ptr)
78        };
79        common::Error::ok_or_warning(status)?;
80        Ok(UNormalizer{ rep, owned: false })
81    }
82
83    /// Implements `unorm2_normalize`.
84    pub fn normalize(&self, norm: &str) -> Result<String, common::Error> {
85        let norm = ustring::UChar::try_from(norm)?;
86        let result = self.normalize_ustring(&norm)?;
87        String::try_from(&result)
88    }
89
90    /// Implements `unorm2_normalize`.
91    pub fn normalize_ustring(
92        &self,
93        norm: &ustring::UChar
94        ) -> Result<ustring::UChar, common::Error> {
95        const CAPACITY: usize = 200;
96        buffered_uchar_method_with_retry!(
97            norm_uchar,
98            CAPACITY,
99            [ptr: *const sys::UNormalizer2, s: *const sys::UChar, l: i32,],
100            []
101        );
102        let result = norm_uchar(
103            versioned_function!(unorm2_normalize),
104            self.rep.as_ptr(),
105            norm.as_c_ptr(),
106            norm.len() as i32,
107            )?;
108        Ok(result)
109    }
110
111    /// Implements `unorm2_composePair`.
112    pub fn compose_pair(&self, point1: sys::UChar32, point2: sys::UChar32) -> sys::UChar32 {
113        let result: sys::UChar32 = unsafe {
114            versioned_function!(unorm2_composePair)(
115                self.rep.as_ptr(), point1, point2)
116        };
117        result
118    }
119
120}
121
#[cfg(test)]
mod tests {
    use super::*;
    use rust_icu_ustring::UChar;

    /// `compose_pair` on an NFKC normalizer: one pair without a composite and
    /// several base-letter + combining-mark pairs that do compose.
    #[test]
    fn test_compose_pair_nfkc() -> Result<(), common::Error> {
        struct Test {
            p1: sys::UChar32,
            p2: sys::UChar32,
            ex: sys::UChar32,
        }
        let base = 'A' as sys::UChar32;
        let tests = [
            // No composite exists for this pair.
            Test { p1: 1, p2: 0, ex: -1 },
            // See the article: https://en.wikipedia.org/wiki/Combining_character
            // LATIN CAPITAL LETTER A WITH GRAVE
            Test { p1: base, p2: 0x300, ex: 'À' as sys::UChar32 },
            // LATIN CAPITAL LETTER A WITH ACUTE
            Test { p1: base, p2: 0x301, ex: 'Á' as sys::UChar32 },
            // LATIN CAPITAL LETTER A WITH CIRCUMFLEX
            Test { p1: base, p2: 0x302, ex: 'Â' as sys::UChar32 },
            // LATIN CAPITAL LETTER A WITH TILDE
            Test { p1: base, p2: 0x303, ex: 'Ã' as sys::UChar32 },
        ];

        // The normalizer does not depend on the test case, so build it once.
        let normalizer = UNormalizer::new_nfkc()?;
        for t in &tests {
            assert_eq!(normalizer.compose_pair(t.p1, t.p2), t.ex);
        }
        Ok(())
    }

    /// Regression test for an output longer than the 200-unit capacity used by
    /// `normalize_ustring`.
    // https://github.com/google/rust_icu/issues/244
    #[test]
    fn test_long_input_string() -> Result<(), common::Error> {
        let s = "탐".repeat(67);
        let u = UChar::try_from(s.as_str())?;
        let normalizer = UNormalizer::new_nfd()?;
        let result = normalizer.normalize_ustring(&u)?;

        // Under NFD each "탐" (U+D0D0) decomposes into three jamo
        // (U+1110 U+1161 U+11B7), so 67 syllables yield 201 UTF-16 units.
        assert_eq!(result.len(), 67 * 3);
        Ok(())
    }
}