rust_icu_unorm2/
lib.rs
1use {
18 rust_icu_common as common,
19 rust_icu_sys as sys,
20 rust_icu_sys::versioned_function,
21 rust_icu_ustring as ustring,
22 rust_icu_ustring::buffered_uchar_method_with_retry,
23};
24use std::convert::{TryFrom, TryInto};
25
/// A handle to an ICU `UNormalizer2` instance.
///
/// Values are obtained through the `new_*` constructors and expose the ICU
/// normalization operations as methods.
#[derive(Debug)]
pub struct UNormalizer {
    // Non-null pointer to the underlying ICU normalizer object.
    rep: std::ptr::NonNull<sys::UNormalizer2>,
    // When `true`, dropping this value closes `rep` with `unorm2_close`;
    // when `false`, the pointer is left untouched on drop.
    owned: bool,
}
31
32impl Drop for UNormalizer {
33 fn drop(&mut self) {
35 if !self.owned {
37 return
38 }
39 unsafe {
40 versioned_function!(unorm2_close)(self.rep.as_ptr())
41 }
42 }
43}
44
45impl UNormalizer {
46 pub fn new_nfc() -> Result<Self, common::Error> {
48 unsafe { UNormalizer::new_normalizer_unowned(versioned_function!(unorm2_getNFCInstance)) }
49 }
50
51 pub fn new_nfd() -> Result<Self, common::Error> {
53 unsafe { UNormalizer::new_normalizer_unowned(versioned_function!(unorm2_getNFDInstance)) }
54 }
55
56 pub fn new_nfkc() -> Result<Self, common::Error> {
58 unsafe { UNormalizer::new_normalizer_unowned(versioned_function!(unorm2_getNFKCInstance)) }
59 }
60
61 pub fn new_nfkd() -> Result<Self, common::Error> {
63 unsafe { UNormalizer::new_normalizer_unowned(versioned_function!(unorm2_getNFKDInstance)) }
64 }
65
66 pub fn new_nfkc_casefold() -> Result<Self, common::Error> {
68 unsafe { UNormalizer::new_normalizer_unowned(versioned_function!(unorm2_getNFKCCasefoldInstance)) }
69 }
70
71 unsafe fn new_normalizer_unowned(
72 constrfn: unsafe extern "C" fn(*mut sys::UErrorCode) -> *const sys::UNormalizer2) -> Result<Self, common::Error> {
73 let mut status = common::Error::OK_CODE;
74 let rep = {
75 assert!(common::Error::is_ok(status));
76 let ptr = constrfn(&mut status) as *mut sys::UNormalizer2;
77 std::ptr::NonNull::new_unchecked(ptr)
78 };
79 common::Error::ok_or_warning(status)?;
80 Ok(UNormalizer{ rep, owned: false })
81 }
82
83 pub fn normalize(&self, norm: &str) -> Result<String, common::Error> {
85 let norm = ustring::UChar::try_from(norm)?;
86 let result = self.normalize_ustring(&norm)?;
87 String::try_from(&result)
88 }
89
90 pub fn normalize_ustring(
92 &self,
93 norm: &ustring::UChar
94 ) -> Result<ustring::UChar, common::Error> {
95 const CAPACITY: usize = 200;
96 buffered_uchar_method_with_retry!(
97 norm_uchar,
98 CAPACITY,
99 [ptr: *const sys::UNormalizer2, s: *const sys::UChar, l: i32,],
100 []
101 );
102 let result = norm_uchar(
103 versioned_function!(unorm2_normalize),
104 self.rep.as_ptr(),
105 norm.as_c_ptr(),
106 norm.len() as i32,
107 )?;
108 Ok(result)
109 }
110
111 pub fn compose_pair(&self, point1: sys::UChar32, point2: sys::UChar32) -> sys::UChar32 {
113 let result: sys::UChar32 = unsafe {
114 versioned_function!(unorm2_composePair)(
115 self.rep.as_ptr(), point1, point2)
116 };
117 result
118 }
119
120}
121
#[cfg(test)]
mod tests {
    use super::*;
    use rust_icu_ustring::UChar;

    /// Checks `compose_pair` on the NFKC normalizer against a table of
    /// (first, second, expected) code points.
    #[test]
    fn test_compose_pair_nfkc() -> Result<(), common::Error> {
        struct Test {
            p1: sys::UChar32,
            p2: sys::UChar32,
            ex: sys::UChar32,
        }
        let tests = [
            // A pair that does not compose yields -1.
            Test { p1: 1, p2: 0, ex: -1 },
            // 'A' followed by a combining accent composes to the accented letter.
            Test { p1: 'A' as sys::UChar32, p2: 0x300, ex: 'À' as sys::UChar32 },
            Test { p1: 'A' as sys::UChar32, p2: 0x301, ex: 'Á' as sys::UChar32 },
            Test { p1: 'A' as sys::UChar32, p2: 0x302, ex: 'Â' as sys::UChar32 },
            Test { p1: 'A' as sys::UChar32, p2: 0x303, ex: 'Ã' as sys::UChar32 },
        ];

        // The normalizer does not change between cases, so build it once.
        let n = UNormalizer::new_nfkc()?;
        for t in &tests {
            assert_eq!(
                n.compose_pair(t.p1, t.p2),
                t.ex,
                "compose_pair({:#x}, {:#x})",
                t.p1,
                t.p2
            );
        }
        Ok(())
    }

    /// Checks that normalizing a long input succeeds.
    ///
    /// Under NFD each '탐' decomposes into three jamo, so 67 of them yield
    /// 201 UTF-16 code units, one more than the 200-unit capacity used by
    /// `normalize_ustring`. Presumably this exercises the helper's retry path.
    #[test]
    fn test_long_input_string() -> Result<(), common::Error> {
        let s = "탐".repeat(67);
        let u = UChar::try_from(s.as_str())?;
        let normalizer = UNormalizer::new_nfd()?;
        normalizer.normalize_ustring(&u)?;

        Ok(())
    }
}