rust_icu_udata/lib.rs
1// Copyright 2019 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::path::Path;
16use std::ffi;
17
18use {
19 rust_icu_common as common, rust_icu_sys as sys, rust_icu_sys::versioned_function,
20 std::convert::TryFrom, std::os::raw,
21};
22
23/// Variants of [UDataMemory].
24#[derive(Debug)]
25enum Rep {
26 /// The data memory is backed by a user-supplied buffer.
27 Buffer(Vec<u8>),
28 /// The data memory is backed by a resource file.
29 Resource(
30 // This would have been std::ptr::NonNull if we didn't have to
31 // implement Send and Sync.
32 // We only ever touch this pointer in Rust when we initialize
33 // Rep::Resource, and when we dealocate Rep::Resource.
34 *const sys::UDataMemory,
35 ),
36}
37
38// Safety: The *const sys::UDataMemory above is only used by the underlying C++
39// library.
40unsafe impl Send for Rep {}
41unsafe impl Sync for Rep {}
42
43/// Sets the directory from which to load ICU data resources.
44///
45/// Implements `u_setDataDirectory`.
46pub fn set_data_directory(dir: &Path) {
47 let dir_cstr = ffi::CString::new
48 (dir.to_str().expect("this should never be a runtime error"))
49 .expect("this should never be a runtim error");
50 unsafe {
51 versioned_function!(u_setDataDirectory)(dir_cstr.as_ptr())
52 };
53}
54
/// The type of the ICU resource requested.  Some standard resources have their
/// canned types.  In case you run into one that is not captured here, use
/// `Custom`, and consider sending a pull request to add the new resource type.
///
/// The string form handed to ICU4C is available through the `AsRef<str>`
/// implementation.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Type {
    /// An empty resource type.  This is ostensibly allowed, but unclear when
    /// it is applicable.
    Empty,
    /// The unpacked resource type, equivalent to "res" in ICU4C.
    Res,
    /// The cnv resource type, equivalent to "cnv" in ICU4C.
    Cnv,
    /// The "common" data type, equivalent to "dat" in ICU4C.
    Dat,
    /// A custom data type, in case none of the above fit your use case.  It
    /// is not clear whether this would ever be useful, but the ICU4C API
    /// allows for it, so we must too.
    Custom(String),
}

impl AsRef<str> for Type {
    /// Returns the string spelling of this resource type: the empty string
    /// for `Empty`, the fixed names `"res"`, `"cnv"` and `"dat"` for the
    /// canned variants, and the wrapped string for `Custom`.
    fn as_ref(&self) -> &str {
        match self {
            Type::Empty => "",
            Type::Res => "res",
            Type::Dat => "dat",
            Type::Cnv => "cnv",
            Type::Custom(s) => s.as_str(),
        }
    }
}
85
86/// Implements `UDataMemory`.
87///
88/// Represents data memory backed by a borrowed memory buffer used for loading ICU data.
89/// [UDataMemory] is very much not thread safe, as it affects the global state of the ICU library.
90/// This suggests that the best way to use this data is to load it up in a main thread, or access
91/// it through a synchronized wrapper.
92#[derive(Debug)]
93pub struct UDataMemory {
94 // The internal representation of [UDataMemory].
95 // May vary, depending on the way the struct is created.
96 //
97 // See: [UDataMemory::try_from] and [UDataMemory::open].
98 rep: Rep,
99}
100
101impl Drop for UDataMemory {
102 // Implements `u_cleanup`.
103 fn drop(&mut self) {
104 if let Rep::Resource(r) = self.rep {
105 unsafe {
106 // Safety: there is no other way to close the memory that the
107 // underlying C++ library uses but to pass it into this function.
108 versioned_function!(udata_close)(r as *mut sys::UDataMemory)
109 };
110 }
111 // Without this, resource references will remain, but memory will be gone.
112 unsafe {
113 // Safety: no other way to call this function.
114 versioned_function!(u_cleanup)()
115 };
116 }
117}
118
119impl TryFrom<Vec<u8>> for crate::UDataMemory {
120 type Error = common::Error;
121 /// Makes a UDataMemory out of a buffer.
122 ///
123 /// Implements `udata_setCommonData`.
124 fn try_from(buf: Vec<u8>) -> Result<Self, Self::Error> {
125 let mut status = sys::UErrorCode::U_ZERO_ERROR;
126 // Expects that buf is a valid pointer and that it contains valid
127 // ICU data. If data is invalid, an error status will be set.
128 // No guarantees for invalid pointers.
129 unsafe {
130 versioned_function!(udata_setCommonData)(
131 buf.as_ptr() as *const raw::c_void,
132 &mut status,
133 );
134 };
135 common::Error::ok_or_warning(status)?;
136 Ok(UDataMemory { rep: Rep::Buffer(buf) })
137 }
138}
139
140impl crate::UDataMemory {
141
142 /// Uses the resources from the supplied resource file.
143 ///
144 /// This may end up being more efficient compared to loading from a buffer,
145 /// as ostensibly the resources would be memory mapped to only the needed
146 /// parts.
147 ///
148 /// - The `path` is the file path at which to find the resource file. Ostensibly
149 /// specifying `None` here will load from the "default" ICU_DATA path.
150 /// I have not been able to confirm this.
151 ///
152 /// - The `a_type` is the type of the resource file. It is not clear whether
153 /// the resource file type is a closed or open set, so we provide for both
154 /// possibilities.
155 ///
156 /// - The `name` is the name of the resource file. It is documented nullable
157 /// in the ICU documentation. Pass `None` here to pass nullptr to the
158 /// underlying C API.
159 ///
160 /// Presumably using `UDataMemory::open(Some("/dir/too"), Type::Res, Some("filename")` would
161 /// attempt to load ICU data from `/dir/too/filename.res`, as well as some other
162 /// canonical permutations of the above. The full documentation is
163 /// [here][1], although I could not confirm that the documentation is actually
164 /// describing what the code does. Also, using `None` at appropriate places
165 /// seems to be intended to load data from [some "default" sites][2]. I have
166 /// however observed that the actual behavior diverges from that documentation.
167 ///
168 /// Implements `udata_open`.
169 ///
170 /// [1]: https://unicode-org.github.io/icu/userguide/icu_data/#how-data-loading-works
171 /// [2]: https://unicode-org.github.io/icu/userguide/icu_data/#icu-data-directory
172 pub fn open(path: Option<&Path>, a_type: Type, name: Option<&str>) -> Result<Self, common::Error> {
173 let mut status = sys::UErrorCode::U_ZERO_ERROR;
174
175 let path_cstr = path.map(|s| { ffi::CString::new(s.to_str().expect("should never be a runtime error")).unwrap()});
176 let name_cstr = name.map(|s| { ffi::CString::new(s).expect("should never be a runtime error") } );
177 let type_cstr = ffi::CString::new(a_type.as_ref()).expect("should never be a runtime errror");
178
179 let rep = Self::get_resource(
180 path_cstr.as_ref().map(|s| s.as_c_str()),
181 type_cstr.as_c_str(),
182 name_cstr.as_ref().map(|s| s.as_c_str()),
183 &mut status);
184 common::Error::ok_or_warning(status)?;
185
186 // Make sure that all CStrs outlive the call to Self::get_resource. It is
187 // all too easy to omit `path_cstr.as_ref()` above, resulting in *_cstr
188 // being destroyed before a call to Self::get_resource happens. Fun.
189 let (_a, _b, _c) = (path_cstr, name_cstr, type_cstr);
190 Ok(crate::UDataMemory{ rep })
191 }
192
193 fn get_resource(path: Option<&ffi::CStr>, a_type: &ffi::CStr, name: Option<&ffi::CStr>, status: &mut sys::UErrorCode) -> Rep {
194 unsafe {
195 // Safety: we do what we must to call the underlying unsafe C API, and only return an
196 // opaque enum, to ensure that no rust client code may touch the raw pointer.
197 assert!(common::Error::is_ok(*status));
198
199 // Would be nicer if there were examples of udata_open usage to
200 // verify this.
201 let rep: *const sys::UDataMemory = versioned_function!(udata_open)(
202 path.map(|s| s.as_ptr()).unwrap_or(std::ptr::null()),
203 a_type.as_ptr(),
204 name.map(|c| c.as_ptr()).unwrap_or(std::ptr::null()),
205 status);
206 // Sadly we can not use NonNull, as we can not make the resulting
207 // type Sync or Send.
208 assert!(!rep.is_null());
209 Rep::Resource(rep)
210 }
211 }
212}
213
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::{Arc, Mutex, Weak};
    use std::thread;

    // UDataMemory is not used in threaded contexts in this crate, but users of
    // the crate do use it that way.  These tests make sure that keeps
    // compiling.
    #[test]
    fn send_sync_impl() {
        let shared: Arc<Mutex<Weak<UDataMemory>>> = Arc::new(Mutex::new(Weak::new()));
        // Ensure Sync.
        let _second_handle = Arc::clone(&shared);
        thread::spawn(move || {
            // Ensure Send.
            let _moved = shared;
        });
    }

    #[test]
    fn send_impl() {
        let weak: Weak<UDataMemory> = Weak::new();
        let _second_handle = Weak::clone(&weak);
        thread::spawn(move || {
            let _moved = weak;
        });
    }
}