ttf_parser/tables/cmap/format4.rs
1// https://docs.microsoft.com/en-us/typography/opentype/spec/cmap#format-4-segment-mapping-to-delta-values
2
3use core::convert::TryFrom;
4
5use crate::parser::Stream;
6
7pub fn parse(data: &[u8], code_point: u32) -> Option<u16> {
8 // This subtable supports code points only in a u16 range.
9 let code_point = u16::try_from(code_point).ok()?;
10
11 let mut s = Stream::new(data);
12 s.advance(6); // format + length + language
13 let seg_count_x2: u16 = s.read()?;
14 if seg_count_x2 < 2 {
15 return None;
16 }
17
18 let seg_count = seg_count_x2 / 2;
19 s.advance(6); // searchRange + entrySelector + rangeShift
20
21 let end_codes = s.read_array16::<u16>(seg_count)?;
22 s.skip::<u16>(); // reservedPad
23 let start_codes = s.read_array16::<u16>(seg_count)?;
24 let id_deltas = s.read_array16::<i16>(seg_count)?;
25 let id_range_offset_pos = s.offset();
26 let id_range_offsets = s.read_array16::<u16>(seg_count)?;
27
28 // A custom binary search.
29 let mut start = 0;
30 let mut end = seg_count;
31 while end > start {
32 let index = (start + end) / 2;
33 let end_value = end_codes.get(index)?;
34 if end_value >= code_point {
35 let start_value = start_codes.get(index)?;
36 if start_value > code_point {
37 end = index;
38 } else {
39 let id_range_offset = id_range_offsets.get(index)?;
40 let id_delta = id_deltas.get(index)?;
41 if id_range_offset == 0 {
42 return Some(code_point.wrapping_add(id_delta as u16));
43 }
44
45 let delta = (u32::from(code_point) - u32::from(start_value)) * 2;
46 let delta = u16::try_from(delta).ok()?;
47
48 let id_range_offset_pos = (id_range_offset_pos + usize::from(index) * 2) as u16;
49 let pos = id_range_offset_pos.wrapping_add(delta);
50 let pos = pos.wrapping_add(id_range_offset);
51 let glyph_array_value: u16 = Stream::read_at(data, usize::from(pos))?;
52
53 // 0 indicates missing glyph.
54 if glyph_array_value == 0 {
55 return None;
56 }
57
58 let glyph_id = (glyph_array_value as i16).wrapping_add(id_delta);
59 return u16::try_from(glyph_id).ok();
60 }
61 } else {
62 start = index + 1;
63 }
64 }
65
66 None
67}
68
69pub fn codepoints(data: &[u8], mut f: impl FnMut(u32)) -> Option<()> {
70 let mut s = Stream::new(data);
71 s.advance(6); // format + length + language
72 let seg_count_x2: u16 = s.read()?;
73 if seg_count_x2 < 2 {
74 return None;
75 }
76
77 let seg_count = seg_count_x2 / 2;
78 s.advance(6); // searchRange + entrySelector + rangeShift
79
80 let end_codes = s.read_array16::<u16>(seg_count)?;
81 s.skip::<u16>(); // reservedPad
82 let start_codes = s.read_array16::<u16>(seg_count)?;
83
84 for (start, end) in start_codes.into_iter().zip(end_codes) {
85 for code_point in start..=end {
86 f(u32::from(code_point));
87 }
88 }
89
90 Some(())
91}
92
#[cfg(test)]
mod tests {
    use super::{codepoints, parse};

    /// A well-formed subtable that maps only U+0041 to glyph 1.
    const SINGLE_GLYPH_TABLE: &[u8] = &[
        0x00, 0x04, // format: 4
        0x00, 0x20, // length: 32
        0x00, 0x00, // language: 0
        0x00, 0x04, // segCountX2: 4
        0x00, 0x02, // searchRange: 2
        0x00, 0x00, // entrySelector: 0
        0x00, 0x02, // rangeShift: 2
        // endCode[]
        0x00, 0x41, // [0]: 65
        0xFF, 0xFF, // [1]: 65535
        0x00, 0x00, // reservedPad
        // startCode[]
        0x00, 0x41, // [0]: 65
        0xFF, 0xFF, // [1]: 65535
        // idDelta[]
        0xFF, 0xC0, // [0]: -64
        0x00, 0x01, // [1]: 1
        // idRangeOffset[]
        0x00, 0x00, // [0]: 0
        0x00, 0x00, // [1]: 0
    ];

    /// Checks every `(code point, expected glyph)` pair against `table`.
    fn assert_lookups(table: &[u8], cases: &[(u32, Option<u16>)]) {
        for &(code_point, expected) in cases {
            assert_eq!(
                parse(table, code_point),
                expected,
                "code point {:#06X}",
                code_point
            );
        }
    }

    #[test]
    fn single_glyph() {
        assert_lookups(SINGLE_GLYPH_TABLE, &[(0x41, Some(1)), (0x42, None)]);
    }

    #[test]
    fn continuous_range() {
        let table: &[u8] = &[
            0x00, 0x04, // format: 4
            0x00, 0x20, // length: 32
            0x00, 0x00, // language: 0
            0x00, 0x04, // segCountX2: 4
            0x00, 0x02, // searchRange: 2
            0x00, 0x00, // entrySelector: 0
            0x00, 0x02, // rangeShift: 2
            // endCode[]
            0x00, 0x49, // [0]: 73
            0xFF, 0xFF, // [1]: 65535
            0x00, 0x00, // reservedPad
            // startCode[]
            0x00, 0x41, // [0]: 65
            0xFF, 0xFF, // [1]: 65535
            // idDelta[]
            0xFF, 0xC0, // [0]: -64
            0x00, 0x01, // [1]: 1
            // idRangeOffset[]
            0x00, 0x00, // [0]: 0
            0x00, 0x00, // [1]: 0
        ];

        // Just outside the segment on both sides.
        assert_eq!(parse(table, 0x40), None);
        assert_eq!(parse(table, 0x4A), None);

        // U+0041..=U+0049 map to glyphs 1..=9.
        for (code_point, glyph) in (0x41u32..=0x49).zip(1u16..=9) {
            assert_eq!(parse(table, code_point), Some(glyph));
        }
    }

    #[test]
    fn multiple_ranges() {
        let table: &[u8] = &[
            0x00, 0x04, // format: 4
            0x00, 0x30, // length: 48
            0x00, 0x00, // language: 0
            0x00, 0x08, // segCountX2: 8
            0x00, 0x04, // searchRange: 4
            0x00, 0x01, // entrySelector: 1
            0x00, 0x04, // rangeShift: 4
            // endCode[]
            0x00, 0x41, // [0]: 65
            0x00, 0x45, // [1]: 69
            0x00, 0x49, // [2]: 73
            0xFF, 0xFF, // [3]: 65535
            0x00, 0x00, // reservedPad
            // startCode[]
            0x00, 0x41, // [0]: 65
            0x00, 0x43, // [1]: 67
            0x00, 0x47, // [2]: 71
            0xFF, 0xFF, // [3]: 65535
            // idDelta[]
            0xFF, 0xC0, // [0]: -64
            0xFF, 0xBF, // [1]: -65
            0xFF, 0xBE, // [2]: -66
            0x00, 0x01, // [3]: 1
            // idRangeOffset[]
            0x00, 0x00, // [0]: 0
            0x00, 0x00, // [1]: 0
            0x00, 0x00, // [2]: 0
            0x00, 0x00, // [3]: 0
        ];

        assert_lookups(
            table,
            &[
                (0x40, None),
                (0x41, Some(1)),
                (0x42, None),
                (0x43, Some(2)),
                (0x44, Some(3)),
                (0x45, Some(4)),
                (0x46, None),
                (0x47, Some(5)),
                (0x48, Some(6)),
                (0x49, Some(7)),
                (0x4A, None),
            ],
        );
    }

    #[test]
    fn unordered_ids() {
        let table: &[u8] = &[
            0x00, 0x04, // format: 4
            0x00, 0x2A, // length: 42
            0x00, 0x00, // language: 0
            0x00, 0x04, // segCountX2: 4
            0x00, 0x02, // searchRange: 2
            0x00, 0x00, // entrySelector: 0
            0x00, 0x02, // rangeShift: 2
            // endCode[]
            0x00, 0x45, // [0]: 69
            0xFF, 0xFF, // [1]: 65535
            0x00, 0x00, // reservedPad
            // startCode[]
            0x00, 0x41, // [0]: 65
            0xFF, 0xFF, // [1]: 65535
            // idDelta[]
            0x00, 0x00, // [0]: 0
            0x00, 0x01, // [1]: 1
            // idRangeOffset[]
            0x00, 0x04, // [0]: 4
            0x00, 0x00, // [1]: 0
            // glyphIdArray[]
            0x00, 0x01, // [0]: 1
            0x00, 0x0A, // [1]: 10
            0x00, 0x64, // [2]: 100
            0x03, 0xE8, // [3]: 1000
            0x27, 0x10, // [4]: 10000
        ];

        assert_lookups(
            table,
            &[
                (0x40, None),
                (0x41, Some(1)),
                (0x42, Some(10)),
                (0x43, Some(100)),
                (0x44, Some(1000)),
                (0x45, Some(10000)),
                (0x46, None),
            ],
        );
    }

    #[test]
    fn unordered_chars_and_ids() {
        let table: &[u8] = &[
            0x00, 0x04, // format: 4
            0x00, 0x40, // length: 64
            0x00, 0x00, // language: 0
            0x00, 0x0C, // segCountX2: 12
            0x00, 0x08, // searchRange: 8
            0x00, 0x02, // entrySelector: 2
            0x00, 0x04, // rangeShift: 4
            // endCode[]
            0x00, 0x50, // [0]: 80
            0x01, 0x00, // [1]: 256
            0x01, 0x50, // [2]: 336
            0x02, 0x00, // [3]: 512
            0x02, 0x50, // [4]: 592
            0xFF, 0xFF, // [5]: 65535
            0x00, 0x00, // reservedPad
            // startCode[]
            0x00, 0x50, // [0]: 80
            0x01, 0x00, // [1]: 256
            0x01, 0x50, // [2]: 336
            0x02, 0x00, // [3]: 512
            0x02, 0x50, // [4]: 592
            0xFF, 0xFF, // [5]: 65535
            // idDelta[]
            0xFF, 0xB1, // [0]: -79
            0xFF, 0x0A, // [1]: -246
            0xFF, 0x14, // [2]: -236
            0x01, 0xE8, // [3]: 488
            0x24, 0xC0, // [4]: 9408
            0x00, 0x01, // [5]: 1
            // idRangeOffset[]
            0x00, 0x00, // [0]: 0
            0x00, 0x00, // [1]: 0
            0x00, 0x00, // [2]: 0
            0x00, 0x00, // [3]: 0
            0x00, 0x00, // [4]: 0
            0x00, 0x00, // [5]: 0
        ];

        assert_lookups(
            table,
            &[
                (0x40, None),
                (0x50, Some(1)),
                (0x100, Some(10)),
                (0x150, Some(100)),
                (0x200, Some(1000)),
                (0x250, Some(10000)),
                (0x300, None),
            ],
        );
    }

    #[test]
    fn no_end_codes() {
        let table: &[u8] = &[
            0x00, 0x04, // format: 4
            0x00, 0x20, // length: 32 (as declared; two entries are dropped below)
            0x00, 0x00, // language: 0
            0x00, 0x04, // segCountX2: 4
            0x00, 0x02, // searchRange: 2
            0x00, 0x00, // entrySelector: 0
            0x00, 0x02, // rangeShift: 2
            // endCode[]
            0x00, 0x49, // [0]: 73
            // 0xFF, 0xFF, // [1]: 65535 <-- removed
            0x00, 0x00, // reservedPad
            // startCode[]
            0x00, 0x41, // [0]: 65
            // 0xFF, 0xFF, // [1]: 65535 <-- removed
            // idDelta[]
            0xFF, 0xC0, // [0]: -64
            0x00, 0x01, // [1]: 1
            // idRangeOffset[]
            0x00, 0x00, // [0]: 0
            0x00, 0x00, // [1]: 0
        ];

        // Nothing must resolve when the closing segment is missing.
        for code_point in 0x40..=0x4A {
            assert_eq!(parse(table, code_point), None);
        }
    }

    #[test]
    fn invalid_segment_count() {
        let table: &[u8] = &[
            0x00, 0x04, // format: 4
            0x00, 0x20, // length: 32
            0x00, 0x00, // language: 0
            0x00, 0x01, // segCountX2: 1 <-- must be more than 1
            0x00, 0x02, // searchRange: 2
            0x00, 0x00, // entrySelector: 0
            0x00, 0x02, // rangeShift: 2
            // endCode[]
            0x00, 0x41, // [0]: 65
            0xFF, 0xFF, // [1]: 65535
            0x00, 0x00, // reservedPad
            // startCode[]
            0x00, 0x41, // [0]: 65
            0xFF, 0xFF, // [1]: 65535
            // idDelta[]
            0xFF, 0xC0, // [0]: -64
            0x00, 0x01, // [1]: 1
            // idRangeOffset[]
            0x00, 0x00, // [0]: 0
            0x00, 0x00, // [1]: 0
        ];

        assert_eq!(parse(table, 0x41), None);
    }

    #[test]
    fn only_end_segments() {
        let table: &[u8] = &[
            0x00, 0x04, // format: 4
            0x00, 0x20, // length: 32
            0x00, 0x00, // language: 0
            0x00, 0x02, // segCountX2: 2
            0x00, 0x02, // searchRange: 2
            0x00, 0x00, // entrySelector: 0
            0x00, 0x02, // rangeShift: 2
            // endCode[]: only the closing entry
            0xFF, 0xFF, // 65535
            0x00, 0x00, // reservedPad
            // startCode[]: only the closing entry
            0xFF, 0xFF, // 65535
            // Remaining bytes, kept exactly as in the other fixtures.
            0xFF, 0xC0, // -64
            0x00, 0x01, // 1
            0x00, 0x00, // 0
            0x00, 0x00, // 0
        ];

        // Should not loop forever.
        assert_eq!(parse(table, 0x41), None);
    }

    #[test]
    fn invalid_length() {
        let table: &[u8] = &[
            0x00, 0x04, // format: 4
            0x00, 0x10, // length: 16 <-- should be 32, but it is not checked anyway
            0x00, 0x00, // language: 0
            0x00, 0x04, // segCountX2: 4
            0x00, 0x02, // searchRange: 2
            0x00, 0x00, // entrySelector: 0
            0x00, 0x02, // rangeShift: 2
            // endCode[]
            0x00, 0x41, // [0]: 65
            0xFF, 0xFF, // [1]: 65535
            0x00, 0x00, // reservedPad
            // startCode[]
            0x00, 0x41, // [0]: 65
            0xFF, 0xFF, // [1]: 65535
            // idDelta[]
            0xFF, 0xC0, // [0]: -64
            0x00, 0x01, // [1]: 1
            // idRangeOffset[]
            0x00, 0x00, // [0]: 0
            0x00, 0x00, // [1]: 0
        ];

        assert_lookups(table, &[(0x41, Some(1)), (0x42, None)]);
    }

    #[test]
    fn codepoint_out_of_range() {
        // Format 4 supports only u16 code points, so anything larger has to
        // bail out immediately.
        assert_eq!(parse(SINGLE_GLYPH_TABLE, 0x1FFFF), None);
    }

    #[test]
    fn zero() {
        let table: &[u8] = &[
            0x00, 0x04, // format: 4
            0x00, 0x2A, // length: 42
            0x00, 0x00, // language: 0
            0x00, 0x04, // segCountX2: 4
            0x00, 0x02, // searchRange: 2
            0x00, 0x00, // entrySelector: 0
            0x00, 0x02, // rangeShift: 2
            // endCode[]
            0x00, 0x45, // [0]: 69
            0xFF, 0xFF, // [1]: 65535
            0x00, 0x00, // reservedPad
            // startCode[]
            0x00, 0x41, // [0]: 65
            0xFF, 0xFF, // [1]: 65535
            // idDelta[]
            0x00, 0x00, // [0]: 0
            0x00, 0x01, // [1]: 1
            // idRangeOffset[]
            0x00, 0x04, // [0]: 4
            0x00, 0x00, // [1]: 0
            // glyphIdArray[]
            0x00, 0x00, // [0]: 0 <-- indicates missing glyph
            0x00, 0x0A, // [1]: 10
            0x00, 0x64, // [2]: 100
            0x03, 0xE8, // [3]: 1000
            0x27, 0x10, // [4]: 10000
        ];

        assert_eq!(parse(table, 0x41), None);
    }

    #[test]
    fn collect_codepoints() {
        let table: &[u8] = &[
            0x00, 0x04, // format: 4
            0x00, 0x18, // length: 24
            0x00, 0x00, // language: 0
            0x00, 0x04, // segCountX2: 4
            0x00, 0x02, // searchRange: 2
            0x00, 0x00, // entrySelector: 0
            0x00, 0x02, // rangeShift: 2
            // endCode[]
            0x00, 0x22, // [0]: 34
            0xFF, 0xFF, // [1]: 65535
            0x00, 0x00, // reservedPad
            // startCode[]
            0x00, 0x1B, // [0]: 27
            0xFF, 0xFD, // [1]: 65533
            // `codepoints` does not care about glyph ids, so nothing follows.
        ];

        let mut seen = Vec::new();
        codepoints(table, |code_point| seen.push(code_point));
        assert_eq!(seen, [27, 28, 29, 30, 31, 32, 33, 34, 65533, 65534, 65535]);
    }
}