ttf_parser/tables/cmap/
format4.rs

1// https://docs.microsoft.com/en-us/typography/opentype/spec/cmap#format-4-segment-mapping-to-delta-values
2
3use core::convert::TryFrom;
4
5use crate::parser::Stream;
6
7pub fn parse(data: &[u8], code_point: u32) -> Option<u16> {
8    // This subtable supports code points only in a u16 range.
9    let code_point = u16::try_from(code_point).ok()?;
10
11    let mut s = Stream::new(data);
12    s.advance(6); // format + length + language
13    let seg_count_x2: u16 = s.read()?;
14    if seg_count_x2 < 2 {
15        return None;
16    }
17
18    let seg_count = seg_count_x2 / 2;
19    s.advance(6); // searchRange + entrySelector + rangeShift
20
21    let end_codes = s.read_array16::<u16>(seg_count)?;
22    s.skip::<u16>(); // reservedPad
23    let start_codes = s.read_array16::<u16>(seg_count)?;
24    let id_deltas = s.read_array16::<i16>(seg_count)?;
25    let id_range_offset_pos = s.offset();
26    let id_range_offsets = s.read_array16::<u16>(seg_count)?;
27
28    // A custom binary search.
29    let mut start = 0;
30    let mut end = seg_count;
31    while end > start {
32        let index = (start + end) / 2;
33        let end_value = end_codes.get(index)?;
34        if end_value >= code_point {
35            let start_value = start_codes.get(index)?;
36            if start_value > code_point {
37                end = index;
38            } else {
39                let id_range_offset = id_range_offsets.get(index)?;
40                let id_delta = id_deltas.get(index)?;
41                if id_range_offset == 0 {
42                    return Some(code_point.wrapping_add(id_delta as u16));
43                }
44
45                let delta = (u32::from(code_point) - u32::from(start_value)) * 2;
46                let delta = u16::try_from(delta).ok()?;
47
48                let id_range_offset_pos = (id_range_offset_pos + usize::from(index) * 2) as u16;
49                let pos = id_range_offset_pos.wrapping_add(delta);
50                let pos = pos.wrapping_add(id_range_offset);
51                let glyph_array_value: u16 = Stream::read_at(data, usize::from(pos))?;
52
53                // 0 indicates missing glyph.
54                if glyph_array_value == 0 {
55                    return None;
56                }
57
58                let glyph_id = (glyph_array_value as i16).wrapping_add(id_delta);
59                return u16::try_from(glyph_id).ok();
60            }
61        } else {
62            start = index + 1;
63        }
64    }
65
66    None
67}
68
69pub fn codepoints(data: &[u8], mut f: impl FnMut(u32)) -> Option<()> {
70    let mut s = Stream::new(data);
71    s.advance(6); // format + length + language
72    let seg_count_x2: u16 = s.read()?;
73    if seg_count_x2 < 2 {
74        return None;
75    }
76
77    let seg_count = seg_count_x2 / 2;
78    s.advance(6); // searchRange + entrySelector + rangeShift
79
80    let end_codes = s.read_array16::<u16>(seg_count)?;
81    s.skip::<u16>(); // reservedPad
82    let start_codes = s.read_array16::<u16>(seg_count)?;
83
84    for (start, end) in start_codes.into_iter().zip(end_codes) {
85        for code_point in start..=end {
86            f(u32::from(code_point));
87        }
88    }
89
90    Some(())
91}
92
#[cfg(test)]
mod tests {
    //! Table-driven tests for the format 4 subtable parser.
    //!
    //! Every test builds a raw big-endian subtable by hand; the trailing
    //! comments describe the field that each pair of bytes encodes.

    use super::{parse, codepoints};

    // A single one-character segment mapped through idDelta.
    #[test]
    fn single_glyph() {
        let data = &[
            0x00, 0x04, // format: 4
            0x00, 0x20, // subtable size: 32
            0x00, 0x00, // language ID: 0
            0x00, 0x04, // 2 x segCount: 4
            0x00, 0x02, // search range: 2
            0x00, 0x00, // entry selector: 0
            0x00, 0x02, // range shift: 2
            // End character codes
            0x00, 0x41, // char code [0]: 65
            0xFF, 0xFF, // char code [1]: 65535
            0x00, 0x00, // reserved: 0
            // Start character codes
            0x00, 0x41, // char code [0]: 65
            0xFF, 0xFF, // char code [1]: 65535
            // Deltas
            0xFF, 0xC0, // delta [0]: -64
            0x00, 0x01, // delta [1]: 1
            // Offsets into Glyph index array
            0x00, 0x00, // offset [0]: 0
            0x00, 0x00, // offset [1]: 0
        ];

        assert_eq!(parse(data, 0x41), Some(1));
        assert_eq!(parse(data, 0x42), None);
    }

    // One segment covering several consecutive code points.
    #[test]
    fn continuous_range() {
        let data = &[
            0x00, 0x04, // format: 4
            0x00, 0x20, // subtable size: 32
            0x00, 0x00, // language ID: 0
            0x00, 0x04, // 2 x segCount: 4
            0x00, 0x02, // search range: 2
            0x00, 0x00, // entry selector: 0
            0x00, 0x02, // range shift: 2
            // End character codes
            0x00, 0x49, // char code [0]: 73
            0xFF, 0xFF, // char code [1]: 65535
            0x00, 0x00, // reserved: 0
            // Start character codes
            0x00, 0x41, // char code [0]: 65
            0xFF, 0xFF, // char code [1]: 65535
            // Deltas
            0xFF, 0xC0, // delta [0]: -64
            0x00, 0x01, // delta [1]: 1
            // Offsets into Glyph index array
            0x00, 0x00, // offset [0]: 0
            0x00, 0x00, // offset [1]: 0
        ];

        assert_eq!(parse(data, 0x40), None);
        assert_eq!(parse(data, 0x41), Some(1));
        assert_eq!(parse(data, 0x42), Some(2));
        assert_eq!(parse(data, 0x43), Some(3));
        assert_eq!(parse(data, 0x44), Some(4));
        assert_eq!(parse(data, 0x45), Some(5));
        assert_eq!(parse(data, 0x46), Some(6));
        assert_eq!(parse(data, 0x47), Some(7));
        assert_eq!(parse(data, 0x48), Some(8));
        assert_eq!(parse(data, 0x49), Some(9));
        assert_eq!(parse(data, 0x4A), None);
    }

    // Several segments with gaps between them; code points in the gaps are unmapped.
    #[test]
    fn multiple_ranges() {
        let data = &[
            0x00, 0x04, // format: 4
            0x00, 0x30, // subtable size: 48
            0x00, 0x00, // language ID: 0
            0x00, 0x08, // 2 x segCount: 8
            0x00, 0x04, // search range: 4
            0x00, 0x01, // entry selector: 1
            0x00, 0x04, // range shift: 4
            // End character codes
            0x00, 0x41, // char code [0]: 65
            0x00, 0x45, // char code [1]: 69
            0x00, 0x49, // char code [2]: 73
            0xFF, 0xFF, // char code [3]: 65535
            0x00, 0x00, // reserved: 0
            // Start character codes
            0x00, 0x41, // char code [0]: 65
            0x00, 0x43, // char code [1]: 67
            0x00, 0x47, // char code [2]: 71
            0xFF, 0xFF, // char code [3]: 65535
            // Deltas
            0xFF, 0xC0, // delta [0]: -64
            0xFF, 0xBF, // delta [1]: -65
            0xFF, 0xBE, // delta [2]: -66
            0x00, 0x01, // delta [3]: 1
            // Offsets into Glyph index array
            0x00, 0x00, // offset [0]: 0
            0x00, 0x00, // offset [1]: 0
            0x00, 0x00, // offset [2]: 0
            0x00, 0x00, // offset [3]: 0
        ];

        assert_eq!(parse(data, 0x40), None);
        assert_eq!(parse(data, 0x41), Some(1));
        assert_eq!(parse(data, 0x42), None);
        assert_eq!(parse(data, 0x43), Some(2));
        assert_eq!(parse(data, 0x44), Some(3));
        assert_eq!(parse(data, 0x45), Some(4));
        assert_eq!(parse(data, 0x46), None);
        assert_eq!(parse(data, 0x47), Some(5));
        assert_eq!(parse(data, 0x48), Some(6));
        assert_eq!(parse(data, 0x49), Some(7));
        assert_eq!(parse(data, 0x4A), None);
    }

    // A segment resolved through the glyph index array (non-zero range offset).
    #[test]
    fn unordered_ids() {
        let data = &[
            0x00, 0x04, // format: 4
            0x00, 0x2A, // subtable size: 42
            0x00, 0x00, // language ID: 0
            0x00, 0x04, // 2 x segCount: 4
            0x00, 0x02, // search range: 2
            0x00, 0x00, // entry selector: 0
            0x00, 0x02, // range shift: 2
            // End character codes
            0x00, 0x45, // char code [0]: 69
            0xFF, 0xFF, // char code [1]: 65535
            0x00, 0x00, // reserved: 0
            // Start character codes
            0x00, 0x41, // char code [0]: 65
            0xFF, 0xFF, // char code [1]: 65535
            // Deltas
            0x00, 0x00, // delta [0]: 0
            0x00, 0x01, // delta [1]: 1
            // Offsets into Glyph index array
            0x00, 0x04, // offset [0]: 4
            0x00, 0x00, // offset [1]: 0
            // Glyph index array
            0x00, 0x01, // glyph ID [0]: 1
            0x00, 0x0A, // glyph ID [1]: 10
            0x00, 0x64, // glyph ID [2]: 100
            0x03, 0xE8, // glyph ID [3]: 1000
            0x27, 0x10, // glyph ID [4]: 10000
        ];

        assert_eq!(parse(data, 0x40), None);
        assert_eq!(parse(data, 0x41), Some(1));
        assert_eq!(parse(data, 0x42), Some(10));
        assert_eq!(parse(data, 0x43), Some(100));
        assert_eq!(parse(data, 0x44), Some(1000));
        assert_eq!(parse(data, 0x45), Some(10000));
        assert_eq!(parse(data, 0x46), None);
    }

    // Scattered single-character segments, each with its own delta.
    #[test]
    fn unordered_chars_and_ids() {
        let data = &[
            0x00, 0x04, // format: 4
            0x00, 0x40, // subtable size: 64
            0x00, 0x00, // language ID: 0
            0x00, 0x0C, // 2 x segCount: 12
            0x00, 0x08, // search range: 8
            0x00, 0x02, // entry selector: 2
            0x00, 0x04, // range shift: 4
            // End character codes
            0x00, 0x50, // char code [0]: 80
            0x01, 0x00, // char code [1]: 256
            0x01, 0x50, // char code [2]: 336
            0x02, 0x00, // char code [3]: 512
            0x02, 0x50, // char code [4]: 592
            0xFF, 0xFF, // char code [5]: 65535
            0x00, 0x00, // reserved: 0
            // Start character codes
            0x00, 0x50, // char code [0]: 80
            0x01, 0x00, // char code [1]: 256
            0x01, 0x50, // char code [2]: 336
            0x02, 0x00, // char code [3]: 512
            0x02, 0x50, // char code [4]: 592
            0xFF, 0xFF, // char code [5]: 65535
            // Deltas
            0xFF, 0xB1, // delta [0]: -79
            0xFF, 0x0A, // delta [1]: -246
            0xFF, 0x14, // delta [2]: -236
            0x01, 0xE8, // delta [3]: 488
            0x24, 0xC0, // delta [4]: 9408
            0x00, 0x01, // delta [5]: 1
            // Offsets into Glyph index array
            0x00, 0x00, // offset [0]: 0
            0x00, 0x00, // offset [1]: 0
            0x00, 0x00, // offset [2]: 0
            0x00, 0x00, // offset [3]: 0
            0x00, 0x00, // offset [4]: 0
            0x00, 0x00, // offset [5]: 0
        ];

        assert_eq!(parse(data, 0x40),  None);
        assert_eq!(parse(data, 0x50),  Some(1));
        assert_eq!(parse(data, 0x100), Some(10));
        assert_eq!(parse(data, 0x150), Some(100));
        assert_eq!(parse(data, 0x200), Some(1000));
        assert_eq!(parse(data, 0x250), Some(10000));
        assert_eq!(parse(data, 0x300), None);
    }

    // The terminating 0xFFFF entries are removed while segCount still claims
    // two segments, so the data is 4 bytes shorter than the declared arrays
    // need and every lookup must fail.
    #[test]
    fn no_end_codes() {
        let data = &[
            0x00, 0x04, // format: 4
            0x00, 0x20, // subtable size: 32 <-- the data is only 28 bytes, but we don't check it anyway
            0x00, 0x00, // language ID: 0
            0x00, 0x04, // 2 x segCount: 4
            0x00, 0x02, // search range: 2
            0x00, 0x00, // entry selector: 0
            0x00, 0x02, // range shift: 2
            // End character codes
            0x00, 0x49, // char code [0]: 73
            // 0xFF, 0xFF, // char code [1]: 65535 <-- removed
            0x00, 0x00, // reserved: 0
            // Start character codes
            0x00, 0x41, // char code [0]: 65
            // 0xFF, 0xFF, // char code [1]: 65535 <-- removed
            // Deltas
            0xFF, 0xC0, // delta [0]: -64
            0x00, 0x01, // delta [1]: 1
            // Offsets into Glyph index array
            0x00, 0x00, // offset [0]: 0
            0x00, 0x00, // offset [1]: 0
        ];

        assert_eq!(parse(data, 0x40), None);
        assert_eq!(parse(data, 0x41), None);
        assert_eq!(parse(data, 0x42), None);
        assert_eq!(parse(data, 0x43), None);
        assert_eq!(parse(data, 0x44), None);
        assert_eq!(parse(data, 0x45), None);
        assert_eq!(parse(data, 0x46), None);
        assert_eq!(parse(data, 0x47), None);
        assert_eq!(parse(data, 0x48), None);
        assert_eq!(parse(data, 0x49), None);
        assert_eq!(parse(data, 0x4A), None);
    }

    // A doubled segment count below 2 describes no segments at all.
    #[test]
    fn invalid_segment_count() {
        let data = &[
            0x00, 0x04, // format: 4
            0x00, 0x20, // subtable size: 32
            0x00, 0x00, // language ID: 0
            0x00, 0x01, // 2 x segCount: 1 <-- must be more than 1
            0x00, 0x02, // search range: 2
            0x00, 0x00, // entry selector: 0
            0x00, 0x02, // range shift: 2
            // End character codes
            0x00, 0x41, // char code [0]: 65
            0xFF, 0xFF, // char code [1]: 65535
            0x00, 0x00, // reserved: 0
            // Start character codes
            0x00, 0x41, // char code [0]: 65
            0xFF, 0xFF, // char code [1]: 65535
            // Deltas
            0xFF, 0xC0, // delta [0]: -64
            0x00, 0x01, // delta [1]: 1
            // Offsets into Glyph index array
            0x00, 0x00, // offset [0]: 0
            0x00, 0x00, // offset [1]: 0
        ];

        assert_eq!(parse(data, 0x41), None);
    }

    // Only the terminating 0xFFFF segment is declared (segCount = 1).
    #[test]
    fn only_end_segments() {
        let data = &[
            0x00, 0x04, // format: 4
            0x00, 0x20, // subtable size: 32
            0x00, 0x00, // language ID: 0
            0x00, 0x02, // 2 x segCount: 2
            0x00, 0x02, // search range: 2
            0x00, 0x00, // entry selector: 0
            0x00, 0x02, // range shift: 2
            // End character codes
            0xFF, 0xFF, // char code [0]: 65535
            0x00, 0x00, // reserved: 0
            // Start character codes
            0xFF, 0xFF, // char code [0]: 65535
            // The arrays below still carry two entries each. With segCount = 1
            // only one value per array is read, so the labels past the first
            // delta do not match what is actually parsed.
            // Deltas
            0xFF, 0xC0, // delta [0]: -64
            0x00, 0x01, // delta [1]: 1
            // Offsets into Glyph index array
            0x00, 0x00, // offset [0]: 0
            0x00, 0x00, // offset [1]: 0
        ];

        // Should not loop forever.
        assert_eq!(parse(data, 0x41), None);
    }

    // The length field is wrong, but lookups still succeed because it is never validated.
    #[test]
    fn invalid_length() {
        let data = &[
            0x00, 0x04, // format: 4
            0x00, 0x10, // subtable size: 16 <-- the size should be 32, but we don't check it anyway
            0x00, 0x00, // language ID: 0
            0x00, 0x04, // 2 x segCount: 4
            0x00, 0x02, // search range: 2
            0x00, 0x00, // entry selector: 0
            0x00, 0x02, // range shift: 2
            // End character codes
            0x00, 0x41, // char code [0]: 65
            0xFF, 0xFF, // char code [1]: 65535
            0x00, 0x00, // reserved: 0
            // Start character codes
            0x00, 0x41, // char code [0]: 65
            0xFF, 0xFF, // char code [1]: 65535
            // Deltas
            0xFF, 0xC0, // delta [0]: -64
            0x00, 0x01, // delta [1]: 1
            // Offsets into Glyph index array
            0x00, 0x00, // offset [0]: 0
            0x00, 0x00, // offset [1]: 0
        ];

        assert_eq!(parse(data, 0x41), Some(1));
        assert_eq!(parse(data, 0x42), None);
    }

    // Code points that do not fit into 16 bits can never be mapped by this format.
    #[test]
    fn codepoint_out_of_range() {
        let data = &[
            0x00, 0x04, // format: 4
            0x00, 0x20, // subtable size: 32
            0x00, 0x00, // language ID: 0
            0x00, 0x04, // 2 x segCount: 4
            0x00, 0x02, // search range: 2
            0x00, 0x00, // entry selector: 0
            0x00, 0x02, // range shift: 2
            // End character codes
            0x00, 0x41, // char code [0]: 65
            0xFF, 0xFF, // char code [1]: 65535
            0x00, 0x00, // reserved: 0
            // Start character codes
            0x00, 0x41, // char code [0]: 65
            0xFF, 0xFF, // char code [1]: 65535
            // Deltas
            0xFF, 0xC0, // delta [0]: -64
            0x00, 0x01, // delta [1]: 1
            // Offsets into Glyph index array
            0x00, 0x00, // offset [0]: 0
            0x00, 0x00, // offset [1]: 0
        ];

        // Format 4 supports only u16 code points, so we have to bail immediately otherwise.
        assert_eq!(parse(data, 0x1FFFF), None);
    }

    // A zero entry in the glyph index array means the glyph is missing.
    #[test]
    fn zero() {
        let data = &[
            0x00, 0x04, // format: 4
            0x00, 0x2A, // subtable size: 42
            0x00, 0x00, // language ID: 0
            0x00, 0x04, // 2 x segCount: 4
            0x00, 0x02, // search range: 2
            0x00, 0x00, // entry selector: 0
            0x00, 0x02, // range shift: 2
            // End character codes
            0x00, 0x45, // char code [0]: 69
            0xFF, 0xFF, // char code [1]: 65535
            0x00, 0x00, // reserved: 0
            // Start character codes
            0x00, 0x41, // char code [0]: 65
            0xFF, 0xFF, // char code [1]: 65535
            // Deltas
            0x00, 0x00, // delta [0]: 0
            0x00, 0x01, // delta [1]: 1
            // Offsets into Glyph index array
            0x00, 0x04, // offset [0]: 4
            0x00, 0x00, // offset [1]: 0
            // Glyph index array
            0x00, 0x00, // glyph ID [0]: 0 <-- indicates missing glyph
            0x00, 0x0A, // glyph ID [1]: 10
            0x00, 0x64, // glyph ID [2]: 100
            0x03, 0xE8, // glyph ID [3]: 1000
            0x27, 0x10, // glyph ID [4]: 10000
        ];

        assert_eq!(parse(data, 0x41), None);
    }

    // `codepoints` reports every code point of every segment, in table order.
    #[test]
    fn collect_codepoints() {
        let data = &[
            0x00, 0x04, // format: 4
            0x00, 0x18, // subtable size: 24
            0x00, 0x00, // language ID: 0
            0x00, 0x04, // 2 x segCount: 4
            0x00, 0x02, // search range: 2
            0x00, 0x00, // entry selector: 0
            0x00, 0x02, // range shift: 2
            // End character codes
            0x00, 0x22, // char code [0]: 34
            0xFF, 0xFF, // char code [1]: 65535
            0x00, 0x00, // reserved: 0
            // Start character codes
            0x00, 0x1B, // char code [0]: 27
            0xFF, 0xFD, // char code [1]: 65533
            // codepoints does not care about glyph ids
        ];

        let mut vec = vec![];
        // Check the result as well: a parsing failure must not go unnoticed.
        assert_eq!(codepoints(data, |c| vec.push(c)), Some(()));
        assert_eq!(vec, [27, 28, 29, 30, 31, 32, 33, 34, 65533, 65534, 65535]);
    }
}