ttf_parser/tables/cmap/
mod.rs

1//
2
/*!
A [character to glyph index mapping](https://docs.microsoft.com/en-us/typography/opentype/spec/cmap)
table implementation.

This module provides a low-level alternative to the
[`Face::glyph_index`](../struct.Face.html#method.glyph_index) and
[`Face::glyph_variation_index`](../struct.Face.html#method.glyph_variation_index)
methods.
*/
12
13use core::convert::TryFrom;
14
15use crate::{GlyphId, PlatformId};
16use crate::parser::{Stream, FromData, LazyArray16, NumFrom};
17
18mod format0;
19mod format2;
20mod format4;
21mod format6;
22mod format10;
23mod format12;
24mod format13;
25mod format14;
26
27pub use format14::GlyphVariationResult;
28
29
30/// An iterator over
31/// [character encoding](https://docs.microsoft.com/en-us/typography/opentype/spec/cmap)
32/// subtables.
33#[derive(Clone, Copy, Default)]
34#[allow(missing_debug_implementations)]
35pub struct Subtables<'a> {
36    data: &'a [u8],
37    records: LazyArray16<'a, EncodingRecord>,
38    index: u16,
39}
40
41impl<'a> Iterator for Subtables<'a> {
42    type Item = Subtable<'a>;
43
44    #[inline]
45    fn next(&mut self) -> Option<Self::Item> {
46        if self.index < self.records.len() {
47            let index = u16::try_from(self.index).ok()?;
48            self.index += 1;
49
50            let record = self.records.get(index)?;
51            let subtable_data = self.data.get(usize::num_from(record.offset)..)?;
52            let format: Format = Stream::read_at(subtable_data, 0)?;
53            Some(Subtable {
54                platform_id: record.platform_id,
55                encoding_id: record.encoding_id,
56                format,
57                subtable_data,
58            })
59        } else {
60            None
61        }
62    }
63
64    #[inline]
65    fn count(self) -> usize {
66        usize::from(self.records.len())
67    }
68}
69
70
71/// A character encoding subtable.
72pub struct Subtable<'a> {
73    platform_id: PlatformId,
74    encoding_id: u16,
75    format: Format,
76    subtable_data: &'a [u8],
77}
78
79impl<'a> Subtable<'a> {
80    /// Returns encoding's platform.
81    #[inline]
82    pub fn platform_id(&self) -> PlatformId {
83        self.platform_id
84    }
85
86    /// Returns encoding ID.
87    #[inline]
88    pub fn encoding_id(&self) -> u16 {
89        self.encoding_id
90    }
91
92    /// Returns encoding's format.
93    #[inline]
94    pub fn format(&self) -> Format {
95        self.format
96    }
97
98    /// Checks that the current encoding is Unicode compatible.
99    #[inline]
100    pub fn is_unicode(&self) -> bool {
101        // https://docs.microsoft.com/en-us/typography/opentype/spec/name#windows-encoding-ids
102        const WINDOWS_UNICODE_BMP_ENCODING_ID: u16 = 1;
103        const WINDOWS_UNICODE_FULL_REPERTOIRE_ENCODING_ID: u16 = 10;
104
105        match self.platform_id {
106            PlatformId::Unicode => true,
107            PlatformId::Windows if self.encoding_id == WINDOWS_UNICODE_BMP_ENCODING_ID => true,
108            PlatformId::Windows => {
109                // "Fonts that support Unicode supplementary-plane characters (U+10000 to U+10FFFF)
110                // on the Windows platform must have a format 12 subtable for platform ID 3,
111                // encoding ID 10."
112                self.encoding_id == WINDOWS_UNICODE_FULL_REPERTOIRE_ENCODING_ID
113                && self.format == Format::SegmentedCoverage
114            }
115            _ => false,
116        }
117    }
118
119    /// Maps a character to a glyph ID.
120    ///
121    /// This is a low-level method and unlike `Face::glyph_index` it doesn't
122    /// check that the current encoding is Unicode.
123    /// It simply maps a `u32` codepoint number to a glyph ID.
124    ///
125    /// Returns `None`:
126    /// - when glyph ID is `0`.
127    /// - when format is `MixedCoverage`, since it's not supported.
128    /// - when format is `UnicodeVariationSequences`. Use `glyph_variation_index` instead.
129    #[inline]
130    pub fn glyph_index(&self, c: u32) -> Option<GlyphId> {
131        let glyph = match self.format {
132            Format::ByteEncodingTable => {
133                format0::parse(self.subtable_data, c)
134            }
135            Format::HighByteMappingThroughTable => {
136                format2::parse(self.subtable_data, c)
137            }
138            Format::SegmentMappingToDeltaValues => {
139                format4::parse(self.subtable_data, c)
140            }
141            Format::TrimmedTableMapping => {
142                format6::parse(self.subtable_data, c)
143            }
144            Format::MixedCoverage => {
145                // Unsupported.
146                None
147            }
148            Format::TrimmedArray => {
149                format10::parse(self.subtable_data, c)
150            }
151            Format::SegmentedCoverage => {
152                format12::parse(self.subtable_data, c)
153            }
154            Format::ManyToOneRangeMappings => {
155                format13::parse(self.subtable_data, c)
156            }
157            Format::UnicodeVariationSequences => {
158                // This subtable should be accessed via glyph_variation_index().
159                None
160            }
161        };
162
163        glyph.map(GlyphId)
164    }
165
166    /// Resolves a variation of a glyph ID from two code points.
167    ///
168    /// Returns `None`:
169    /// - when glyph ID is `0`.
170    /// - when format is not `UnicodeVariationSequences`.
171    #[inline]
172    pub fn glyph_variation_index(&self, c: char, variation: char) -> Option<GlyphVariationResult> {
173        if self.format == Format::UnicodeVariationSequences {
174            format14::parse(self.subtable_data, u32::from(c), u32::from(variation))
175        } else {
176            None
177        }
178    }
179
180    /// Calls `f` for all codepoints contained in this subtable.
181    ///
182    /// This is a low-level method and it doesn't check that the current
183    /// encoding is Unicode. It simply calls the function `f` for all `u32`
184    /// codepoints that are present in this subtable.
185    ///
186    /// Note that this may list codepoints for which `glyph_index` still returns
187    /// `None` because this method finds all codepoints which were _defined_ in
188    /// this subtable. The subtable may still map them to glyph ID `0`.
189    ///
190    /// Returns without doing anything:
191    /// - when format is `MixedCoverage`, since it's not supported.
192    /// - when format is `UnicodeVariationSequences`, since it's not supported.
193    pub fn codepoints<F: FnMut(u32)>(&self, f: F) {
194        let _ = match self.format {
195            Format::ByteEncodingTable => {
196                format0::codepoints(self.subtable_data, f)
197            }
198            Format::HighByteMappingThroughTable => {
199                format2::codepoints(self.subtable_data, f)
200            },
201            Format::SegmentMappingToDeltaValues => {
202                format4::codepoints(self.subtable_data, f)
203            },
204            Format::TrimmedTableMapping => {
205                format6::codepoints(self.subtable_data, f)
206            },
207            Format::MixedCoverage => {
208                // Unsupported
209                None
210            },
211            Format::TrimmedArray => {
212                format10::codepoints(self.subtable_data, f)
213            },
214            Format::SegmentedCoverage => {
215                format12::codepoints(self.subtable_data, f)
216            }
217            Format::ManyToOneRangeMappings => {
218                format13::codepoints(self.subtable_data, f)
219            },
220            Format::UnicodeVariationSequences => {
221                // Unsupported
222                None
223            },
224        };
225    }
226}
227
228impl<'a> core::fmt::Debug for Subtable<'a> {
229    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
230        f.debug_struct("Encoding")
231            .field("platform_id", &self.platform_id)
232            .field("encoding_id", &self.encoding_id)
233            .field("format", &self.format)
234            .finish()
235    }
236}
237
238
239#[derive(Clone, Copy)]
240struct EncodingRecord {
241    platform_id: PlatformId,
242    encoding_id: u16,
243    offset: u32,
244}
245
246impl FromData for EncodingRecord {
247    const SIZE: usize = 8;
248
249    #[inline]
250    fn parse(data: &[u8]) -> Option<Self> {
251        let mut s = Stream::new(data);
252        Some(EncodingRecord {
253            platform_id: s.read::<PlatformId>()?,
254            encoding_id: s.read::<u16>()?,
255            offset: s.read::<u32>()?,
256        })
257    }
258}
259
260
/// A character map encoding format.
///
/// The discriminant of each variant is the numeric format value
/// stored in the subtable.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
pub enum Format {
    /// Format 0.
    ByteEncodingTable = 0,
    /// Format 2.
    HighByteMappingThroughTable = 2,
    /// Format 4.
    SegmentMappingToDeltaValues = 4,
    /// Format 6.
    TrimmedTableMapping = 6,
    /// Format 8. Lookups in this format are not supported by this module.
    MixedCoverage = 8,
    /// Format 10.
    TrimmedArray = 10,
    /// Format 12.
    SegmentedCoverage = 12,
    /// Format 13.
    ManyToOneRangeMappings = 13,
    /// Format 14. Queried via `Subtable::glyph_variation_index`.
    UnicodeVariationSequences = 14,
}
275
276impl FromData for Format {
277    const SIZE: usize = 2;
278
279    #[inline]
280    fn parse(data: &[u8]) -> Option<Self> {
281        match u16::parse(data)? {
282             0 => Some(Format::ByteEncodingTable),
283             2 => Some(Format::HighByteMappingThroughTable),
284             4 => Some(Format::SegmentMappingToDeltaValues),
285             6 => Some(Format::TrimmedTableMapping),
286             8 => Some(Format::MixedCoverage),
287            10 => Some(Format::TrimmedArray),
288            12 => Some(Format::SegmentedCoverage),
289            13 => Some(Format::ManyToOneRangeMappings),
290            14 => Some(Format::UnicodeVariationSequences),
291            _ => None,
292        }
293    }
294}
295
296pub(crate) fn parse(data: &[u8]) -> Option<Subtables> {
297    let mut s = Stream::new(data);
298    s.skip::<u16>(); // version
299    let count: u16 = s.read()?;
300    let records = s.read_array16::<EncodingRecord>(count)?;
301
302    Some(Subtables {
303        data,
304        records,
305        index: 0,
306    })
307}