offset_string/
lib.rs

1// Copyright 2019 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use anyhow::{format_err, Error};
6use serde::{Deserialize, Serialize};
7use std::ops::RangeInclusive;
8
9mod conversions;
10
11/// A compact representation of a set of unsigned integer ranges.
12///
13/// The primary use case is succinctly encoding a large set of Unicode code points in JSON.
14///
15/// If the set of ranges is
16///
17///     [1..=3, 8..=9, 13..=13, 18..=20]
18///
19/// the `OffsetString` will be
20///
21///     "1+2,5+1,4,4+3"
22///
23/// In each entry, the first number is the offset from the end of the previous range. If the current
24/// range has length > 1, then there's a '+x' that shows how much to add to get the upper bound of
25/// the range. Note that the range [13, 14) has length 1, so it doesn't get a plus suffix.
26#[derive(Debug, Serialize, Deserialize, Eq, PartialEq)]
27#[serde(try_from = "String")]
28pub struct OffsetString(String);
29
30impl OffsetString {
31    /// Tries to construct a new `OffsetString` from a string.
32    ///
33    /// This method performs basic validation on whether the string is syntactically valid and does
34    /// not contain any redundantly short ranges. Returns an `Error` if validation fails.
35    pub fn new<T: AsRef<str>>(source: T) -> Result<OffsetString, Error> {
36        let mut segment_index = 0;
37        for segment in source.as_ref().split(',') {
38            let mut endpoints = segment.split('+');
39
40            // Not enough plus signs
41            let low = endpoints.next().ok_or_else(|| format_err!("Empty segment"))?;
42            let low_int = low.parse::<u32>()?;
43
44            if segment_index > 0 && low_int <= 1 {
45                return Err(format_err!("Adjacent ranges must be merged"));
46            }
47
48            if let Some(span) = endpoints.next() {
49                let span_int = span.parse::<u32>()?;
50                if span_int < 1 {
51                    return Err(format_err!("Range is too small: {}", &segment));
52                }
53
54                // Too many plus signs
55                if endpoints.next().is_some() {
56                    return Err(format_err!("Invalid segment: {}", &segment));
57                }
58            }
59
60            segment_index += 1;
61        }
62        Ok(OffsetString(source.as_ref().to_string()))
63    }
64
65    /// Iterate over the numeric ranges in the collection.
66    pub fn iter_ranges<'a>(&'a self) -> impl Iterator<Item = RangeInclusive<u32>> + 'a {
67        self.0
68            .split(',')
69            .map(|segment| {
70                segment.split('+').map(|s| s.parse::<u32>().unwrap()).collect::<Vec<u32>>()
71            })
72            .scan(0u32, |offset, parsed_ints| {
73                let low = *offset + parsed_ints[0];
74                let high = if parsed_ints.len() == 1 { low } else { low + parsed_ints[1] };
75                *offset = high;
76                Some(low..=high)
77            })
78    }
79
80    /// Iterate over the individual unsigned integers in the collection.
81    pub fn iter<'a>(&'a self) -> impl Iterator<Item = u32> + 'a {
82        self.iter_ranges().flat_map(|range| range)
83    }
84}
85
86#[cfg(test)]
87mod tests {
88    use super::*;
89
90    #[test]
91    fn test_offset_string_new() {
92        assert!(OffsetString::new("0+4,5,11+8").is_ok());
93        assert!(OffsetString::new("1+3,5,11+8").is_ok());
94    }
95
96    #[test]
97    fn test_offset_string_new_bad_string() {
98        assert!(OffsetString::new("3+,5,11+8").is_err());
99        assert!(OffsetString::new("-5+4,5,11+8").is_err());
100        assert!(OffsetString::new("3+1,a,11+8").is_err());
101        assert!(OffsetString::new("3+1,5,11+8,").is_err());
102    }
103
104    #[test]
105    fn test_offset_string_new_bad_offset() {
106        assert!(OffsetString::new("0+4,0,5,11+8").is_err());
107        assert!(OffsetString::new("0+4,1,5,11+8").is_err());
108        assert!(OffsetString::new("0+4,1+3,5,11+8").is_err());
109    }
110
111    #[test]
112    fn test_offset_string_iter_ranges() -> Result<(), Error> {
113        let offset_string = OffsetString::new("0+4,5,11+8")?;
114        assert_eq!(
115            offset_string.iter_ranges().collect::<Vec<RangeInclusive<u32>>>(),
116            vec![0..=4, 9..=9, 20..=28]
117        );
118        Ok(())
119    }
120
121    #[test]
122    fn test_offset_string_iter() -> Result<(), Error> {
123        let offset_string = OffsetString::new("0+4,5,11+8")?;
124        assert_eq!(
125            offset_string.iter().collect::<Vec<u32>>(),
126            vec![0, 1, 2, 3, 4, 9, 20, 21, 22, 23, 24, 25, 26, 27, 28]
127        );
128        Ok(())
129    }
130}