offset_string/
lib.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
// Copyright 2019 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

use anyhow::{format_err, Error};
use serde::{Deserialize, Serialize};
use std::ops::RangeInclusive;

mod conversions;

/// A compact representation of a set of unsigned integer ranges.
///
/// The primary use case is succinctly encoding a large set of Unicode code points in JSON.
///
/// If the set of ranges is
///
///     [1..=3, 8..=9, 13..=13, 18..=20]
///
/// the `OffsetString` will be
///
///     "1+2,5+1,4,4+3"
///
/// In each entry, the first number is the offset from the end of the previous range. If the current
/// range has length > 1, then there's a '+x' that shows how much to add to get the upper bound of
/// the range. Note that the range [13, 14) has length 1, so it doesn't get a plus suffix.
#[derive(Debug, Serialize, Deserialize, Eq, PartialEq)]
#[serde(try_from = "String")]
pub struct OffsetString(String);

impl OffsetString {
    /// Tries to construct a new `OffsetString` from a string.
    ///
    /// This method performs basic validation on whether the string is syntactically valid and does
    /// not contain any redundantly short ranges. Returns an `Error` if validation fails.
    pub fn new<T: AsRef<str>>(source: T) -> Result<OffsetString, Error> {
        let mut segment_index = 0;
        for segment in source.as_ref().split(',') {
            let mut endpoints = segment.split('+');

            // Not enough plus signs
            let low = endpoints.next().ok_or_else(|| format_err!("Empty segment"))?;
            let low_int = low.parse::<u32>()?;

            if segment_index > 0 && low_int <= 1 {
                return Err(format_err!("Adjacent ranges must be merged"));
            }

            if let Some(span) = endpoints.next() {
                let span_int = span.parse::<u32>()?;
                if span_int < 1 {
                    return Err(format_err!("Range is too small: {}", &segment));
                }

                // Too many plus signs
                if endpoints.next().is_some() {
                    return Err(format_err!("Invalid segment: {}", &segment));
                }
            }

            segment_index += 1;
        }
        Ok(OffsetString(source.as_ref().to_string()))
    }

    /// Iterate over the numeric ranges in the collection.
    pub fn iter_ranges<'a>(&'a self) -> impl Iterator<Item = RangeInclusive<u32>> + 'a {
        self.0
            .split(',')
            .map(|segment| {
                segment.split('+').map(|s| s.parse::<u32>().unwrap()).collect::<Vec<u32>>()
            })
            .scan(0u32, |offset, parsed_ints| {
                let low = *offset + parsed_ints[0];
                let high = if parsed_ints.len() == 1 { low } else { low + parsed_ints[1] };
                *offset = high;
                Some(low..=high)
            })
    }

    /// Iterate over the individual unsigned integers in the collection.
    pub fn iter<'a>(&'a self) -> impl Iterator<Item = u32> + 'a {
        self.iter_ranges().flat_map(|range| range)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_offset_string_new() {
        assert!(OffsetString::new("0+4,5,11+8").is_ok());
        assert!(OffsetString::new("1+3,5,11+8").is_ok());
    }

    #[test]
    fn test_offset_string_new_bad_string() {
        assert!(OffsetString::new("3+,5,11+8").is_err());
        assert!(OffsetString::new("-5+4,5,11+8").is_err());
        assert!(OffsetString::new("3+1,a,11+8").is_err());
        assert!(OffsetString::new("3+1,5,11+8,").is_err());
    }

    #[test]
    fn test_offset_string_new_bad_offset() {
        assert!(OffsetString::new("0+4,0,5,11+8").is_err());
        assert!(OffsetString::new("0+4,1,5,11+8").is_err());
        assert!(OffsetString::new("0+4,1+3,5,11+8").is_err());
    }

    #[test]
    fn test_offset_string_iter_ranges() -> Result<(), Error> {
        let offset_string = OffsetString::new("0+4,5,11+8")?;
        assert_eq!(
            offset_string.iter_ranges().collect::<Vec<RangeInclusive<u32>>>(),
            vec![0..=4, 9..=9, 20..=28]
        );
        Ok(())
    }

    #[test]
    fn test_offset_string_iter() -> Result<(), Error> {
        let offset_string = OffsetString::new("0+4,5,11+8")?;
        assert_eq!(
            offset_string.iter().collect::<Vec<u32>>(),
            vec![0, 1, 2, 3, 4, 9, 20, 21, 22, 23, 24, 25, 26, 27, 28]
        );
        Ok(())
    }
}