1// Copyright 2019 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
45use anyhow::{format_err, Error};
6use serde::{Deserialize, Serialize};
7use std::ops::RangeInclusive;
89mod conversions;
1011/// A compact representation of a set of unsigned integer ranges.
12///
13/// The primary use case is succinctly encoding a large set of Unicode code points in JSON.
14///
15/// If the set of ranges is
16///
17/// [1..=3, 8..=9, 13..=13, 18..=20]
18///
19/// the `OffsetString` will be
20///
21/// "1+2,5+1,4,4+3"
22///
23/// In each entry, the first number is the offset from the end of the previous range. If the current
24/// range has length > 1, then there's a '+x' that shows how much to add to get the upper bound of
25/// the range. Note that the range [13, 14) has length 1, so it doesn't get a plus suffix.
26#[derive(Debug, Serialize, Deserialize, Eq, PartialEq)]
27#[serde(try_from = "String")]
28pub struct OffsetString(String);
2930impl OffsetString {
31/// Tries to construct a new `OffsetString` from a string.
32 ///
33 /// This method performs basic validation on whether the string is syntactically valid and does
34 /// not contain any redundantly short ranges. Returns an `Error` if validation fails.
35pub fn new<T: AsRef<str>>(source: T) -> Result<OffsetString, Error> {
36let mut segment_index = 0;
37for segment in source.as_ref().split(',') {
38let mut endpoints = segment.split('+');
3940// Not enough plus signs
41let low = endpoints.next().ok_or_else(|| format_err!("Empty segment"))?;
42let low_int = low.parse::<u32>()?;
4344if segment_index > 0 && low_int <= 1 {
45return Err(format_err!("Adjacent ranges must be merged"));
46 }
4748if let Some(span) = endpoints.next() {
49let span_int = span.parse::<u32>()?;
50if span_int < 1 {
51return Err(format_err!("Range is too small: {}", &segment));
52 }
5354// Too many plus signs
55if endpoints.next().is_some() {
56return Err(format_err!("Invalid segment: {}", &segment));
57 }
58 }
5960 segment_index += 1;
61 }
62Ok(OffsetString(source.as_ref().to_string()))
63 }
6465/// Iterate over the numeric ranges in the collection.
66pub fn iter_ranges<'a>(&'a self) -> impl Iterator<Item = RangeInclusive<u32>> + 'a {
67self.0
68.split(',')
69 .map(|segment| {
70 segment.split('+').map(|s| s.parse::<u32>().unwrap()).collect::<Vec<u32>>()
71 })
72 .scan(0u32, |offset, parsed_ints| {
73let low = *offset + parsed_ints[0];
74let high = if parsed_ints.len() == 1 { low } else { low + parsed_ints[1] };
75*offset = high;
76Some(low..=high)
77 })
78 }
7980/// Iterate over the individual unsigned integers in the collection.
81pub fn iter<'a>(&'a self) -> impl Iterator<Item = u32> + 'a {
82self.iter_ranges().flat_map(|range| range)
83 }
84}
8586#[cfg(test)]
87mod tests {
88use super::*;
8990#[test]
91fn test_offset_string_new() {
92assert!(OffsetString::new("0+4,5,11+8").is_ok());
93assert!(OffsetString::new("1+3,5,11+8").is_ok());
94 }
9596#[test]
97fn test_offset_string_new_bad_string() {
98assert!(OffsetString::new("3+,5,11+8").is_err());
99assert!(OffsetString::new("-5+4,5,11+8").is_err());
100assert!(OffsetString::new("3+1,a,11+8").is_err());
101assert!(OffsetString::new("3+1,5,11+8,").is_err());
102 }
103104#[test]
105fn test_offset_string_new_bad_offset() {
106assert!(OffsetString::new("0+4,0,5,11+8").is_err());
107assert!(OffsetString::new("0+4,1,5,11+8").is_err());
108assert!(OffsetString::new("0+4,1+3,5,11+8").is_err());
109 }
110111#[test]
112fn test_offset_string_iter_ranges() -> Result<(), Error> {
113let offset_string = OffsetString::new("0+4,5,11+8")?;
114assert_eq!(
115 offset_string.iter_ranges().collect::<Vec<RangeInclusive<u32>>>(),
116vec![0..=4, 9..=9, 20..=28]
117 );
118Ok(())
119 }
120121#[test]
122fn test_offset_string_iter() -> Result<(), Error> {
123let offset_string = OffsetString::new("0+4,5,11+8")?;
124assert_eq!(
125 offset_string.iter().collect::<Vec<u32>>(),
126vec![0, 1, 2, 3, 4, 9, 20, 21, 22, 23, 24, 25, 26, 27, 28]
127 );
128Ok(())
129 }
130}