regex_automata/state_id.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291
use core::fmt::Debug;
use core::hash::Hash;
use core::mem::size_of;
use byteorder::{ByteOrder, NativeEndian};
#[cfg(feature = "std")]
pub use self::std::*;
#[cfg(feature = "std")]
mod std {
use byteorder::ByteOrder;
use core::mem::size_of;
use error::{Error, Result};
use super::StateID;
/// Check that the premultiplication of the given state identifier can
/// fit into the representation indicated by `S`. If it cannot, or if it
/// overflows `usize` itself, then an error is returned.
pub fn premultiply_overflow_error<S: StateID>(
last_state: S,
alphabet_len: usize,
) -> Result<()> {
let requested = match last_state.to_usize().checked_mul(alphabet_len) {
Some(requested) => requested,
None => return Err(Error::premultiply_overflow(0, 0)),
};
if requested > S::max_id() {
return Err(Error::premultiply_overflow(S::max_id(), requested));
}
Ok(())
}
/// Allocate the next sequential identifier for a fresh state given
/// the previously constructed state identified by `current`. If the
/// next sequential identifier would overflow `usize` or the chosen
/// representation indicated by `S`, then an error is returned.
pub fn next_state_id<S: StateID>(current: S) -> Result<S> {
let next = match current.to_usize().checked_add(1) {
Some(next) => next,
None => return Err(Error::state_id_overflow(::std::usize::MAX)),
};
if next > S::max_id() {
return Err(Error::state_id_overflow(S::max_id()));
}
Ok(S::from_usize(next))
}
/// Convert the given `usize` to the chosen state identifier
/// representation. If the given value cannot fit in the chosen
/// representation, then an error is returned.
pub fn usize_to_state_id<S: StateID>(value: usize) -> Result<S> {
if value > S::max_id() {
Err(Error::state_id_overflow(S::max_id()))
} else {
Ok(S::from_usize(value))
}
}
/// Write the given identifier to the given slice of bytes using the
/// specified endianness. The given slice must have length at least
/// `size_of::<S>()`.
///
/// The given state identifier representation must have size 1, 2, 4 or 8.
pub fn write_state_id_bytes<E: ByteOrder, S: StateID>(
slice: &mut [u8],
id: S,
) {
assert!(
1 == size_of::<S>()
|| 2 == size_of::<S>()
|| 4 == size_of::<S>()
|| 8 == size_of::<S>()
);
match size_of::<S>() {
1 => slice[0] = id.to_usize() as u8,
2 => E::write_u16(slice, id.to_usize() as u16),
4 => E::write_u32(slice, id.to_usize() as u32),
8 => E::write_u64(slice, id.to_usize() as u64),
_ => unreachable!(),
}
}
}
/// Return the unique identifier for a DFA's dead state in the chosen
/// representation indicated by `S`.
pub fn dead_id<S: StateID>() -> S {
S::from_usize(0)
}
/// A trait describing the representation of a DFA's state identifier.
///
/// The purpose of this trait is to safely express both the possible state
/// identifier representations that can be used in a DFA and to convert between
/// state identifier representations and types that can be used to efficiently
/// index memory (such as `usize`).
///
/// In general, one should not need to implement this trait explicitly. In
/// particular, this crate provides implementations for `u8`, `u16`, `u32`,
/// `u64` and `usize`. (`u32` and `u64` are only provided for targets that can
/// represent all corresponding values in a `usize`.)
///
/// # Safety
///
/// This trait is unsafe because the correctness of its implementations may be
/// relied upon by other unsafe code. For example, one possible way to
/// implement this trait incorrectly would be to return a maximum identifier
/// in `max_id` that is greater than the real maximum identifier. This will
/// likely result in wrap-on-overflow semantics in release mode, which can in
/// turn produce incorrect state identifiers. Those state identifiers may then
/// in turn access out-of-bounds memory in a DFA's search routine, where bounds
/// checks are explicitly elided for performance reasons.
pub unsafe trait StateID:
Clone + Copy + Debug + Eq + Hash + PartialEq + PartialOrd + Ord
{
/// Convert from a `usize` to this implementation's representation.
///
/// Implementors may assume that `n <= Self::max_id`. That is, implementors
/// do not need to check whether `n` can fit inside this implementation's
/// representation.
fn from_usize(n: usize) -> Self;
/// Convert this implementation's representation to a `usize`.
///
/// Implementors must not return a `usize` value greater than
/// `Self::max_id` and must not permit overflow when converting between the
/// implementor's representation and `usize`. In general, the preferred
/// way for implementors to achieve this is to simply not provide
/// implementations of `StateID` that cannot fit into the target platform's
/// `usize`.
fn to_usize(self) -> usize;
/// Return the maximum state identifier supported by this representation.
///
/// Implementors must return a correct bound. Doing otherwise may result
/// in memory unsafety.
fn max_id() -> usize;
/// Read a single state identifier from the given slice of bytes in native
/// endian format.
///
/// Implementors may assume that the given slice has length at least
/// `size_of::<Self>()`.
fn read_bytes(slice: &[u8]) -> Self;
/// Write this state identifier to the given slice of bytes in native
/// endian format.
///
/// Implementors may assume that the given slice has length at least
/// `size_of::<Self>()`.
fn write_bytes(self, slice: &mut [u8]);
}
unsafe impl StateID for usize {
#[inline]
fn from_usize(n: usize) -> usize {
n
}
#[inline]
fn to_usize(self) -> usize {
self
}
#[inline]
fn max_id() -> usize {
::core::usize::MAX
}
#[inline]
fn read_bytes(slice: &[u8]) -> Self {
NativeEndian::read_uint(slice, size_of::<usize>()) as usize
}
#[inline]
fn write_bytes(self, slice: &mut [u8]) {
NativeEndian::write_uint(slice, self as u64, size_of::<usize>())
}
}
unsafe impl StateID for u8 {
#[inline]
fn from_usize(n: usize) -> u8 {
n as u8
}
#[inline]
fn to_usize(self) -> usize {
self as usize
}
#[inline]
fn max_id() -> usize {
::core::u8::MAX as usize
}
#[inline]
fn read_bytes(slice: &[u8]) -> Self {
slice[0]
}
#[inline]
fn write_bytes(self, slice: &mut [u8]) {
slice[0] = self;
}
}
unsafe impl StateID for u16 {
#[inline]
fn from_usize(n: usize) -> u16 {
n as u16
}
#[inline]
fn to_usize(self) -> usize {
self as usize
}
#[inline]
fn max_id() -> usize {
::core::u16::MAX as usize
}
#[inline]
fn read_bytes(slice: &[u8]) -> Self {
NativeEndian::read_u16(slice)
}
#[inline]
fn write_bytes(self, slice: &mut [u8]) {
NativeEndian::write_u16(slice, self)
}
}
#[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))]
unsafe impl StateID for u32 {
#[inline]
fn from_usize(n: usize) -> u32 {
n as u32
}
#[inline]
fn to_usize(self) -> usize {
self as usize
}
#[inline]
fn max_id() -> usize {
::core::u32::MAX as usize
}
#[inline]
fn read_bytes(slice: &[u8]) -> Self {
NativeEndian::read_u32(slice)
}
#[inline]
fn write_bytes(self, slice: &mut [u8]) {
NativeEndian::write_u32(slice, self)
}
}
#[cfg(target_pointer_width = "64")]
unsafe impl StateID for u64 {
#[inline]
fn from_usize(n: usize) -> u64 {
n as u64
}
#[inline]
fn to_usize(self) -> usize {
self as usize
}
#[inline]
fn max_id() -> usize {
::core::u64::MAX as usize
}
#[inline]
fn read_bytes(slice: &[u8]) -> Self {
NativeEndian::read_u64(slice)
}
#[inline]
fn write_bytes(self, slice: &mut [u8]) {
NativeEndian::write_u64(slice, self)
}
}