pub trait IterExt: Iterator + Sized {
// Required methods
fn to_bytes(self) -> Utf8CharSplitter<Self::Item, Self> ⓘ
where Self::Item: Borrow<Utf8Char>;
fn to_units(self) -> Utf16CharSplitter<Self::Item, Self> ⓘ
where Self::Item: Borrow<Utf16Char>;
fn to_utf8chars(self) -> Utf8CharMerger<Self::Item, Self> ⓘ
where Self::Item: Borrow<u8>;
fn to_utf16chars(self) -> Utf16CharMerger<Self::Item, Self> ⓘ
where Self::Item: Borrow<u16>;
}
Expand description
Iterator methods that convert between `u8`s and `Utf8Char` or `u16`s and `Utf16Char`.
All the iterator adapters also accept iterators that produce references of the type they convert from.
Required Methods§
Sourcefn to_bytes(self) -> Utf8CharSplitter<Self::Item, Self> ⓘ
fn to_bytes(self) -> Utf8CharSplitter<Self::Item, Self> ⓘ
Converts an iterator of `Utf8Char`s or `&Utf8Char`s to an iterator of `u8`s.
Has the same effect as `.flat_map()` or `.flatten()`, but the returned iterator is ~40% faster.
The iterator also implements `Read` (when the `std` feature isn’t disabled).
Reading will never produce an error, and calls to `.read()` and `.next()` can be mixed.
The exact number of bytes cannot be known in advance, but `size_hint()` gives the possible range
(min: all remaining characters are ASCII, max: all require four bytes).
§Examples
From iterator of values:
use encode_unicode::{IterExt, StrExt};
let iterator = "foo".utf8chars();
let mut bytes = [0; 4];
for (u,dst) in iterator.to_bytes().zip(&mut bytes) {*dst=u;}
assert_eq!(&bytes, b"foo\0");
From iterator of references:
use encode_unicode::{IterExt, StrExt, Utf8Char};
let chars: Vec<Utf8Char> = "💣 bomb 💣".utf8chars().collect();
let bytes: Vec<u8> = chars.iter().to_bytes().collect();
let flat_map: Vec<u8> = chars.iter().flat_map(|u8c| *u8c ).collect();
assert_eq!(bytes, flat_map);
`Read`ing from it:
use encode_unicode::{IterExt, StrExt};
use std::io::Read;
let s = "Ååh‽";
assert_eq!(s.len(), 8);
let mut buf = [b'E'; 9];
let mut reader = s.utf8chars().to_bytes();
assert_eq!(reader.read(&mut buf[..]).unwrap(), 8);
assert_eq!(reader.read(&mut buf[..]).unwrap(), 0);
assert_eq!(&buf[..8], s.as_bytes());
assert_eq!(buf[8], b'E');
Sourcefn to_units(self) -> Utf16CharSplitter<Self::Item, Self> ⓘ
fn to_units(self) -> Utf16CharSplitter<Self::Item, Self> ⓘ
Converts an iterator of `Utf16Char` (or `&Utf16Char`) to an iterator of `u16`s.
Has the same effect as `.flat_map()` or `.flatten()`, but the returned iterator is about twice as fast.
The exact number of units cannot be known in advance, but `size_hint()` gives the possible range.
§Examples
From iterator of values:
use encode_unicode::{IterExt, StrExt};
let iterator = "foo".utf16chars();
let mut units = [0; 4];
for (u,dst) in iterator.to_units().zip(&mut units) {*dst=u;}
assert_eq!(units, ['f' as u16, 'o' as u16, 'o' as u16, 0]);
From iterator of references:
use encode_unicode::{IterExt, StrExt, Utf16Char};
// (💣 takes two units)
let chars: Vec<Utf16Char> = "💣 bomb 💣".utf16chars().collect();
let units: Vec<u16> = chars.iter().to_units().collect();
let flat_map: Vec<u16> = chars.iter().flat_map(|u16c| *u16c ).collect();
assert_eq!(units, flat_map);
Sourcefn to_utf8chars(self) -> Utf8CharMerger<Self::Item, Self> ⓘ
fn to_utf8chars(self) -> Utf8CharMerger<Self::Item, Self> ⓘ
Decodes bytes as UTF-8 and groups them into `Utf8Char`s.
When errors (invalid values or sequences) are encountered,
it continues with the byte right after the start of the error sequence.
This is neither the most intelligent choice (sometimes it is guaranteed to
produce another error), nor the easiest to implement, but I believe it to
be the most predictable.
It also means that ASCII characters are never hidden by errors.
§Examples
Replace all errors with U+FFFD REPLACEMENT CHARACTER:
use encode_unicode::{Utf8Char, IterExt};
let mut buf = [b'\0'; 255];
let len = b"foo\xCFbar".iter()
.to_utf8chars()
.flat_map(|r| r.unwrap_or(Utf8Char::from('\u{FFFD}')).into_iter() )
.zip(&mut buf[..])
.map(|(byte, dst)| *dst = byte )
.count();
assert_eq!(&buf[..len], "foo\u{FFFD}bar".as_bytes());
Collect everything up until the first error into a string:
use encode_unicode::iterator::Utf8CharMerger;
let mut good = String::new();
for r in Utf8CharMerger::from(b"foo\xcc\xbbbar\xcc\xddbaz") {
if let Ok(uc) = r {
good.push_str(uc.as_str());
} else {
break;
}
}
assert_eq!(good, "foo̻bar");
Abort decoding on error:
use encode_unicode::{IterExt, Utf8Char};
use encode_unicode::error::{InvalidUtf8Slice, InvalidUtf8};
let result = b"ab\0\xe0\xbc\xa9 \xf3\x80\x77".iter()
.to_utf8chars()
.collect::<Result<String,InvalidUtf8Slice>>();
assert_eq!(result, Err(InvalidUtf8Slice::Utf8(InvalidUtf8::NotAContinuationByte(2))));
Sourcefn to_utf16chars(self) -> Utf16CharMerger<Self::Item, Self> ⓘ
fn to_utf16chars(self) -> Utf16CharMerger<Self::Item, Self> ⓘ
Decodes `u16` units as UTF-16 and groups them into `Utf16Char`s.
When errors (unmatched leading surrogates or unexpected trailing surrogates) are encountered, an error is produced for every unit.
§Examples
Replace errors with ‘�’:
use encode_unicode::{IterExt, Utf16Char};
let slice = &['a' as u16, 0xdf00, 0xd83c, 0xdca0][..];
let string = slice.iter()
.to_utf16chars()
.map(|r| r.unwrap_or(Utf16Char::from('\u{fffd}')) ) // REPLACEMENT_CHARACTER
.collect::<String>();
assert_eq!(string, "a�🂠");
Every invalid unit yields its own error, and decoding continues afterwards:
use encode_unicode::{IterExt, Utf16Char};
use encode_unicode::error::Utf16PairError::*;
let slice = [0xdcba, 0xdeff, 0xd8be, 0xdeee, 'Y' as u16, 0xdab1, 0xdab1];
let mut iter = slice.iter().to_utf16chars();
assert_eq!(iter.size_hint(), (3, Some(7)));
assert_eq!(iter.next(), Some(Err(UnexpectedTrailingSurrogate)));
assert_eq!(iter.next(), Some(Err(UnexpectedTrailingSurrogate)));
assert_eq!(iter.next(), Some(Ok(Utf16Char::from('\u{3faee}'))));
assert_eq!(iter.next(), Some(Ok(Utf16Char::from('Y'))));
assert_eq!(iter.next(), Some(Err(UnmatchedLeadingSurrogate)));
assert_eq!(iter.next(), Some(Err(Incomplete)));
assert_eq!(iter.into_remaining_units().next(), None);
Search for a codepoint and return the codepoint index of the first match:
use encode_unicode::{IterExt, Utf16Char};
let position = [0xd875, 0xdd4f, '≈' as u16, '2' as u16].iter()
.to_utf16chars()
.position(|r| r == Ok(Utf16Char::from('≈')) );
assert_eq!(position, Some(1));
Dyn Compatibility§
This trait is not dyn compatible.
In older versions of Rust, dyn compatibility was called "object safety", so this trait is not object safe.