pub trait CharExt: Sized {
Show 13 methods
// Required methods
fn to_utf8(self) -> Utf8Char;
fn to_utf16(self) -> Utf16Char;
fn iter_utf8_bytes(self) -> Utf8Iterator ⓘ;
fn iter_utf16_units(self) -> Utf16Iterator ⓘ;
fn to_utf8_array(self) -> ([u8; 4], usize);
fn to_utf16_tuple(self) -> (u16, Option<u16>);
fn from_utf8_slice_start(
src: &[u8],
) -> Result<(Self, usize), InvalidUtf8Slice>;
fn from_utf16_slice_start(
src: &[u16],
) -> Result<(Self, usize), InvalidUtf16Slice>;
fn from_utf8_array(utf8: [u8; 4]) -> Result<Self, InvalidUtf8Array>;
fn from_utf16_tuple(
utf16: (u16, Option<u16>),
) -> Result<Self, InvalidUtf16Tuple>;
unsafe fn from_utf8_exact_slice_unchecked(src: &[u8]) -> Self;
unsafe fn from_utf16_tuple_unchecked(utf16: (u16, Option<u16>)) -> Self;
fn from_u32_detailed(c: u32) -> Result<Self, InvalidCodepoint>;
}
Expand description
Extension trait for char
that adds methods for converting to and from UTF-8 or UTF-16.
Required Methods§
Sourcefn to_utf8(self) -> Utf8Char
fn to_utf8(self) -> Utf8Char
Get the UTF-8 representation of this codepoint.
Utf8Char
is to [u8;4]
what char
is to u32
:
a restricted type that cannot be mutated internally.
Sourcefn to_utf16(self) -> Utf16Char
fn to_utf16(self) -> Utf16Char
Get the UTF-16 representation of this codepoint.
Utf16Char
is to [u16;2]
what char
is to u32
:
a restricted type that cannot be mutated internally.
Sourcefn iter_utf8_bytes(self) -> Utf8Iterator ⓘ
fn iter_utf8_bytes(self) -> Utf8Iterator ⓘ
Iterate over or read the one to four bytes in the UTF-8 representation of this codepoint.
An identical alternative to the unstable char.encode_utf8()
.
That method somehow still exists on stable, so I have to use a different name.
Sourcefn iter_utf16_units(self) -> Utf16Iterator ⓘ
fn iter_utf16_units(self) -> Utf16Iterator ⓘ
Iterate over the one or two units in the UTF-16 representation of this codepoint.
An identical alternative to the unstable char.encode_utf16()
.
That method somehow still exists on stable, so I have to use a different name.
Sourcefn to_utf8_array(self) -> ([u8; 4], usize)
fn to_utf8_array(self) -> ([u8; 4], usize)
Convert this char to a UTF-8 array, and also return how many bytes of the array are used.
The returned array is left-aligned with unused bytes set to zero.
Sourcefn to_utf16_tuple(self) -> (u16, Option<u16>)
fn to_utf16_tuple(self) -> (u16, Option<u16>)
Convert this char
to UTF-16.
The second u16
is Some
if a surrogate pair is required.
Sourcefn from_utf8_slice_start(src: &[u8]) -> Result<(Self, usize), InvalidUtf8Slice>
fn from_utf8_slice_start(src: &[u8]) -> Result<(Self, usize), InvalidUtf8Slice>
Create a char
from the start of a UTF-8 slice,
and also return how many bytes were used.
§Errors
Returns an Err
if the slice is empty, doesn’t start with a valid
UTF-8 sequence or is too short for the sequence.
§Examples
use encode_unicode::CharExt;
use encode_unicode::error::InvalidUtf8Slice::*;
use encode_unicode::error::InvalidUtf8::*;
assert_eq!(char::from_utf8_slice_start(&[b'A', b'B', b'C']), Ok(('A',1)));
assert_eq!(char::from_utf8_slice_start(&[0xdd, 0xbb]), Ok(('\u{77b}',2)));
assert_eq!(char::from_utf8_slice_start(&[]), Err(TooShort(1)));
assert_eq!(char::from_utf8_slice_start(&[0xf0, 0x99]), Err(TooShort(4)));
assert_eq!(char::from_utf8_slice_start(&[0xee, b'F', 0x80]), Err(Utf8(NotAContinuationByte(1))));
assert_eq!(char::from_utf8_slice_start(&[0xee, 0x99, 0x0f]), Err(Utf8(NotAContinuationByte(2))));
Sourcefn from_utf16_slice_start(
src: &[u16],
) -> Result<(Self, usize), InvalidUtf16Slice>
fn from_utf16_slice_start( src: &[u16], ) -> Result<(Self, usize), InvalidUtf16Slice>
Create a char
from the start of a UTF-16 slice,
and also return how many units were used.
If you want to continue after an error, continue with the next u16
unit.
Sourcefn from_utf8_array(utf8: [u8; 4]) -> Result<Self, InvalidUtf8Array>
fn from_utf8_array(utf8: [u8; 4]) -> Result<Self, InvalidUtf8Array>
Convert a UTF-8 sequence as returned from .to_utf8_array()
into a char.
The codepoint must start at the first byte, and leftover bytes are ignored.
§Errors
Returns an Err
if the array doesn’t start with a valid UTF-8 sequence.
§Examples
use encode_unicode::CharExt;
use encode_unicode::error::InvalidUtf8Array::*;
use encode_unicode::error::InvalidUtf8::*;
use encode_unicode::error::InvalidCodepoint::*;
assert_eq!(char::from_utf8_array([b'A', 0, 0, 0]), Ok('A'));
assert_eq!(char::from_utf8_array([0xf4, 0x8b, 0xbb, 0xbb]), Ok('\u{10befb}'));
assert_eq!(char::from_utf8_array([b'A', b'B', b'C', b'D']), Ok('A'));
assert_eq!(char::from_utf8_array([0, 0, 0xcc, 0xbb]), Ok('\0'));
assert_eq!(char::from_utf8_array([0xef, b'F', 0x80, 0x80]), Err(Utf8(NotAContinuationByte(1))));
assert_eq!(char::from_utf8_array([0xc1, 0x80, 0, 0]), Err(Utf8(OverLong)));
assert_eq!(char::from_utf8_array([0xf7, 0xaa, 0x99, 0x88]), Err(Codepoint(TooHigh)));
Sourcefn from_utf16_tuple(
utf16: (u16, Option<u16>),
) -> Result<Self, InvalidUtf16Tuple>
fn from_utf16_tuple( utf16: (u16, Option<u16>), ) -> Result<Self, InvalidUtf16Tuple>
Convert a UTF-16 pair as returned from .to_utf16_tuple()
into a char
.
Sourceunsafe fn from_utf8_exact_slice_unchecked(src: &[u8]) -> Self
unsafe fn from_utf8_exact_slice_unchecked(src: &[u8]) -> Self
Convert a UTF-8 sequence into a char.
The length of the slice is taken as length of the sequence; it should be 1, 2, 3 or 4.
§Safety
The slice must contain exactly one, valid, UTF-8 sequence.
Passing a slice that produces an invalid codepoint is always undefined behavior; later checks that the codepoint is valid can be removed by the compiler.
§Panics
If the slice is empty.
Sourceunsafe fn from_utf16_tuple_unchecked(utf16: (u16, Option<u16>)) -> Self
unsafe fn from_utf16_tuple_unchecked(utf16: (u16, Option<u16>)) -> Self
Convert a UTF-16 tuple as returned from .to_utf16_tuple()
into a char
.
Sourcefn from_u32_detailed(c: u32) -> Result<Self, InvalidCodepoint>
fn from_u32_detailed(c: u32) -> Result<Self, InvalidCodepoint>
Perform some extra validations compared to char::from_u32_unchecked().
§Errors
This function will return an error if
- the value is greater than 0x10ffff
- the value is between 0xd800 and 0xdfff (inclusive)
Dyn Compatibility§
This trait is not dyn compatible.
In older versions of Rust, dyn compatibility was called "object safety", so this trait is not object safe.