pub unsafe trait FromZeros: TryFromBytes {
// Provided methods
fn zero(&mut self) { ... }
fn new_zeroed() -> Self
where Self: Sized { ... }
fn new_box_zeroed() -> Result<Box<Self>, AllocError>
where Self: Sized { ... }
fn new_box_zeroed_with_elems(count: usize) -> Result<Box<Self>, AllocError>
where Self: KnownLayout<PointerMetadata = usize> { ... }
fn new_vec_zeroed(len: usize) -> Result<Vec<Self>, AllocError>
where Self: Sized { ... }
fn extend_vec_zeroed(
v: &mut Vec<Self>,
additional: usize,
) -> Result<(), AllocError>
where Self: Sized { ... }
fn insert_vec_zeroed(
v: &mut Vec<Self>,
position: usize,
additional: usize,
) -> Result<(), AllocError>
where Self: Sized { ... }
}
Expand description
Types for which a sequence of 0 bytes is a valid instance.
Any memory region of the appropriate length which is guaranteed to contain
only zero bytes can be viewed as any FromZeros type with no runtime
overhead. This is useful whenever memory is known to be in a zeroed state,
such as memory returned from some allocation routines.
§Warning: Padding bytes
Note that, when a value is moved or copied, only the non-padding bytes of
that value are guaranteed to be preserved. It is unsound to assume that
values written to padding bytes are preserved after a move or copy. For more
details, see the FromBytes docs.
§Implementation
Do not implement this trait yourself! Instead, use
#[derive(FromZeros)]; e.g.:
#[derive(FromZeros)]
struct MyStruct {
...
}
#[derive(FromZeros)]
#[repr(u8)]
enum MyEnum {
...
}
#[derive(FromZeros, Immutable)]
union MyUnion {
...
}
This derive performs a sophisticated, compile-time safety analysis to
determine whether a type is FromZeros.
§Safety
This section describes what is required in order for T: FromZeros, and
what unsafe code may assume of such types. If you don’t plan on implementing
FromZeros manually, and you don’t plan on writing unsafe code that
operates on FromZeros types, then you don’t need to read this section.
If T: FromZeros, then unsafe code may assume that it is sound to produce a
T whose bytes are all initialized to zero. If a type is marked as
FromZeros which violates this contract, it may cause undefined behavior.
#[derive(FromZeros)] only permits types which satisfy these
requirements.
Provided Methods§
Source
fn zero(&mut self)
fn zero(&mut self)
Overwrites self with zeros.
Sets every byte in self to 0. While this is similar to doing *self = Self::new_zeroed(), it differs in that zero does not semantically
drop the current value and replace it with a new one — it simply
modifies the bytes of the existing value.
§Examples
#[derive(FromZeros)]
#[repr(C)]
struct PacketHeader {
src_port: [u8; 2],
dst_port: [u8; 2],
length: [u8; 2],
checksum: [u8; 2],
}
let mut header = PacketHeader {
src_port: 100u16.to_be_bytes(),
dst_port: 200u16.to_be_bytes(),
length: 300u16.to_be_bytes(),
checksum: 400u16.to_be_bytes(),
};
header.zero();
assert_eq!(header.src_port, [0, 0]);
assert_eq!(header.dst_port, [0, 0]);
assert_eq!(header.length, [0, 0]);
assert_eq!(header.checksum, [0, 0]);
§ Code Generation
This abstraction is safe and cheap, but does not necessarily have zero runtime cost. The codegen you experience in practice will depend on optimization level, the layout of the destination type, and what the compiler can prove about the source.
The below examples illustrate typical codegen for increasingly complex types:
Sized
Format
use zerocopy_derive::*;
// The only valid value of this type is the byte sequence `0xC0C0`.
#[derive(TryFromBytes, KnownLayout, Immutable, IntoBytes)]
#[repr(u16)]
pub enum C0C0 {
_XC0C0 = 0xC0C0,
}
macro_rules! define_packet {
($name: ident, $trait: ident, $leading_field: ty) => {
#[derive($trait, KnownLayout, Immutable, IntoBytes)]
#[repr(C, align(2))]
pub struct $name {
magic_number: $leading_field,
mug_size: u8,
temperature: u8,
marshmallows: [u8; 2],
}
};
}
/// Packet begins with bytes 0xC0C0.
define_packet!(CocoPacket, TryFromBytes, C0C0);
/// Packet begins with any two bytes.
define_packet!(LocoPacket, FromBytes, [u8; 2]);
Benchmark
use zerocopy::*;
#[path = "formats/coco_static_size.rs"]
mod format;
#[unsafe(no_mangle)]
fn bench_zero_static_size(source: &mut format::LocoPacket) {
source.zero()
}
Assembly
bench_zero_static_size:
mov word ptr [rdi + 4], 0
mov dword ptr [rdi], 0
ret
Machine Code Analysis
Iterations: 100
Instructions: 300
Total Cycles: 203
Total uOps: 300
Dispatch Width: 4
uOps Per Cycle: 1.48
IPC: 1.48
Block RThroughput: 2.0
Instruction Info:
[1]: #uOps
[2]: Latency
[3]: RThroughput
[4]: MayLoad
[5]: MayStore
[6]: HasSideEffects (U)
[1] [2] [3] [4] [5] [6] Instructions:
1 1 1.00 * mov word ptr [rdi + 4], 0
1 1 1.00 * mov dword ptr [rdi], 0
1 1 1.00 U ret
Resources:
[0] - SBDivider
[1] - SBFPDivider
[2] - SBPort0
[3] - SBPort1
[4] - SBPort4
[5] - SBPort5
[6.0] - SBPort23
[6.1] - SBPort23
Resource pressure per iteration:
[0] [1] [2] [3] [4] [5] [6.0] [6.1]
- - - - 2.00 1.00 1.00 1.00
Resource pressure by instruction:
[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
- - - - 1.00 - - 1.00 mov word ptr [rdi + 4], 0
- - - - 1.00 - 1.00 - mov dword ptr [rdi], 0
- - - - - 1.00 - - ret
Unsized
Format
use zerocopy_derive::*;
// The only valid value of this type is the byte sequence `0xC0C0`.
#[derive(TryFromBytes, KnownLayout, Immutable, IntoBytes)]
#[repr(u16)]
pub enum C0C0 {
_XC0C0 = 0xC0C0,
}
macro_rules! define_packet {
($name: ident, $trait: ident, $leading_field: ty) => {
#[derive($trait, KnownLayout, Immutable, IntoBytes, SplitAt)]
#[repr(C, align(2))]
pub struct $name {
magic_number: $leading_field,
mug_size: u8,
temperature: u8,
marshmallows: [[u8; 2]],
}
};
}
/// Packet begins with bytes 0xC0C0.
define_packet!(CocoPacket, TryFromBytes, C0C0);
/// Packet begins with any two bytes.
define_packet!(LocoPacket, FromBytes, [u8; 2]);
Benchmark
use zerocopy::*;
#[path = "formats/coco_dynamic_size.rs"]
mod format;
#[unsafe(no_mangle)]
fn bench_zero_dynamic_size(source: &mut format::LocoPacket) {
source.zero()
}
Assembly
bench_zero_dynamic_size:
lea rdx, [2*rsi + 5]
and rdx, -2
xor esi, esi
jmp qword ptr [rip + memset@GOTPCREL]
Machine Code Analysis
Iterations: 100
Instructions: 400
Total Cycles: 142
Total uOps: 500
Dispatch Width: 4
uOps Per Cycle: 3.52
IPC: 2.82
Block RThroughput: 1.3
Instruction Info:
[1]: #uOps
[2]: Latency
[3]: RThroughput
[4]: MayLoad
[5]: MayStore
[6]: HasSideEffects (U)
[1] [2] [3] [4] [5] [6] Instructions:
1 1 0.50 lea rdx, [2*rsi + 5]
1 1 0.33 and rdx, -2
1 0 0.25 xor esi, esi
2 6 1.00 * jmp qword ptr [rip + memset@GOTPCREL]
Resources:
[0] - SBDivider
[1] - SBFPDivider
[2] - SBPort0
[3] - SBPort1
[4] - SBPort4
[5] - SBPort5
[6.0] - SBPort23
[6.1] - SBPort23
Resource pressure per iteration:
[0] [1] [2] [3] [4] [5] [6.0] [6.1]
- - 0.99 1.00 - 1.01 0.50 0.50
Resource pressure by instruction:
[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
- - 0.99 0.01 - - - - lea rdx, [2*rsi + 5]
- - - 0.99 - 0.01 - - and rdx, -2
- - - - - - - - xor esi, esi
- - - - - 1.00 0.50 0.50 jmp qword ptr [rip + memset@GOTPCREL]
Dynamically Padded
Format
use zerocopy_derive::*;
// The only valid value of this type is the byte sequence `0xC0C0`.
#[derive(TryFromBytes, KnownLayout, Immutable)]
#[repr(u16)]
pub enum C0C0 {
_XC0C0 = 0xC0C0,
}
#[derive(FromBytes, KnownLayout, Immutable, SplitAt)]
#[repr(C, align(4))]
pub struct Packet<Magic> {
magic_number: Magic,
milk: u8,
mug_size: u8,
temperature: [u8; 5],
marshmallows: [[u8; 3]],
}
/// A packet beginning with the magic number `0xC0C0`.
pub type CocoPacket = Packet<C0C0>;
/// A packet beginning with any two initialized bytes.
pub type LocoPacket = Packet<[u8; 2]>;
Benchmark
use zerocopy::*;
#[path = "formats/coco_dynamic_padding.rs"]
mod format;
#[unsafe(no_mangle)]
fn bench_zero_dynamic_padding(source: &mut format::LocoPacket) {
source.zero()
}
Assembly
bench_zero_dynamic_padding:
lea rax, [rsi + 2*rsi]
movabs rdx, 9223372036854775804
and rdx, rax
add rdx, 12
xor esi, esi
jmp qword ptr [rip + memset@GOTPCREL]
Machine Code Analysis
Iterations: 100
Instructions: 600
Total Cycles: 209
Total uOps: 700
Dispatch Width: 4
uOps Per Cycle: 3.35
IPC: 2.87
Block RThroughput: 1.8
Instruction Info:
[1]: #uOps
[2]: Latency
[3]: RThroughput
[4]: MayLoad
[5]: MayStore
[6]: HasSideEffects (U)
[1] [2] [3] [4] [5] [6] Instructions:
1 1 0.50 lea rax, [rsi + 2*rsi]
1 1 0.33 movabs rdx, 9223372036854775804
1 1 0.33 and rdx, rax
1 1 0.33 add rdx, 12
1 0 0.25 xor esi, esi
2 6 1.00 * jmp qword ptr [rip + memset@GOTPCREL]
Resources:
[0] - SBDivider
[1] - SBFPDivider
[2] - SBPort0
[3] - SBPort1
[4] - SBPort4
[5] - SBPort5
[6.0] - SBPort23
[6.1] - SBPort23
Resource pressure per iteration:
[0] [1] [2] [3] [4] [5] [6.0] [6.1]
- - 1.66 1.66 - 1.68 0.50 0.50
Resource pressure by instruction:
[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
- - 0.33 0.67 - - - - lea rax, [rsi + 2*rsi]
- - 0.98 - - 0.02 - - movabs rdx, 9223372036854775804
- - 0.01 0.66 - 0.33 - - and rdx, rax
- - 0.34 0.33 - 0.33 - - add rdx, 12
- - - - - - - - xor esi, esi
- - - - - 1.00 0.50 0.50 jmp qword ptr [rip + memset@GOTPCREL]
Source
fn new_zeroed() -> Self
where
    Self: Sized,
fn new_zeroed() -> Self
where
    Self: Sized,
Creates an instance of Self from zeroed bytes.
§Examples
#[derive(FromZeros)]
#[repr(C)]
struct PacketHeader {
src_port: [u8; 2],
dst_port: [u8; 2],
length: [u8; 2],
checksum: [u8; 2],
}
let header: PacketHeader = FromZeros::new_zeroed();
assert_eq!(header.src_port, [0, 0]);
assert_eq!(header.dst_port, [0, 0]);
assert_eq!(header.length, [0, 0]);
assert_eq!(header.checksum, [0, 0]);
§ Code Generation
This abstraction is safe and cheap, but does not necessarily have zero runtime cost. The codegen you experience in practice will depend on optimization level, the layout of the destination type, and what the compiler can prove about the source.
Format
use zerocopy_derive::*;
// The only valid value of this type is the byte sequence `0xC0C0`.
#[derive(TryFromBytes, KnownLayout, Immutable, IntoBytes)]
#[repr(u16)]
pub enum C0C0 {
_XC0C0 = 0xC0C0,
}
macro_rules! define_packet {
($name: ident, $trait: ident, $leading_field: ty) => {
#[derive($trait, KnownLayout, Immutable, IntoBytes)]
#[repr(C, align(2))]
pub struct $name {
magic_number: $leading_field,
mug_size: u8,
temperature: u8,
marshmallows: [u8; 2],
}
};
}
/// Packet begins with bytes 0xC0C0.
define_packet!(CocoPacket, TryFromBytes, C0C0);
/// Packet begins with any two bytes.
define_packet!(LocoPacket, FromBytes, [u8; 2]);
Benchmark
use zerocopy::*;
#[path = "formats/coco_static_size.rs"]
mod format;
#[unsafe(no_mangle)]
fn bench_new_zeroed() -> format::LocoPacket {
FromZeros::new_zeroed()
}
Assembly
bench_new_zeroed:
xor eax, eax
ret
Machine Code Analysis
Iterations: 100
Instructions: 200
Total Cycles: 103
Total uOps: 200
Dispatch Width: 4
uOps Per Cycle: 1.94
IPC: 1.94
Block RThroughput: 1.0
Instruction Info:
[1]: #uOps
[2]: Latency
[3]: RThroughput
[4]: MayLoad
[5]: MayStore
[6]: HasSideEffects (U)
[1] [2] [3] [4] [5] [6] Instructions:
1 0 0.25 xor eax, eax
1 1 1.00 U ret
Resources:
[0] - SBDivider
[1] - SBFPDivider
[2] - SBPort0
[3] - SBPort1
[4] - SBPort4
[5] - SBPort5
[6.0] - SBPort23
[6.1] - SBPort23
Resource pressure per iteration:
[0] [1] [2] [3] [4] [5] [6.0] [6.1]
- - - - - 1.00 - -
Resource pressure by instruction:
[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
- - - - - - - - xor eax, eax
- - - - - 1.00 - - ret
Source
fn new_box_zeroed() -> Result<Box<Self>, AllocError>
where
    Self: Sized,
fn new_box_zeroed() -> Result<Box<Self>, AllocError>
where
    Self: Sized,
Creates a Box<Self> from zeroed bytes.
This function is useful for allocating large values on the heap and
zero-initializing them, without ever creating a temporary instance of
Self on the stack. For example, <[u8; 1048576]>::new_box_zeroed()
will allocate [u8; 1048576] directly on the heap; it does not require
storing [u8; 1048576] in a temporary variable on the stack.
On systems that use a heap implementation that supports allocating from
pre-zeroed memory, using new_box_zeroed (or related functions) may
have performance benefits.
§Errors
Returns an error on allocation failure. Allocation failure is guaranteed never to cause a panic or an abort.
§ Code Generation
This abstraction is safe and cheap, but does not necessarily have zero runtime cost. The codegen you experience in practice will depend on optimization level, the layout of the destination type, and what the compiler can prove about the source.
Format
use zerocopy_derive::*;
// The only valid value of this type is the byte sequence `0xC0C0`.
#[derive(TryFromBytes, KnownLayout, Immutable, IntoBytes)]
#[repr(u16)]
pub enum C0C0 {
_XC0C0 = 0xC0C0,
}
macro_rules! define_packet {
($name: ident, $trait: ident, $leading_field: ty) => {
#[derive($trait, KnownLayout, Immutable, IntoBytes)]
#[repr(C, align(2))]
pub struct $name {
magic_number: $leading_field,
mug_size: u8,
temperature: u8,
marshmallows: [u8; 2],
}
};
}
/// Packet begins with bytes 0xC0C0.
define_packet!(CocoPacket, TryFromBytes, C0C0);
/// Packet begins with any two bytes.
define_packet!(LocoPacket, FromBytes, [u8; 2]);
Benchmark
use zerocopy::*;
#[path = "formats/coco_static_size.rs"]
mod format;
#[unsafe(no_mangle)]
fn bench_new_box_zeroed() -> Option<Box<format::LocoPacket>> {
FromZeros::new_box_zeroed().ok()
}
Assembly
bench_new_box_zeroed:
push rax
call qword ptr [rip + __rustc::__rust_no_alloc_shim_is_unstable_v2@GOTPCREL]
mov edi, 6
mov esi, 2
pop rax
jmp qword ptr [rip + __rustc::__rust_alloc_zeroed@GOTPCREL]
Machine Code Analysis
Iterations: 100
Instructions: 600
Total Cycles: 1197
Total uOps: 1100
Dispatch Width: 4
uOps Per Cycle: 0.92
IPC: 0.50
Block RThroughput: 2.8
Instruction Info:
[1]: #uOps
[2]: Latency
[3]: RThroughput
[4]: MayLoad
[5]: MayStore
[6]: HasSideEffects (U)
[1] [2] [3] [4] [5] [6] Instructions:
2 5 1.00 * push rax
4 7 1.00 * call qword ptr [rip + __rustc::__rust_no_alloc_shim_is_unstable_v2@GOTPCREL]
1 1 0.33 mov edi, 6
1 1 0.33 mov esi, 2
1 6 0.50 * pop rax
2 6 1.00 * jmp qword ptr [rip + __rustc::__rust_alloc_zeroed@GOTPCREL]
Resources:
[0] - SBDivider
[1] - SBFPDivider
[2] - SBPort0
[3] - SBPort1
[4] - SBPort4
[5] - SBPort5
[6.0] - SBPort23
[6.1] - SBPort23
Resource pressure per iteration:
[0] [1] [2] [3] [4] [5] [6.0] [6.1]
- - 0.99 1.00 2.00 2.01 2.07 2.93
Resource pressure by instruction:
[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
- - - - 1.00 - 0.93 0.07 push rax
- - - - 1.00 1.00 0.12 1.88 call qword ptr [rip + __rustc::__rust_no_alloc_shim_is_unstable_v2@GOTPCREL]
- - 0.99 - - 0.01 - - mov edi, 6
- - - 1.00 - - - - mov esi, 2
- - - - - - 0.94 0.06 pop rax
- - - - - 1.00 0.08 0.92 jmp qword ptr [rip + __rustc::__rust_alloc_zeroed@GOTPCREL]
Source
fn new_box_zeroed_with_elems(count: usize) -> Result<Box<Self>, AllocError>
where
    Self: KnownLayout<PointerMetadata = usize>,
fn new_box_zeroed_with_elems(count: usize) -> Result<Box<Self>, AllocError>
where
    Self: KnownLayout<PointerMetadata = usize>,
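Creates a Box<Self> from zeroed bytes, where Self is a slice (or a type with a trailing slice) holding count elements; for Self = [T] this is a boxed slice Box<[T]>.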
This function is useful for allocating large values of [Self] on the
heap and zero-initializing them, without ever creating a temporary
instance of [Self; _] on the stack. For example,
<[u8]>::new_box_zeroed_with_elems(1048576) will allocate the slice directly on
the heap; it does not require storing the slice on the stack.
On systems that use a heap implementation that supports allocating from
pre-zeroed memory, using new_box_zeroed_with_elems may have performance
benefits.
If Self is a zero-sized type, then this function will return a
Box<[Self]> that has the correct len. Such a box cannot contain any
actual information, but its len() property will report the correct
value.
§Errors
Returns an error on allocation failure. Allocation failure is guaranteed never to cause a panic or an abort.
§ Code Generation
This abstraction is safe and cheap, but does not necessarily have zero runtime cost. The codegen you experience in practice will depend on optimization level, the layout of the destination type, and what the compiler can prove about the source.
The below examples illustrate typical codegen for increasingly complex types:
Unsized
Format
use zerocopy_derive::*;
// The only valid value of this type is the byte sequence `0xC0C0`.
#[derive(TryFromBytes, KnownLayout, Immutable, IntoBytes)]
#[repr(u16)]
pub enum C0C0 {
_XC0C0 = 0xC0C0,
}
macro_rules! define_packet {
($name: ident, $trait: ident, $leading_field: ty) => {
#[derive($trait, KnownLayout, Immutable, IntoBytes, SplitAt)]
#[repr(C, align(2))]
pub struct $name {
magic_number: $leading_field,
mug_size: u8,
temperature: u8,
marshmallows: [[u8; 2]],
}
};
}
/// Packet begins with bytes 0xC0C0.
define_packet!(CocoPacket, TryFromBytes, C0C0);
/// Packet begins with any two bytes.
define_packet!(LocoPacket, FromBytes, [u8; 2]);
Benchmark
use zerocopy::*;
#[path = "formats/coco_dynamic_size.rs"]
mod format;
#[unsafe(no_mangle)]
fn bench_new_box_zeroed_with_elems_dynamic_size(count: usize) -> Option<Box<format::LocoPacket>> {
FromZeros::new_box_zeroed_with_elems(count).ok()
}
Assembly
bench_new_box_zeroed_with_elems_dynamic_size:
push r14
push rbx
push rax
mov rbx, rdi
movabs rax, 4611686018427387900
cmp rdi, rax
jbe .LBB5_2
xor eax, eax
jmp .LBB5_3
.LBB5_2:
lea r14, [2*rbx + 4]
call qword ptr [rip + __rustc::__rust_no_alloc_shim_is_unstable_v2@GOTPCREL]
mov esi, 2
mov rdi, r14
call qword ptr [rip + __rustc::__rust_alloc_zeroed@GOTPCREL]
.LBB5_3:
mov rdx, rbx
add rsp, 8
pop rbx
pop r14
ret
Machine Code Analysis
Iterations: 100
Instructions: 1900
Total Cycles: 2990
Total uOps: 2800
Dispatch Width: 4
uOps Per Cycle: 0.94
IPC: 0.64
Block RThroughput: 7.0
Instruction Info:
[1]: #uOps
[2]: Latency
[3]: RThroughput
[4]: MayLoad
[5]: MayStore
[6]: HasSideEffects (U)
[1] [2] [3] [4] [5] [6] Instructions:
2 5 1.00 * push r14
2 5 1.00 * push rbx
2 5 1.00 * push rax
1 1 0.33 mov rbx, rdi
1 1 0.33 movabs rax, 4611686018427387900
1 1 0.33 cmp rdi, rax
1 1 1.00 jbe .LBB5_2
1 0 0.25 xor eax, eax
1 1 1.00 jmp .LBB5_3
1 1 0.50 lea r14, [2*rbx + 4]
4 7 1.00 * call qword ptr [rip + __rustc::__rust_no_alloc_shim_is_unstable_v2@GOTPCREL]
1 1 0.33 mov esi, 2
1 1 0.33 mov rdi, r14
4 7 1.00 * call qword ptr [rip + __rustc::__rust_alloc_zeroed@GOTPCREL]
1 1 0.33 mov rdx, rbx
1 1 0.33 add rsp, 8
1 6 0.50 * pop rbx
1 6 0.50 * pop r14
1 1 1.00 U ret
Resources:
[0] - SBDivider
[1] - SBFPDivider
[2] - SBPort0
[3] - SBPort1
[4] - SBPort4
[5] - SBPort5
[6.0] - SBPort23
[6.1] - SBPort23
Resource pressure per iteration:
[0] [1] [2] [3] [4] [5] [6.0] [6.1]
- - 3.97 3.97 5.00 5.06 4.50 4.50
Resource pressure by instruction:
[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
- - - - 1.00 - 0.50 0.50 push r14
- - - - 1.00 - 0.50 0.50 push rbx
- - - - 1.00 - 0.50 0.50 push rax
- - 0.94 0.05 - 0.01 - - mov rbx, rdi
- - 0.05 0.95 - - - - movabs rax, 4611686018427387900
- - 0.95 - - 0.05 - - cmp rdi, rax
- - - - - 1.00 - - jbe .LBB5_2
- - - - - - - - xor eax, eax
- - - - - 1.00 - - jmp .LBB5_3
- - - 1.00 - - - - lea r14, [2*rbx + 4]
- - - - 1.00 1.00 1.00 1.00 call qword ptr [rip + __rustc::__rust_no_alloc_shim_is_unstable_v2@GOTPCREL]
- - 0.06 0.94 - - - - mov esi, 2
- - 0.94 0.06 - - - - mov rdi, r14
- - - - 1.00 1.00 1.00 1.00 call qword ptr [rip + __rustc::__rust_alloc_zeroed@GOTPCREL]
- - 0.05 0.95 - - - - mov rdx, rbx
- - 0.98 0.02 - - - - add rsp, 8
- - - - - - 0.50 0.50 pop rbx
- - - - - - 0.50 0.50 pop r14
- - - - - 1.00 - - ret
Dynamically Padded
Format
use zerocopy_derive::*;
// The only valid value of this type is the byte sequence `0xC0C0`.
#[derive(TryFromBytes, KnownLayout, Immutable)]
#[repr(u16)]
pub enum C0C0 {
_XC0C0 = 0xC0C0,
}
#[derive(FromBytes, KnownLayout, Immutable, SplitAt)]
#[repr(C, align(4))]
pub struct Packet<Magic> {
magic_number: Magic,
milk: u8,
mug_size: u8,
temperature: [u8; 5],
marshmallows: [[u8; 3]],
}
/// A packet beginning with the magic number `0xC0C0`.
pub type CocoPacket = Packet<C0C0>;
/// A packet beginning with any two initialized bytes.
pub type LocoPacket = Packet<[u8; 2]>;
Benchmark
use zerocopy::*;
#[path = "formats/coco_dynamic_padding.rs"]
mod format;
#[unsafe(no_mangle)]
fn bench_new_box_zeroed_with_elems_dynamic_padding(
count: usize,
) -> Option<Box<format::LocoPacket>> {
FromZeros::new_box_zeroed_with_elems(count).ok()
}
Assembly
bench_new_box_zeroed_with_elems_dynamic_padding:
push r14
push rbx
push rax
mov rbx, rdi
mov ecx, 3
mov rax, rdi
mul rcx
jo .LBB5_6
mov r14, rax
cmp rax, -10
ja .LBB5_6
lea rax, [r14 + 9]
not r14d
and r14d, 3
add r14, rax
setb al
movabs rcx, 9223372036854775803
cmp r14, rcx
seta cl
or cl, al
je .LBB5_4
.LBB5_6:
xor eax, eax
jmp .LBB5_5
.LBB5_4:
call qword ptr [rip + __rustc::__rust_no_alloc_shim_is_unstable_v2@GOTPCREL]
mov esi, 4
mov rdi, r14
call qword ptr [rip + __rustc::__rust_alloc_zeroed@GOTPCREL]
.LBB5_5:
mov rdx, rbx
add rsp, 8
pop rbx
pop r14
ret
Machine Code Analysis
Iterations: 100
Instructions: 3200
Total Cycles: 2989
Total uOps: 4300
Dispatch Width: 4
uOps Per Cycle: 1.44
IPC: 1.07
Block RThroughput: 10.8
Instruction Info:
[1]: #uOps
[2]: Latency
[3]: RThroughput
[4]: MayLoad
[5]: MayStore
[6]: HasSideEffects (U)
[1] [2] [3] [4] [5] [6] Instructions:
2 5 1.00 * push r14
2 5 1.00 * push rbx
2 5 1.00 * push rax
1 1 0.33 mov rbx, rdi
1 1 0.33 mov ecx, 3
1 1 0.33 mov rax, rdi
2 4 1.00 mul rcx
1 1 1.00 jo .LBB5_6
1 1 0.33 mov r14, rax
1 1 0.33 cmp rax, -10
1 1 1.00 ja .LBB5_6
1 1 0.50 lea rax, [r14 + 9]
1 1 0.33 not r14d
1 1 0.33 and r14d, 3
1 1 0.33 add r14, rax
1 1 0.50 setb al
1 1 0.33 movabs rcx, 9223372036854775803
1 1 0.33 cmp r14, rcx
2 2 1.00 seta cl
1 1 0.33 or cl, al
1 1 1.00 je .LBB5_4
1 0 0.25 xor eax, eax
1 1 1.00 jmp .LBB5_5
4 7 1.00 * call qword ptr [rip + __rustc::__rust_no_alloc_shim_is_unstable_v2@GOTPCREL]
1 1 0.33 mov esi, 4
1 1 0.33 mov rdi, r14
4 7 1.00 * call qword ptr [rip + __rustc::__rust_alloc_zeroed@GOTPCREL]
1 1 0.33 mov rdx, rbx
1 1 0.33 add rsp, 8
1 6 0.50 * pop rbx
1 6 0.50 * pop r14
1 1 1.00 U ret
Resources:
[0] - SBDivider
[1] - SBFPDivider
[2] - SBPort0
[3] - SBPort1
[4] - SBPort4
[5] - SBPort5
[6.0] - SBPort23
[6.1] - SBPort23
Resource pressure per iteration:
[0] [1] [2] [3] [4] [5] [6.0] [6.1]
- - 8.99 8.98 5.00 10.03 4.49 4.51
Resource pressure by instruction:
[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
- - - - 1.00 - 0.49 0.51 push r14
- - - - 1.00 - 0.51 0.49 push rbx
- - - - 1.00 - 0.49 0.51 push rax
- - 0.95 0.04 - 0.01 - - mov rbx, rdi
- - - 0.97 - 0.03 - - mov ecx, 3
- - 0.02 0.02 - 0.96 - - mov rax, rdi
- - 1.00 1.00 - - - - mul rcx
- - - - - 1.00 - - jo .LBB5_6
- - 0.02 0.97 - 0.01 - - mov r14, rax
- - 0.97 0.03 - - - - cmp rax, -10
- - - - - 1.00 - - ja .LBB5_6
- - 0.99 0.01 - - - - lea rax, [r14 + 9]
- - 0.01 0.99 - - - - not r14d
- - 0.97 0.03 - - - - and r14d, 3
- - 0.01 0.98 - 0.01 - - add r14, rax
- - 1.00 - - - - - setb al
- - 0.02 - - 0.98 - - movabs rcx, 9223372036854775803
- - - 0.97 - 0.03 - - cmp r14, rcx
- - 2.00 - - - - - seta cl
- - 0.03 0.03 - 0.94 - - or cl, al
- - - - - 1.00 - - je .LBB5_4
- - - - - - - - xor eax, eax
- - - - - 1.00 - - jmp .LBB5_5
- - - - 1.00 1.00 1.02 0.98 call qword ptr [rip + __rustc::__rust_no_alloc_shim_is_unstable_v2@GOTPCREL]
- - 0.03 0.97 - - - - mov esi, 4
- - 0.96 0.01 - 0.03 - - mov rdi, r14
- - - - 1.00 1.00 0.98 1.02 call qword ptr [rip + __rustc::__rust_alloc_zeroed@GOTPCREL]
- - - 0.97 - 0.03 - - mov rdx, rbx
- - 0.01 0.99 - - - - add rsp, 8
- - - - - - 0.50 0.50 pop rbx
- - - - - - 0.50 0.50 pop r14
- - - - - 1.00 - - ret
Source
fn new_vec_zeroed(len: usize) -> Result<Vec<Self>, AllocError>
where
    Self: Sized,
fn new_vec_zeroed(len: usize) -> Result<Vec<Self>, AllocError>
where
    Self: Sized,
Creates a Vec<Self> from zeroed bytes.
This function is useful for allocating large Vecs and
zero-initializing them, without ever creating a temporary instance of
[Self; _] (or many temporary instances of Self) on the stack. For
example, u8::new_vec_zeroed(1048576) will allocate directly on the
heap; it does not require storing intermediate values on the stack.
On systems that use a heap implementation that supports allocating from
pre-zeroed memory, using new_vec_zeroed may have performance benefits.
If Self is a zero-sized type, then this function will return a
Vec<Self> that has the correct len. Such a Vec cannot contain any
actual information, but its len() property will report the correct
value.
§Errors
Returns an error on allocation failure. Allocation failure is guaranteed never to cause a panic or an abort.
§ Code Generation
This abstraction is safe and cheap, but does not necessarily have zero runtime cost. The codegen you experience in practice will depend on optimization level, the layout of the destination type, and what the compiler can prove about the source.
Format
use zerocopy_derive::*;
// The only valid value of this type is the byte sequence `0xC0C0`.
#[derive(TryFromBytes, KnownLayout, Immutable, IntoBytes)]
#[repr(u16)]
pub enum C0C0 {
_XC0C0 = 0xC0C0,
}
macro_rules! define_packet {
($name: ident, $trait: ident, $leading_field: ty) => {
#[derive($trait, KnownLayout, Immutable, IntoBytes)]
#[repr(C, align(2))]
pub struct $name {
magic_number: $leading_field,
mug_size: u8,
temperature: u8,
marshmallows: [u8; 2],
}
};
}
/// Packet begins with bytes 0xC0C0.
define_packet!(CocoPacket, TryFromBytes, C0C0);
/// Packet begins with any two bytes.
define_packet!(LocoPacket, FromBytes, [u8; 2]);
Benchmark
use zerocopy::*;
#[path = "formats/coco_static_size.rs"]
mod format;
#[unsafe(no_mangle)]
fn bench_new_vec_zeroed(len: usize) -> Option<Vec<format::LocoPacket>> {
FromZeros::new_vec_zeroed(len).ok()
}
Assembly
bench_new_vec_zeroed:
push r15
push r14
push r12
push rbx
push rax
mov rbx, rdi
movabs r12, 9223372036854775805
mov ecx, 6
mov rax, rsi
mul rcx
jo .LBB5_6
cmp rax, r12
jbe .LBB5_2
.LBB5_6:
add r12, 3
mov qword ptr [rbx], r12
jmp .LBB5_7
.LBB5_2:
mov r14, rsi
test rax, rax
je .LBB5_3
mov r15, rax
call qword ptr [rip + __rustc::__rust_no_alloc_shim_is_unstable_v2@GOTPCREL]
mov esi, 2
mov rdi, r15
call qword ptr [rip + __rustc::__rust_alloc_zeroed@GOTPCREL]
test rax, rax
jne .LBB5_5
jmp .LBB5_6
.LBB5_3:
mov eax, 2
.LBB5_5:
mov qword ptr [rbx], r14
mov qword ptr [rbx + 8], rax
mov qword ptr [rbx + 16], r14
.LBB5_7:
mov rax, rbx
add rsp, 8
pop rbx
pop r12
pop r14
pop r15
ret
Machine Code Analysis
Iterations: 100
Instructions: 3800
Total Cycles: 5277
Total uOps: 5000
Dispatch Width: 4
uOps Per Cycle: 0.95
IPC: 0.72
Block RThroughput: 12.5
Instruction Info:
[1]: #uOps
[2]: Latency
[3]: RThroughput
[4]: MayLoad
[5]: MayStore
[6]: HasSideEffects (U)
[1] [2] [3] [4] [5] [6] Instructions:
2 5 1.00 * push r15
2 5 1.00 * push r14
2 5 1.00 * push r12
2 5 1.00 * push rbx
2 5 1.00 * push rax
1 1 0.33 mov rbx, rdi
1 1 0.33 movabs r12, 9223372036854775805
1 1 0.33 mov ecx, 6
1 1 0.33 mov rax, rsi
2 4 1.00 mul rcx
1 1 1.00 jo .LBB5_6
1 1 0.33 cmp rax, r12
1 1 1.00 jbe .LBB5_2
1 1 0.33 add r12, 3
1 1 1.00 * mov qword ptr [rbx], r12
1 1 1.00 jmp .LBB5_7
1 1 0.33 mov r14, rsi
1 1 0.33 test rax, rax
1 1 1.00 je .LBB5_3
1 1 0.33 mov r15, rax
4 7 1.00 * call qword ptr [rip + __rustc::__rust_no_alloc_shim_is_unstable_v2@GOTPCREL]
1 1 0.33 mov esi, 2
1 1 0.33 mov rdi, r15
4 7 1.00 * call qword ptr [rip + __rustc::__rust_alloc_zeroed@GOTPCREL]
1 1 0.33 test rax, rax
1 1 1.00 jne .LBB5_5
1 1 1.00 jmp .LBB5_6
1 1 0.33 mov eax, 2
1 1 1.00 * mov qword ptr [rbx], r14
1 1 1.00 * mov qword ptr [rbx + 8], rax
1 1 1.00 * mov qword ptr [rbx + 16], r14
1 1 0.33 mov rax, rbx
1 1 0.33 add rsp, 8
1 6 0.50 * pop rbx
1 6 0.50 * pop r12
1 6 0.50 * pop r14
1 6 0.50 * pop r15
1 1 1.00 U ret
Resources:
[0] - SBDivider
[1] - SBFPDivider
[2] - SBPort0
[3] - SBPort1
[4] - SBPort4
[5] - SBPort5
[6.0] - SBPort23
[6.1] - SBPort23
Resource pressure per iteration:
[0] [1] [2] [3] [4] [5] [6.0] [6.1]
- - 7.01 7.98 11.00 11.01 8.50 8.50
Resource pressure by instruction:
[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
- - - - 1.00 - 0.50 0.50 push r15
- - - - 1.00 - 0.50 0.50 push r14
- - - - 1.00 - 0.50 0.50 push r12
- - - - 1.00 - 0.50 0.50 push rbx
- - - - 1.00 - 0.50 0.50 push rax
- - 0.98 0.01 - 0.01 - - mov rbx, rdi
- - 0.01 0.99 - - - - movabs r12, 9223372036854775805
- - 0.02 - - 0.98 - - mov ecx, 6
- - - 0.98 - 0.02 - - mov rax, rsi
- - 1.00 1.00 - - - - mul rcx
- - - - - 1.00 - - jo .LBB5_6
- - 0.99 0.01 - - - - cmp rax, r12
- - - - - 1.00 - - jbe .LBB5_2
- - - - - 1.00 - - add r12, 3
- - - - 1.00 - 0.50 0.50 mov qword ptr [rbx], r12
- - - - - 1.00 - - jmp .LBB5_7
- - 0.98 0.02 - - - - mov r14, rsi
- - 0.01 0.99 - - - - test rax, rax
- - - - - 1.00 - - je .LBB5_3
- - 0.99 0.01 - - - - mov r15, rax
- - - - 1.00 1.00 1.00 1.00 call qword ptr [rip + __rustc::__rust_no_alloc_shim_is_unstable_v2@GOTPCREL]
- - 0.01 0.99 - - - - mov esi, 2
- - 0.99 0.01 - - - - mov rdi, r15
- - - - 1.00 1.00 1.00 1.00 call qword ptr [rip + __rustc::__rust_alloc_zeroed@GOTPCREL]
- - 0.01 0.99 - - - - test rax, rax
- - - - - 1.00 - - jne .LBB5_5
- - - - - 1.00 - - jmp .LBB5_6
- - 0.02 0.98 - - - - mov eax, 2
- - - - 1.00 - 0.50 0.50 mov qword ptr [rbx], r14
- - - - 1.00 - 0.50 0.50 mov qword ptr [rbx + 8], rax
- - - - 1.00 - 0.50 0.50 mov qword ptr [rbx + 16], r14
- - 0.97 0.03 - - - - mov rax, rbx
- - 0.03 0.97 - - - - add rsp, 8
- - - - - - 0.50 0.50 pop rbx
- - - - - - 0.50 0.50 pop r12
- - - - - - 0.50 0.50 pop r14
- - - - - - 0.50 0.50 pop r15
- - - - - 1.00 - - ret
Source
fn extend_vec_zeroed(
    v: &mut Vec<Self>,
    additional: usize,
) -> Result<(), AllocError>
where
    Self: Sized,
fn extend_vec_zeroed(
    v: &mut Vec<Self>,
    additional: usize,
) -> Result<(), AllocError>
where
    Self: Sized,
Extends a Vec<Self> by pushing additional new items onto the end of
the vector. The new items are initialized with zeros.
§ Code Generation
This abstraction is safe and cheap, but does not necessarily have zero runtime cost. The codegen you experience in practice will depend on optimization level, the layout of the destination type, and what the compiler can prove about the source.
Format
use zerocopy_derive::*;
// The only valid value of this type is the byte sequence `0xC0C0`.
#[derive(TryFromBytes, KnownLayout, Immutable, IntoBytes)]
#[repr(u16)]
pub enum C0C0 {
_XC0C0 = 0xC0C0,
}
macro_rules! define_packet {
($name: ident, $trait: ident, $leading_field: ty) => {
#[derive($trait, KnownLayout, Immutable, IntoBytes)]
#[repr(C, align(2))]
pub struct $name {
magic_number: $leading_field,
mug_size: u8,
temperature: u8,
marshmallows: [u8; 2],
}
};
}
/// Packet begins with bytes 0xC0C0.
define_packet!(CocoPacket, TryFromBytes, C0C0);
/// Packet begins with any two bytes.
define_packet!(LocoPacket, FromBytes, [u8; 2]);
Benchmark
use zerocopy::*;
#[path = "formats/coco_static_size.rs"]
mod format;
/// Benchmark entry point: appends `additional` zero-initialized
/// `LocoPacket`s to the end of `v` via `FromZeros::extend_vec_zeroed`.
///
/// Returns `Some(())` when the extension succeeds and `None` when
/// `extend_vec_zeroed` reports an error. The symbol is exported unmangled so
/// it appears under this exact name in the generated assembly.
#[unsafe(no_mangle)]
fn bench_extend_vec_zeroed(v: &mut Vec<format::LocoPacket>, additional: usize) -> Option<()> {
    match FromZeros::extend_vec_zeroed(v, additional) {
        Ok(()) => Some(()),
        Err(_) => None,
    }
}
Assembly
bench_extend_vec_zeroed:
push r15
push r14
push r13
push r12
push rbx
sub rsp, 32
mov rbx, rdi
mov rax, qword ptr [rdi]
mov r12, qword ptr [rdi + 16]
mov rcx, rax
sub rcx, r12
cmp rsi, rcx
jbe .LBB6_3
mov r15, r12
add r15, rsi
jae .LBB6_6
.LBB6_2:
xor eax, eax
jmp .LBB6_5
.LBB6_3:
mov rax, qword ptr [rbx + 8]
lea r15, [r12 + rsi]
.LBB6_4:
lea rcx, [r12 + 2*r12]
lea rdi, [rax + 2*rcx]
add rsi, rsi
lea rdx, [rsi + 2*rsi]
xor esi, esi
call qword ptr [rip + memset@GOTPCREL]
mov qword ptr [rbx + 16], r15
mov al, 1
.LBB6_5:
add rsp, 32
pop rbx
pop r12
pop r13
pop r14
pop r15
ret
.LBB6_6:
mov r13, rsi
lea rcx, [rax + rax]
cmp r15, rcx
cmova rcx, r15
cmp rcx, 5
mov r14d, 4
cmovae r14, rcx
mov rdx, qword ptr [rbx + 8]
lea rdi, [rsp + 8]
mov rsi, rax
mov rcx, r14
call <alloc::raw_vec::RawVecInner>::finish_grow
cmp dword ptr [rsp + 8], 1
je .LBB6_2
mov rax, qword ptr [rsp + 16]
mov qword ptr [rbx + 8], rax
mov qword ptr [rbx], r14
mov rsi, r13
jmp .LBB6_4
Machine Code Analysis
Iterations: 100
Instructions: 5400
Total Cycles: 6595
Total uOps: 6800
Dispatch Width: 4
uOps Per Cycle: 1.03
IPC: 0.82
Block RThroughput: 17.0
Instruction Info:
[1]: #uOps
[2]: Latency
[3]: RThroughput
[4]: MayLoad
[5]: MayStore
[6]: HasSideEffects (U)
[1] [2] [3] [4] [5] [6] Instructions:
2 5 1.00 * push r15
2 5 1.00 * push r14
2 5 1.00 * push r13
2 5 1.00 * push r12
2 5 1.00 * push rbx
1 1 0.33 sub rsp, 32
1 1 0.33 mov rbx, rdi
1 5 0.50 * mov rax, qword ptr [rdi]
1 5 0.50 * mov r12, qword ptr [rdi + 16]
1 1 0.33 mov rcx, rax
1 1 0.33 sub rcx, r12
1 1 0.33 cmp rsi, rcx
1 1 1.00 jbe .LBB6_3
1 1 0.33 mov r15, r12
1 1 0.33 add r15, rsi
1 1 1.00 jae .LBB6_6
1 0 0.25 xor eax, eax
1 1 1.00 jmp .LBB6_5
1 5 0.50 * mov rax, qword ptr [rbx + 8]
1 1 0.50 lea r15, [r12 + rsi]
1 1 0.50 lea rcx, [r12 + 2*r12]
1 1 0.50 lea rdi, [rax + 2*rcx]
1 1 0.33 add rsi, rsi
1 1 0.50 lea rdx, [rsi + 2*rsi]
1 0 0.25 xor esi, esi
4 7 1.00 * call qword ptr [rip + memset@GOTPCREL]
1 1 1.00 * mov qword ptr [rbx + 16], r15
1 1 0.33 mov al, 1
1 1 0.33 add rsp, 32
1 6 0.50 * pop rbx
1 6 0.50 * pop r12
1 6 0.50 * pop r13
1 6 0.50 * pop r14
1 6 0.50 * pop r15
1 1 1.00 U ret
1 1 0.33 mov r13, rsi
1 1 0.50 lea rcx, [rax + rax]
1 1 0.33 cmp r15, rcx
3 3 1.00 cmova rcx, r15
1 1 0.33 cmp rcx, 5
1 1 0.33 mov r14d, 4
2 2 0.67 cmovae r14, rcx
1 5 0.50 * mov rdx, qword ptr [rbx + 8]
1 1 0.50 lea rdi, [rsp + 8]
1 1 0.33 mov rsi, rax
1 1 0.33 mov rcx, r14
3 5 1.00 call <alloc::raw_vec::RawVecInner>::finish_grow
2 6 0.50 * cmp dword ptr [rsp + 8], 1
1 1 1.00 je .LBB6_2
1 5 0.50 * mov rax, qword ptr [rsp + 16]
1 1 1.00 * mov qword ptr [rbx + 8], rax
1 1 1.00 * mov qword ptr [rbx], r14
1 1 0.33 mov rsi, r13
1 1 1.00 jmp .LBB6_4
Resources:
[0] - SBDivider
[1] - SBFPDivider
[2] - SBPort0
[3] - SBPort1
[4] - SBPort4
[5] - SBPort5
[6.0] - SBPort23
[6.1] - SBPort23
Resource pressure per iteration:
[0] [1] [2] [3] [4] [5] [6.0] [6.1]
- - 12.00 12.00 10.00 13.00 11.00 11.00
Resource pressure by instruction:
[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
- - - - 1.00 - 0.49 0.51 push r15
- - - - 1.00 - 0.51 0.49 push r14
- - - - 1.00 - 0.50 0.50 push r13
- - - - 1.00 - 0.50 0.50 push r12
- - - - 1.00 - 0.50 0.50 push rbx
- - 0.01 0.99 - - - - sub rsp, 32
- - - - - 1.00 - - mov rbx, rdi
- - - - - - 0.50 0.50 mov rax, qword ptr [rdi]
- - - - - - 0.50 0.50 mov r12, qword ptr [rdi + 16]
- - - 1.00 - - - - mov rcx, rax
- - - 0.99 - 0.01 - - sub rcx, r12
- - - - - 1.00 - - cmp rsi, rcx
- - - - - 1.00 - - jbe .LBB6_3
- - 0.01 0.98 - 0.01 - - mov r15, r12
- - 0.99 0.01 - - - - add r15, rsi
- - - - - 1.00 - - jae .LBB6_6
- - - - - - - - xor eax, eax
- - - - - 1.00 - - jmp .LBB6_5
- - - - - - 0.50 0.50 mov rax, qword ptr [rbx + 8]
- - 1.00 - - - - - lea r15, [r12 + rsi]
- - 0.98 0.02 - - - - lea rcx, [r12 + 2*r12]
- - 0.99 0.01 - - - - lea rdi, [rax + 2*rcx]
- - - 1.00 - - - - add rsi, rsi
- - 0.99 0.01 - - - - lea rdx, [rsi + 2*rsi]
- - - - - - - - xor esi, esi
- - - - 1.00 1.00 1.00 1.00 call qword ptr [rip + memset@GOTPCREL]
- - - - 1.00 - 0.50 0.50 mov qword ptr [rbx + 16], r15
- - 0.01 0.99 - - - - mov al, 1
- - 1.00 - - - - - add rsp, 32
- - - - - - 0.50 0.50 pop rbx
- - - - - - 0.50 0.50 pop r12
- - - - - - 0.50 0.50 pop r13
- - - - - - 0.50 0.50 pop r14
- - - - - - 0.50 0.50 pop r15
- - - - - 1.00 - - ret
- - 1.00 - - - - - mov r13, rsi
- - 0.01 0.99 - - - - lea rcx, [rax + rax]
- - 0.99 0.01 - - - - cmp r15, rcx
- - 2.00 0.01 - 0.99 - - cmova rcx, r15
- - 0.01 0.99 - - - - cmp rcx, 5
- - 0.01 0.99 - - - - mov r14d, 4
- - 1.00 0.01 - 0.99 - - cmovae r14, rcx
- - - - - - 0.50 0.50 mov rdx, qword ptr [rbx + 8]
- - 0.01 0.99 - - - - lea rdi, [rsp + 8]
- - - 1.00 - - - - mov rsi, rax
- - - 0.01 - 0.99 - - mov rcx, r14
- - - - 1.00 1.00 0.50 0.50 call <alloc::raw_vec::RawVecInner>::finish_grow
- - - 0.99 - 0.01 0.50 0.50 cmp dword ptr [rsp + 8], 1
- - - - - 1.00 - - je .LBB6_2
- - - - - - 0.50 0.50 mov rax, qword ptr [rsp + 16]
- - - - 1.00 - 0.49 0.51 mov qword ptr [rbx + 8], rax
- - - - 1.00 - 0.51 0.49 mov qword ptr [rbx], r14
- - 0.99 0.01 - - - - mov rsi, r13
- - - - - 1.00 - - jmp .LBB6_4
Source
fn insert_vec_zeroed(
v: &mut Vec<Self>,
position: usize,
additional: usize,
) -> Result<(), AllocError>
where
Self: Sized,
fn insert_vec_zeroed(
v: &mut Vec<Self>,
position: usize,
additional: usize,
) -> Result<(), AllocError>
where
Self: Sized,
Inserts additional new items into Vec<Self> at position. The new
items are initialized with zeros.
§Panics
Panics if position > v.len().
§ Code Generation
This abstraction is safe and cheap, but does not necessarily have zero runtime cost. The codegen you experience in practice will depend on optimization level, the layout of the destination type, and what the compiler can prove about the source.
Format
use zerocopy_derive::*;
// Marker type for the packet magic number: a single-variant enum with a
// `u16` representation. The only valid bit pattern for this type is the two
// bytes `0xC0C0`.
#[derive(TryFromBytes, KnownLayout, Immutable, IntoBytes)]
#[repr(u16)]
pub enum C0C0 {
// Sole variant; its discriminant is the magic value itself.
_XC0C0 = 0xC0C0,
}
// Declares a public packet struct called `$name` that derives `$trait` along
// with `KnownLayout`, `Immutable` and `IntoBytes`. Every generated struct is
// `repr(C)` with 2-byte alignment and has the same shape: a leading
// `magic_number` field of type `$leading_field`, followed by four bytes of
// payload (`mug_size`, `temperature`, and the two-byte `marshmallows` array).
macro_rules! define_packet {
($name: ident, $trait: ident, $leading_field: ty) => {
#[derive($trait, KnownLayout, Immutable, IntoBytes)]
#[repr(C, align(2))]
pub struct $name {
// Only this field's type varies between the generated packet types.
magic_number: $leading_field,
mug_size: u8,
temperature: u8,
marshmallows: [u8; 2],
}
};
}
// `CocoPacket` uses `C0C0` as its leading field and derives `TryFromBytes`.
/// Packet begins with bytes 0xC0C0.
define_packet!(CocoPacket, TryFromBytes, C0C0);
// `LocoPacket` uses a plain `[u8; 2]` leading field and derives `FromBytes`.
/// Packet begins with any two bytes.
define_packet!(LocoPacket, FromBytes, [u8; 2]);
Benchmark
use zerocopy::*;
#[path = "formats/coco_static_size.rs"]
mod format;
/// Benchmark entry point: inserts `additional` zero-initialized
/// `LocoPacket`s into `v` at index `position` via
/// `FromZeros::insert_vec_zeroed`.
///
/// Returns `Some(())` when the insertion succeeds and `None` when
/// `insert_vec_zeroed` reports an error. The symbol is exported unmangled so
/// it appears under this exact name in the generated assembly.
///
/// # Panics
///
/// Panics if `position > v.len()`, as documented for `insert_vec_zeroed`.
#[unsafe(no_mangle)]
fn bench_insert_vec_zeroed(
    v: &mut Vec<format::LocoPacket>,
    position: usize,
    additional: usize,
) -> Option<()> {
    match FromZeros::insert_vec_zeroed(v, position, additional) {
        Ok(()) => Some(()),
        Err(_) => None,
    }
}
Assembly
bench_insert_vec_zeroed:
push rbp
push r15
push r14
push r13
push r12
push rbx
sub rsp, 24
mov r12, qword ptr [rdi + 16]
mov r13, r12
sub r13, rsi
jb .LBB6_10
mov rbx, rdi
mov rax, qword ptr [rdi]
mov rcx, rax
sub rcx, r12
cmp rdx, rcx
jbe .LBB6_4
add r12, rdx
jae .LBB6_7
.LBB6_3:
xor eax, eax
jmp .LBB6_6
.LBB6_4:
mov rax, qword ptr [rbx + 8]
add r12, rdx
.LBB6_5:
lea rcx, [rsi + 2*rsi]
lea r14, [rax + 2*rcx]
add rdx, rdx
lea r15, [rdx + 2*rdx]
lea rdi, [r14 + r15]
add r13, r13
lea rdx, [2*r13]
add rdx, r13
mov rsi, r14
call qword ptr [rip + memmove@GOTPCREL]
mov rdi, r14
xor esi, esi
mov rdx, r15
call qword ptr [rip + memset@GOTPCREL]
mov qword ptr [rbx + 16], r12
mov al, 1
.LBB6_6:
add rsp, 24
pop rbx
pop r12
pop r13
pop r14
pop r15
pop rbp
ret
.LBB6_7:
mov r15, rsi
mov rbp, rdx
lea rcx, [rax + rax]
cmp r12, rcx
cmova rcx, r12
cmp rcx, 5
mov r14d, 4
cmovae r14, rcx
mov rdx, qword ptr [rbx + 8]
mov rdi, rsp
mov rsi, rax
mov rcx, r14
call <alloc::raw_vec::RawVecInner>::finish_grow
cmp dword ptr [rsp], 1
je .LBB6_3
mov rax, qword ptr [rsp + 8]
mov qword ptr [rbx + 8], rax
mov qword ptr [rbx], r14
mov rdx, rbp
mov rsi, r15
jmp .LBB6_5
.LBB6_10:
lea rdi, [rip + .Lanon.HASH.1]
lea rdx, [rip + .Lanon.HASH.3]
mov esi, 37
call qword ptr [rip + core::panicking::panic@GOTPCREL]
Machine Code Analysis
Iterations: 100
Instructions: 7200
Total Cycles: 7648
Total uOps: 9300
Dispatch Width: 4
uOps Per Cycle: 1.22
IPC: 0.94
Block RThroughput: 23.3
Instruction Info:
[1]: #uOps
[2]: Latency
[3]: RThroughput
[4]: MayLoad
[5]: MayStore
[6]: HasSideEffects (U)
[1] [2] [3] [4] [5] [6] Instructions:
2 5 1.00 * push rbp
2 5 1.00 * push r15
2 5 1.00 * push r14
2 5 1.00 * push r13
2 5 1.00 * push r12
2 5 1.00 * push rbx
1 1 0.33 sub rsp, 24
1 5 0.50 * mov r12, qword ptr [rdi + 16]
1 1 0.33 mov r13, r12
1 1 0.33 sub r13, rsi
1 1 1.00 jb .LBB6_10
1 1 0.33 mov rbx, rdi
1 5 0.50 * mov rax, qword ptr [rdi]
1 1 0.33 mov rcx, rax
1 1 0.33 sub rcx, r12
1 1 0.33 cmp rdx, rcx
1 1 1.00 jbe .LBB6_4
1 1 0.33 add r12, rdx
1 1 1.00 jae .LBB6_7
1 0 0.25 xor eax, eax
1 1 1.00 jmp .LBB6_6
1 5 0.50 * mov rax, qword ptr [rbx + 8]
1 1 0.33 add r12, rdx
1 1 0.50 lea rcx, [rsi + 2*rsi]
1 1 0.50 lea r14, [rax + 2*rcx]
1 1 0.33 add rdx, rdx
1 1 0.50 lea r15, [rdx + 2*rdx]
1 1 0.50 lea rdi, [r14 + r15]
1 1 0.33 add r13, r13
1 1 0.50 lea rdx, [2*r13]
1 1 0.33 add rdx, r13
1 1 0.33 mov rsi, r14
4 7 1.00 * call qword ptr [rip + memmove@GOTPCREL]
1 1 0.33 mov rdi, r14
1 0 0.25 xor esi, esi
1 1 0.33 mov rdx, r15
4 7 1.00 * call qword ptr [rip + memset@GOTPCREL]
1 1 1.00 * mov qword ptr [rbx + 16], r12
1 1 0.33 mov al, 1
1 1 0.33 add rsp, 24
1 6 0.50 * pop rbx
1 6 0.50 * pop r12
1 6 0.50 * pop r13
1 6 0.50 * pop r14
1 6 0.50 * pop r15
1 6 0.50 * pop rbp
1 1 1.00 U ret
1 1 0.33 mov r15, rsi
1 1 0.33 mov rbp, rdx
1 1 0.50 lea rcx, [rax + rax]
1 1 0.33 cmp r12, rcx
3 3 1.00 cmova rcx, r12
1 1 0.33 cmp rcx, 5
1 1 0.33 mov r14d, 4
2 2 0.67 cmovae r14, rcx
1 5 0.50 * mov rdx, qword ptr [rbx + 8]
1 1 0.33 mov rdi, rsp
1 1 0.33 mov rsi, rax
1 1 0.33 mov rcx, r14
3 5 1.00 call <alloc::raw_vec::RawVecInner>::finish_grow
2 6 0.50 * cmp dword ptr [rsp], 1
1 1 1.00 je .LBB6_3
1 5 0.50 * mov rax, qword ptr [rsp + 8]
1 1 1.00 * mov qword ptr [rbx + 8], rax
1 1 1.00 * mov qword ptr [rbx], r14
1 1 0.33 mov rdx, rbp
1 1 0.33 mov rsi, r15
1 1 1.00 jmp .LBB6_5
1 1 0.50 lea rdi, [rip + .Lanon.HASH.1]
1 1 0.50 lea rdx, [rip + .Lanon.HASH.3]
1 1 0.33 mov esi, 37
4 7 1.00 * call qword ptr [rip + core::panicking::panic@GOTPCREL]
Resources:
[0] - SBDivider
[1] - SBFPDivider
[2] - SBPort0
[3] - SBPort1
[4] - SBPort4
[5] - SBPort5
[6.0] - SBPort23
[6.1] - SBPort23
Resource pressure per iteration:
[0] [1] [2] [3] [4] [5] [6.0] [6.1]
- - 17.02 16.50 13.00 19.48 14.00 14.00
Resource pressure by instruction:
[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
- - - - 1.00 - 0.98 0.02 push rbp
- - - - 1.00 - 0.02 0.98 push r15
- - - - 1.00 - 0.99 0.01 push r14
- - - - 1.00 - 0.01 0.99 push r13
- - - - 1.00 - 0.99 0.01 push r12
- - - - 1.00 - 0.01 0.99 push rbx
- - 0.49 0.51 - - - - sub rsp, 24
- - - - - - 0.04 0.96 mov r12, qword ptr [rdi + 16]
- - 0.49 0.50 - 0.01 - - mov r13, r12
- - 0.48 0.51 - 0.01 - - sub r13, rsi
- - - - - 1.00 - - jb .LBB6_10
- - 0.49 0.49 - 0.02 - - mov rbx, rdi
- - - - - - 0.97 0.03 mov rax, qword ptr [rdi]
- - 0.51 0.49 - - - - mov rcx, rax
- - 0.49 0.02 - 0.49 - - sub rcx, r12
- - 0.49 0.50 - 0.01 - - cmp rdx, rcx
- - - - - 1.00 - - jbe .LBB6_4
- - 0.02 0.49 - 0.49 - - add r12, rdx
- - - - - 1.00 - - jae .LBB6_7
- - - - - - - - xor eax, eax
- - - - - 1.00 - - jmp .LBB6_6
- - - - - - 0.97 0.03 mov rax, qword ptr [rbx + 8]
- - 0.51 0.49 - - - - add r12, rdx
- - 0.49 0.51 - - - - lea rcx, [rsi + 2*rsi]
- - 0.50 0.50 - - - - lea r14, [rax + 2*rcx]
- - 0.51 0.49 - - - - add rdx, rdx
- - 0.50 0.50 - - - - lea r15, [rdx + 2*rdx]
- - 0.49 0.51 - - - - lea rdi, [r14 + r15]
- - 0.50 0.49 - 0.01 - - add r13, r13
- - 0.51 0.49 - - - - lea rdx, [2*r13]
- - 0.01 0.01 - 0.98 - - add rdx, r13
- - 0.01 - - 0.99 - - mov rsi, r14
- - - - 1.00 1.00 1.98 0.02 call qword ptr [rip + memmove@GOTPCREL]
- - 0.49 0.50 - 0.01 - - mov rdi, r14
- - - - - - - - xor esi, esi
- - 0.50 0.50 - - - - mov rdx, r15
- - - - 1.00 1.00 1.96 0.04 call qword ptr [rip + memset@GOTPCREL]
- - - - 1.00 - 0.01 0.99 mov qword ptr [rbx + 16], r12
- - 0.50 - - 0.50 - - mov al, 1
- - 0.51 0.49 - - - - add rsp, 24
- - - - - - 0.02 0.98 pop rbx
- - - - - - 0.03 0.97 pop r12
- - - - - - 0.03 0.97 pop r13
- - - - - - 0.97 0.03 pop r14
- - - - - - 0.03 0.97 pop r15
- - - - - - 0.01 0.99 pop rbp
- - - - - 1.00 - - ret
- - 0.49 0.51 - - - - mov r15, rsi
- - 0.51 0.49 - - - - mov rbp, rdx
- - 0.49 0.51 - - - - lea rcx, [rax + rax]
- - 0.49 0.50 - 0.01 - - cmp r12, rcx
- - 1.04 0.50 - 1.46 - - cmova rcx, r12
- - 0.49 0.49 - 0.02 - - cmp rcx, 5
- - 0.50 - - 0.50 - - mov r14d, 4
- - 0.50 0.51 - 0.99 - - cmovae r14, rcx
- - - - - - 0.97 0.03 mov rdx, qword ptr [rbx + 8]
- - - 0.51 - 0.49 - - mov rdi, rsp
- - 0.01 0.50 - 0.49 - - mov rsi, rax
- - 0.49 0.50 - 0.01 - - mov rcx, r14
- - - - 1.00 1.00 0.99 0.01 call <alloc::raw_vec::RawVecInner>::finish_grow
- - 0.51 0.49 - - 0.50 0.50 cmp dword ptr [rsp], 1
- - - - - 1.00 - - je .LBB6_3
- - - - - - 0.50 0.50 mov rax, qword ptr [rsp + 8]
- - - - 1.00 - 0.99 0.01 mov qword ptr [rbx + 8], rax
- - - - 1.00 - 0.01 0.99 mov qword ptr [rbx], r14
- - 0.49 0.50 - 0.01 - - mov rdx, rbp
- - 0.50 0.01 - 0.49 - - mov rsi, r15
- - - - - 1.00 - - jmp .LBB6_5
- - 0.01 0.99 - - - - lea rdi, [rip + .Lanon.HASH.1]
- - 0.99 0.01 - - - - lea rdx, [rip + .Lanon.HASH.3]
- - 0.02 0.49 - 0.49 - - mov esi, 37
- - - - 1.00 1.00 0.02 1.98 call qword ptr [rip + core::panicking::panic@GOTPCREL]
Dyn Compatibility§
This trait is not dyn compatible.
In older versions of Rust, dyn compatibility was called "object safety", so this trait is not object safe.