1use std::sync::Arc;
6use std::sync::atomic::{AtomicBool, Ordering};
7
8use crate::mm::memory::MemoryObject;
9use crate::task::Kernel;
10use crate::vfs::OutputBuffer;
11use fuchsia_runtime::vmar_root_self;
12use shared_buffer::SharedBuffer;
13use starnix_sync::Mutex;
14use starnix_types::PAGE_SIZE;
15use starnix_uapi::errors::Errno;
16use starnix_uapi::{errno, error, from_status_like_fdio};
17use zerocopy::native_endian::{I32, U16, U32, U64};
18use zerocopy::{Immutable, IntoBytes, Unaligned};
19use zx::{BootInstant, BootTimeline};
20
/// Total size of the trace ring buffer: 2 MiB. The modular arithmetic in
/// `TraceEventQueueMetadata` assumes this is a multiple of the page size.
const DEFAULT_RING_BUFFER_SIZE_BYTES: u64 = 2097152;

/// Every ring-buffer page begins with two u64 fields: the page's base
/// timestamp followed by a commit (payload length) field.
const PAGE_HEADER_SIZE: u64 = 2 * std::mem::size_of::<u64>() as u64;
/// Byte offset of the commit field within a page header (right after the timestamp).
const COMMIT_FIELD_OFFSET: u64 = std::mem::size_of::<u64>() as u64;

/// Event type id stamped on every emitted event; 5 is the ftrace "print"
/// event id — NOTE(review): confirm against the tracefs format files.
const FTRACE_PRINT_ID: U16 = U16::new(5);

/// Inspect property name recording pages overwritten before being read.
const DROPPED_PAGES: &str = "dropped_pages";
34
/// Wire-format header of an ftrace print event (`repr(C)`, unaligned,
/// native-endian via the zerocopy wrapper types).
#[repr(C)]
#[derive(Debug, Default, IntoBytes, Immutable, Unaligned)]
struct PrintEventHeader {
    // Event type id; set to FTRACE_PRINT_ID by `PrintEvent::new`.
    common_type: U16,
    common_flags: u8,
    common_preempt_count: u8,
    // Pid of the task that emitted the event.
    common_pid: I32,
    // Instruction-pointer field; left zero by this implementation.
    ip: U64,
}
44
/// An ftrace print event. Currently just the common header; the textual
/// payload is appended separately by `TraceEventQueue::push_event`.
#[repr(C)]
#[derive(Debug, IntoBytes, Immutable, Unaligned)]
struct PrintEvent {
    header: PrintEventHeader,
}
50
51impl PrintEvent {
52 fn new(pid: i32) -> Self {
53 Self {
54 header: PrintEventHeader {
55 common_type: FTRACE_PRINT_ID,
56 common_pid: I32::new(pid),
57 ..Default::default()
59 },
60 }
61 }
62
63 fn size(&self) -> usize {
64 std::mem::size_of::<PrintEventHeader>()
65 }
66}
67
/// Per-event header inside a ring-buffer page.
#[repr(C)]
#[derive(Debug, Default, IntoBytes, PartialEq, Immutable, Unaligned)]
struct TraceEventHeader {
    // Nanoseconds since the previous event, stored shifted left by 5 bits
    // (see `set_time_delta`); the low 5 bits are left zero.
    time_delta: U32,

    // Length word set by `new` to `size_of::<u32>() + payload size`, i.e. it
    // counts itself plus everything that follows it.
    data: U32,
}
80
81impl TraceEventHeader {
82 fn new(size: usize) -> Self {
83 let size = (std::mem::size_of::<u32>() + size) as u32;
86 Self { time_delta: U32::new(0), data: U32::new(size) }
87 }
88
89 fn set_time_delta(&mut self, nanos: u32) {
90 self.time_delta = U32::new(nanos << 5);
91 }
92}
93
/// A complete serialized trace event: the ring-buffer event header followed
/// by the print-event header. The textual payload and trailing newline are
/// written directly after these bytes by `TraceEventQueue::push_event`.
#[repr(C)]
#[derive(Debug, IntoBytes, Immutable, Unaligned)]
pub struct TraceEvent {
    header: TraceEventHeader,
    event: PrintEvent,
}
106
107impl TraceEvent {
108 pub fn new(pid: i32, data_len: usize) -> Self {
109 let event = PrintEvent::new(pid);
110 let header = TraceEventHeader::new(event.size() + data_len + 1);
112 Self { header, event }
113 }
114
115 fn size(&self) -> usize {
116 std::mem::size_of::<u32>() + self.header.data.get() as usize
118 }
119
120 fn set_timestamp(&mut self, timestamp: BootInstant, prev_timestamp: BootInstant) {
121 debug_assert!(timestamp >= prev_timestamp, "Timestamp must be >= prev_timestamp");
125 let nanos: u32 = (timestamp - prev_timestamp).into_nanos().try_into().unwrap_or(0);
126 self.header.set_time_delta(nanos);
127 }
128}
129
/// Cursor state for the per-CPU trace ring buffer. All offsets are byte
/// offsets into the ring buffer, modulo `DEFAULT_RING_BUFFER_SIZE_BYTES`.
struct TraceEventQueueMetadata {
    // Read cursor: start of the next page handed out by `read`.
    head: u64,

    // One past the last committed (reader-visible) byte. Trails `tail` while
    // an event is reserved but not yet committed.
    commit: u64,

    // One past the last reserved byte; the next reservation starts here.
    tail: u64,

    // Largest event that can be reserved: one page minus its header.
    max_event_size: u64,

    // Timestamp of the most recently pushed event; deltas are relative to it.
    prev_timestamp: BootInstant,

    // True once at least one full page has been committed and not yet read.
    is_readable: bool,

    // When the buffer is full: true drops the oldest unread page, false makes
    // `reserve` fail with ENOMEM.
    overwrite: bool,

    // Number of unread pages discarded due to overwrite; exported to inspect.
    dropped_pages: u64,

    // Writable mapping of the ring-buffer VMO; Some only while tracing is
    // enabled. Unmapped in Drop.
    mapping: Option<SharedBuffer>,
}
167
impl Drop for TraceEventQueueMetadata {
    // Releases the ring-buffer mapping (created in `TraceEventQueue::enable`)
    // when the metadata is reset or destroyed.
    fn drop(&mut self) {
        if let Some(ref mut buf) = self.mapping {
            let (addr, size): (*mut u8, usize) = buf.as_ptr_len();
            let addr = addr as usize;

            // SAFETY: `addr`/`size` describe exactly the region previously
            // mapped into the root VMAR for this buffer, and the mapping is
            // no longer referenced once the metadata is being dropped.
            unsafe {
                let _ = vmar_root_self().unmap(addr, size);
            }
        }
    }
}
184
impl TraceEventQueueMetadata {
    /// Fresh, empty ring-buffer state: both write cursors start just past the
    /// first page's header; the read cursor starts at offset 0.
    fn new() -> Self {
        Self {
            head: 0,
            commit: PAGE_HEADER_SIZE,
            tail: PAGE_HEADER_SIZE,
            // Events never straddle pages, so they must fit after the header.
            max_event_size: *PAGE_SIZE - PAGE_HEADER_SIZE,
            prev_timestamp: BootInstant::get(),
            is_readable: false,
            overwrite: true,
            dropped_pages: 0,
            mapping: None,
        }
    }

    /// Byte offset of the page containing the read cursor.
    fn head_page_offset(&self) -> u64 {
        self.head - (self.head % *PAGE_SIZE)
    }

    /// Byte offset of the page containing the commit cursor.
    fn commit_page_offset(&self) -> u64 {
        self.commit - (self.commit % *PAGE_SIZE)
    }

    /// Byte offset of the page containing the tail (reservation) cursor.
    fn tail_page_offset(&self) -> u64 {
        self.tail - (self.tail % *PAGE_SIZE)
    }

    /// Byte offset of the commit field in the commit page's header.
    fn commit_field_offset(&self) -> u64 {
        self.commit_page_offset() + COMMIT_FIELD_OFFSET
    }

    /// Reserves `event_size` bytes at the tail, returning the offset at which
    /// the event should be written.
    ///
    /// An event that would cross a page boundary is placed at the start of the
    /// next page, after that page's header. If the next page is the unread
    /// head page, the oldest page is dropped when `overwrite` is set;
    /// otherwise the reservation fails with ENOMEM. Events larger than
    /// `max_event_size` fail with EINVAL.
    fn reserve(&mut self, event_size: u64) -> Result<u64, Errno> {
        if event_size > self.max_event_size {
            return error!(EINVAL);
        }

        let prev_tail_page = self.tail_page_offset();
        let mut reserve_start = self.tail;
        let maybe_new_tail = (self.tail + event_size as u64) % DEFAULT_RING_BUFFER_SIZE_BYTES;
        let maybe_new_tail_page = maybe_new_tail - (maybe_new_tail % *PAGE_SIZE);

        if prev_tail_page != maybe_new_tail_page {
            // Crossing into a new page: colliding with the reader's next page
            // means the buffer is full.
            if maybe_new_tail_page == self.head_page_offset() {
                if self.overwrite {
                    self.head += *PAGE_SIZE;
                    self.dropped_pages += 1;
                } else {
                    return error!(ENOMEM);
                }
            }

            // Skip the new page's header; the event starts right after it.
            reserve_start = maybe_new_tail_page + PAGE_HEADER_SIZE;
        }
        self.tail = reserve_start + event_size as u64;

        Ok(reserve_start)
    }

    /// Marks `event_size` previously reserved bytes as committed (visible to
    /// readers). Finishing a page makes the queue readable.
    fn commit(&mut self, event_size: u64) {
        let prev_commit_page = self.commit_page_offset();
        self.commit = (self.commit + event_size as u64) % DEFAULT_RING_BUFFER_SIZE_BYTES;

        let new_commit_page = self.commit_page_offset();
        if prev_commit_page != new_commit_page {
            // Mirror `reserve`: the event actually lives after the new page's
            // header, so the commit cursor lands past header + event.
            self.commit = new_commit_page + PAGE_HEADER_SIZE + event_size as u64;
            self.is_readable = true;
        }
    }

    /// Consumes one full page, returning its byte offset. Fails with EAGAIN
    /// until at least one page has been fully committed.
    fn read(&mut self) -> Result<u64, Errno> {
        if !self.is_readable {
            return error!(EAGAIN);
        }

        let head_page = self.head_page_offset();
        self.head = (self.head + *PAGE_SIZE) % DEFAULT_RING_BUFFER_SIZE_BYTES;

        // Once the reader reaches the page still being committed, there are
        // no complete pages left.
        if self.head_page_offset() == self.commit_page_offset() {
            self.is_readable = false;
        }

        Ok(head_page)
    }
}
290
/// A per-CPU tracefs event queue backed by a VMO ring buffer.
pub struct TraceEventQueue {
    // Whether tracing is currently enabled; flipped by `enable`/`disable`.
    tracing_enabled: AtomicBool,

    // Ring-buffer cursors and the active mapping, guarded by a mutex.
    metadata: Mutex<TraceEventQueueMetadata>,

    // Resizable VMO holding the ring buffer; sized 0 while tracing is off.
    ring_buffer: MemoryObject,

    // Inspect node under which stats (e.g. dropped pages) are recorded.
    tracefs_node: fuchsia_inspect::Node,

    // Async trace id used for read-side tracing spans.
    pub async_id_read: fuchsia_trace::Id,

    // Async trace id used for write-side tracing spans.
    pub async_id_write: fuchsia_trace::Id,

    // CPU this queue belongs to; selects the trace track names.
    pub cpu_id: u32,
}
332
// Pre-built track names for the first 8 CPUs so `read_track_name` /
// `write_track_name` can return a borrowed `&'static str` without allocating;
// higher CPU ids fall back to `format!`.
const STATIC_READ_TRACK_NAMES: [&str; 8] = [
    "Tracefs read 0",
    "Tracefs read 1",
    "Tracefs read 2",
    "Tracefs read 3",
    "Tracefs read 4",
    "Tracefs read 5",
    "Tracefs read 6",
    "Tracefs read 7",
];

const STATIC_WRITE_TRACK_NAMES: [&str; 8] = [
    "Tracefs write 0",
    "Tracefs write 1",
    "Tracefs write 2",
    "Tracefs write 3",
    "Tracefs write 4",
    "Tracefs write 5",
    "Tracefs write 6",
    "Tracefs write 7",
];
354
355impl<'a> TraceEventQueue {
356 pub fn new(inspect_node: &fuchsia_inspect::Node, cpu_id: u32) -> Result<Self, Errno> {
357 let tracefs_node = inspect_node.create_child("tracefs");
358 let metadata = TraceEventQueueMetadata::new();
359 let ring_buffer: MemoryObject = zx::Vmo::create_with_opts(zx::VmoOptions::RESIZABLE, 0)
360 .map_err(|_| errno!(ENOMEM))?
361 .into();
362 let ring_buffer = ring_buffer.with_zx_name(b"starnix:tracefs");
363 let async_id_read = fuchsia_trace::Id::random();
364 let async_id_write = fuchsia_trace::Id::random();
365
366 Ok(Self {
367 tracing_enabled: AtomicBool::new(false),
368 metadata: Mutex::new(metadata),
369 ring_buffer,
370 tracefs_node,
371 async_id_read,
372 async_id_write,
373 cpu_id,
374 })
375 }
376
377 pub fn read_track_name(&self) -> std::borrow::Cow<'static, str> {
378 if let Some(&name) = STATIC_READ_TRACK_NAMES.get(self.cpu_id as usize) {
379 std::borrow::Cow::Borrowed(name)
380 } else {
381 std::borrow::Cow::Owned(format!("Tracefs read {}", self.cpu_id))
382 }
383 }
384
385 pub fn write_track_name(&self) -> std::borrow::Cow<'static, str> {
386 if let Some(&name) = STATIC_WRITE_TRACK_NAMES.get(self.cpu_id as usize) {
387 std::borrow::Cow::Borrowed(name)
388 } else {
389 std::borrow::Cow::Owned(format!("Tracefs write {}", self.cpu_id))
390 }
391 }
392
393 pub fn from(kernel: &Kernel) -> Arc<Self> {
397 kernel.expando.get_or_init(|| {
398 TraceEventQueue::new(&kernel.inspect_node, 0)
399 .expect("TraceEventQueue constructed with valid options")
400 })
401 }
402
403 pub fn is_enabled(&self) -> bool {
404 self.tracing_enabled.load(Ordering::Relaxed)
405 }
406
407 pub fn enable(&self) -> Result<(), Errno> {
408 let mut metadata = self.metadata.lock();
411 metadata.prev_timestamp = BootInstant::get();
412 self.ring_buffer
413 .set_size(DEFAULT_RING_BUFFER_SIZE_BYTES)
414 .map_err(|e| from_status_like_fdio!(e))?;
415
416 let vmo = self.ring_buffer.as_vmo().expect("Trace FS's memory must be VMO backed.");
417 let addr = vmar_root_self()
418 .map(
419 0,
420 &vmo,
421 0,
422 DEFAULT_RING_BUFFER_SIZE_BYTES as usize,
423 zx::VmarFlags::PERM_READ | zx::VmarFlags::PERM_WRITE,
424 )
425 .map_err(|e| from_status_like_fdio!(e))?;
426
427 metadata.mapping = Some(unsafe {
437 SharedBuffer::new(addr as *mut u8, DEFAULT_RING_BUFFER_SIZE_BYTES as usize)
438 });
439
440 self.initialize_page(0, metadata.prev_timestamp)?;
441 self.tracing_enabled.store(true, Ordering::Relaxed);
442 Ok(())
443 }
444
445 pub fn disable(&self) -> Result<(), Errno> {
448 let mut metadata = self.metadata.lock();
451 self.tracefs_node.record_uint(DROPPED_PAGES, metadata.dropped_pages);
452 *metadata = TraceEventQueueMetadata::new();
453 self.ring_buffer.set_size(0).map_err(|e| from_status_like_fdio!(e))?;
454 self.tracing_enabled.store(false, Ordering::Relaxed);
455
456 Ok(())
457 }
458
459 pub fn read(&self, buf: &mut dyn OutputBuffer) -> Result<usize, Errno> {
464 let offset = {
466 let mut metadata = self.metadata.lock();
467 metadata.read()?
468 };
469
470 buf.write_all(
475 &self.ring_buffer.read_to_vec(offset, *PAGE_SIZE).map_err(|_| errno!(ENOMEM))?,
476 )
477 }
478
479 pub fn push_event(
486 &self,
487 mut event: TraceEvent,
488 data: &[u8],
489 ) -> Result<zx::Duration<BootTimeline>, Errno> {
490 let mut metadata = self.metadata.lock();
491 if metadata.mapping.is_none() {
492 return Err(errno!(ENOMEM));
493 };
494
495 let timestamp = BootInstant::get();
504
505 event.set_timestamp(timestamp, metadata.prev_timestamp);
506
507 let old_tail_page = metadata.tail_page_offset();
509 let offset = metadata.reserve(event.size() as u64)?;
510
511 let new_tail_page = metadata.tail_page_offset();
513 if new_tail_page != old_tail_page {
514 self.initialize_page(new_tail_page, metadata.prev_timestamp)?;
515 }
516
517 let bytes = event.as_bytes();
519 if let Some(ref mapping) = metadata.mapping {
520 mapping.write_at(offset as usize, bytes);
521 mapping.write_at(offset as usize + bytes.len(), data);
522 mapping.write_at(offset as usize + bytes.len() + data.len(), b"\n");
523 }
524 metadata.commit(event.size() as u64);
525
526 let new_commit = ((metadata.commit % *PAGE_SIZE) - PAGE_HEADER_SIZE).to_le_bytes();
528 let commit_offset = metadata.commit_field_offset() as usize;
529 if let Some(ref mapping) = metadata.mapping {
530 mapping.write_at(commit_offset, &new_commit);
531 }
532
533 let delta = timestamp - metadata.prev_timestamp;
534 metadata.prev_timestamp = timestamp;
535
536 Ok(delta)
537 }
538
539 #[cfg(test)]
540 fn prev_timestamp(&self) -> BootInstant {
542 self.metadata.lock().prev_timestamp
543 }
544
545 fn initialize_page(&self, offset: u64, prev_timestamp: BootInstant) -> Result<(), Errno> {
548 fuchsia_trace::duration!(starnix_logging::CATEGORY_TRACE_META, "initialize_page");
549 self.ring_buffer
550 .write(&prev_timestamp.into_nanos().to_le_bytes(), offset)
551 .map_err(|e| from_status_like_fdio!(e))?;
552 let timestamp_size = std::mem::size_of::<BootInstant>() as u64;
553 self.ring_buffer
554 .op_range(zx::VmoOp::ZERO, offset + timestamp_size, *PAGE_SIZE - timestamp_size)
555 .map_err(|e| from_status_like_fdio!(e))?;
556 Ok(())
557 }
558}
559
#[cfg(test)]
mod tests {
    use super::{
        DEFAULT_RING_BUFFER_SIZE_BYTES, PAGE_HEADER_SIZE, TraceEvent, TraceEventQueue,
        TraceEventQueueMetadata,
    };
    use crate::vfs::OutputBuffer;
    use crate::vfs::buffers::VecOutputBuffer;
    use starnix_types::PAGE_SIZE;
    use starnix_uapi::error;

    // An empty queue is not readable, and events larger than a page are rejected.
    #[fuchsia::test]
    fn metadata_errors() {
        let mut metadata = TraceEventQueueMetadata::new();
        assert_eq!(metadata.read(), error!(EAGAIN));
        assert_eq!(metadata.reserve(*PAGE_SIZE), error!(EINVAL));
    }

    // Reserving and committing an event that fits on the first page only
    // advances tail and then commit; head stays at 0.
    #[fuchsia::test]
    fn metadata_push_event_simple() {
        let mut metadata = TraceEventQueueMetadata::new();
        let event_size = 30;
        let reserved_offset = metadata.reserve(event_size).expect("reserve failed");
        assert_eq!(reserved_offset, PAGE_HEADER_SIZE);
        assert_eq!(metadata.head, 0);
        assert_eq!(metadata.commit, PAGE_HEADER_SIZE);
        assert_eq!(metadata.tail, PAGE_HEADER_SIZE + event_size);

        metadata.commit(event_size);
        assert_eq!(metadata.head, 0);
        assert_eq!(metadata.commit, PAGE_HEADER_SIZE + event_size);
        assert_eq!(metadata.tail, PAGE_HEADER_SIZE + event_size);
    }

    // An event that would cross a page boundary is placed after the next
    // page's header, and commit follows it there.
    #[fuchsia::test]
    fn metadata_push_event_next_page() {
        let mut metadata = TraceEventQueueMetadata::new();
        metadata.commit = *PAGE_SIZE - 1;
        metadata.tail = *PAGE_SIZE - 1;

        let event_size = 30;
        let reserved_offset = metadata.reserve(event_size).expect("reserve failed");
        assert_eq!(reserved_offset, *PAGE_SIZE + PAGE_HEADER_SIZE);
        assert_eq!(metadata.head, 0);
        assert_eq!(metadata.commit, *PAGE_SIZE - 1);
        assert_eq!(metadata.tail, *PAGE_SIZE + PAGE_HEADER_SIZE + event_size);

        metadata.commit(event_size);
        assert_eq!(metadata.head, 0);
        assert_eq!(metadata.commit, *PAGE_SIZE + PAGE_HEADER_SIZE + event_size);
        assert_eq!(metadata.tail, *PAGE_SIZE + PAGE_HEADER_SIZE + event_size);
    }

    // With the buffer full, non-overwrite mode fails while overwrite mode
    // drops the oldest page and advances head.
    #[fuchsia::test]
    fn metadata_reserve_full() {
        let mut metadata = TraceEventQueueMetadata::new();
        metadata.commit = DEFAULT_RING_BUFFER_SIZE_BYTES;
        metadata.tail = DEFAULT_RING_BUFFER_SIZE_BYTES;

        metadata.overwrite = false;
        assert_eq!(metadata.reserve(30), error!(ENOMEM));

        metadata.overwrite = true;
        assert_eq!(metadata.reserve(30), Ok(PAGE_HEADER_SIZE));
        assert_eq!(metadata.head_page_offset(), *PAGE_SIZE);
        assert_eq!(metadata.dropped_pages, 1);
    }

    // Reading a readable queue returns the head page and advances head.
    #[fuchsia::test]
    fn metadata_read_simple() {
        let mut metadata = TraceEventQueueMetadata::new();
        metadata.is_readable = true;

        assert_eq!(metadata.read(), Ok(0));
        assert_eq!(metadata.head, *PAGE_SIZE);
    }

    // When the read cursor catches up with the commit page, the queue stops
    // being readable.
    #[fuchsia::test]
    fn metadata_read_meets_commit() {
        let mut metadata = TraceEventQueueMetadata::new();
        metadata.is_readable = true;
        metadata.commit = *PAGE_SIZE + PAGE_HEADER_SIZE + 30;

        assert_eq!(metadata.read(), Ok(0));
        assert_eq!(metadata.head, *PAGE_SIZE);
        assert!(!metadata.is_readable);
        assert_eq!(metadata.read(), error!(EAGAIN));
    }

    // Reading from a queue with no committed pages reports EAGAIN.
    #[fuchsia::test]
    fn read_empty_queue() {
        let inspect_node = fuchsia_inspect::Node::default();
        let queue = TraceEventQueue::new(&inspect_node, 0).expect("create queue");
        let mut buffer = VecOutputBuffer::new(*PAGE_SIZE as usize);
        assert_eq!(queue.read(&mut buffer), error!(EAGAIN));
    }

    // Enabling sizes the VMO, pushing advances commit, and disabling resets
    // both the VMO size and the metadata.
    #[fuchsia::test]
    fn enable_disable_queue() {
        let inspect_node = fuchsia_inspect::Node::default();
        let queue = TraceEventQueue::new(&inspect_node, 0).expect("create queue");
        assert_eq!(queue.ring_buffer.get_size(), 0);

        assert!(queue.enable().is_ok());
        assert_eq!(queue.ring_buffer.get_size(), DEFAULT_RING_BUFFER_SIZE_BYTES);

        let data = b"B|1234|slice_name";
        let event = TraceEvent::new(1234, data.len());
        let event_size = event.size() as u64;
        let result = queue.push_event(event, data);
        assert!(result.is_ok());
        assert!(result.ok().expect("delta").into_nanos() > 0);
        assert_eq!(queue.metadata.lock().commit, PAGE_HEADER_SIZE + event_size);

        assert!(queue.disable().is_ok());
        assert_eq!(queue.ring_buffer.get_size(), 0);
        assert_eq!(queue.metadata.lock().commit, PAGE_HEADER_SIZE);
    }

    // Sanity-check the serialized size: headers (8 + 16 bytes) + 17-byte
    // payload + newline = 42.
    #[fuchsia::test]
    fn create_trace_event() {
        let event = TraceEvent::new(1234, b"B|1234|slice_name".len());
        let event_size = event.size();
        assert_eq!(event_size, 42);
    }

    // A single event does not complete a page, so reads still report EAGAIN.
    #[fuchsia::test]
    fn single_trace_event_fails_read() {
        let inspect_node = fuchsia_inspect::Node::default();
        let queue = TraceEventQueue::new(&inspect_node, 0).expect("create queue");
        queue.enable().expect("enable queue");
        let data = b"B|1234|slice_name";
        let event = TraceEvent::new(1234, data.len());

        let result = queue.push_event(event, data);
        assert!(result.is_ok());
        assert!(result.ok().expect("delta").into_nanos() > 0);

        let mut buffer = VecOutputBuffer::new(*PAGE_SIZE as usize);
        assert_eq!(queue.read(&mut buffer), error!(EAGAIN));
    }

    // Pushing enough 155-byte events to overflow a page makes exactly one
    // page readable, with the expected header (timestamp + 26 events'
    // committed bytes — the 27th event spilled onto the next page).
    #[fuchsia::test]
    fn page_overflow() {
        let inspect_node = fuchsia_inspect::Node::default();
        let queue = TraceEventQueue::new(&inspect_node, 0).expect("create queue");
        queue.enable().expect("enable queue");
        let queue_start_timestamp = queue.prev_timestamp();
        let pid = 1234;
        let data = b"B|1234|loooooooooooooooooooooooooooooooooooooooooooooooooooooooooo\
        ooooooooooooooooooooooooooooooooooooooooooooooooooooooooongevent";
        let expected_event = TraceEvent::new(pid, data.len());
        assert_eq!(expected_event.size(), 155);

        for _ in 0..27 {
            let event = TraceEvent::new(pid, data.len());
            let result = queue.push_event(event, data);
            assert!(result.is_ok());
            assert!(result.ok().expect("delta").into_nanos() > 0);
        }

        let mut buffer = VecOutputBuffer::new(*PAGE_SIZE as usize);
        assert_eq!(queue.read(&mut buffer), Ok(*PAGE_SIZE as usize));
        assert_eq!(buffer.bytes_written() as u64, *PAGE_SIZE);

        let mut expected_page_header: Vec<u8> = vec![];
        expected_page_header
            .extend_from_slice(&(queue_start_timestamp.into_nanos() as u64).to_le_bytes());
        expected_page_header.extend_from_slice(&(expected_event.size() * 26).to_le_bytes());

        assert!(buffer.data().starts_with(&expected_page_header));

        let mut buffer = VecOutputBuffer::new(*PAGE_SIZE as usize);
        assert_eq!(queue.read(&mut buffer), error!(EAGAIN));
    }
}