1use std::sync::Arc;
6use std::sync::atomic::{AtomicBool, Ordering};
7
8use crate::mm::memory::MemoryObject;
9use crate::task::Kernel;
10use crate::vfs::OutputBuffer;
11use fuchsia_runtime::vmar_root_self;
12use shared_buffer::SharedBuffer;
13use starnix_sync::Mutex;
14use starnix_types::PAGE_SIZE;
15use starnix_uapi::errors::Errno;
16use starnix_uapi::{errno, error, from_status_like_fdio};
17use zerocopy::native_endian::{I32, U16, U32, U64};
18use zerocopy::{Immutable, IntoBytes, Unaligned};
19use zx::{BootInstant, BootTimeline};
20
/// Total size of each per-CPU ring buffer VMO: 2 MiB.
const DEFAULT_RING_BUFFER_SIZE_BYTES: u64 = 2097152;

/// Each page starts with a 16-byte header: a u64 base timestamp followed by
/// a u64 "commit" field (bytes of event data committed in the page).
const PAGE_HEADER_SIZE: u64 = 2 * std::mem::size_of::<u64>() as u64;
/// Byte offset of the commit field within a page header (right after the timestamp).
const COMMIT_FIELD_OFFSET: u64 = std::mem::size_of::<u64>() as u64;

/// `common_type` value identifying an ftrace "print" event.
const FTRACE_PRINT_ID: U16 = U16::new(5);

/// Inspect property name under which the total dropped-page count is recorded.
const DROPPED_PAGES: &str = "dropped_pages";
34
/// On-the-wire header of an ftrace "print" event. Field order and explicit
/// little-endian/unaligned zerocopy types match the serialized layout.
#[repr(C)]
#[derive(Debug, Default, IntoBytes, Immutable, Unaligned)]
struct PrintEventHeader {
    // Event type discriminator; set to FTRACE_PRINT_ID for print events.
    common_type: U16,
    common_flags: u8,
    common_preempt_count: u8,
    // Pid of the task that emitted the event.
    common_pid: I32,
    // Instruction pointer field of the print event; left at zero here.
    ip: U64,
}
44
/// A serializable ftrace print event. Currently only the fixed-size header;
/// the variable-length message payload is written separately after it.
#[repr(C)]
#[derive(Debug, IntoBytes, Immutable, Unaligned)]
struct PrintEvent {
    header: PrintEventHeader,
}
50
51impl PrintEvent {
52 fn new(pid: i32) -> Self {
53 Self {
54 header: PrintEventHeader {
55 common_type: FTRACE_PRINT_ID,
56 common_pid: I32::new(pid),
57 ..Default::default()
59 },
60 }
61 }
62
63 fn size(&self) -> usize {
64 std::mem::size_of::<PrintEventHeader>()
65 }
66}
67
/// Per-event header in the ring buffer, preceding the print event bytes.
#[repr(C)]
#[derive(Debug, Default, IntoBytes, PartialEq, Immutable, Unaligned)]
struct TraceEventHeader {
    // Nanoseconds since the previous event, stored shifted left by 5 bits
    // (see `set_time_delta`); presumably the low 5 bits are the ftrace
    // type_len field — TODO confirm against the ftrace ring-buffer format.
    time_delta: U32,

    // Total payload length in bytes: this 4-byte field itself plus the
    // print event, message data, and trailing newline (see `new`).
    data: U32,
}
80
81impl TraceEventHeader {
82 fn new(size: usize) -> Self {
83 let size = (std::mem::size_of::<u32>() + size) as u32;
86 Self { time_delta: U32::new(0), data: U32::new(size) }
87 }
88
89 fn set_time_delta(&mut self, nanos: u32) {
90 self.time_delta = U32::new(nanos << 5);
91 }
92}
93
/// A complete ring-buffer entry: the buffer-level header followed by the
/// ftrace print event. Written to the buffer via zerocopy `as_bytes`.
#[repr(C)]
#[derive(Debug, IntoBytes, Immutable, Unaligned)]
pub struct TraceEvent {
    // Time delta + payload-length header.
    header: TraceEventHeader,
    // The print event that follows the header on the wire.
    event: PrintEvent,
}
106
107impl TraceEvent {
108 pub fn new(pid: i32, data_len: usize) -> Self {
109 let event = PrintEvent::new(pid);
110 let header = TraceEventHeader::new(event.size() + data_len + 1);
112 Self { header, event }
113 }
114
115 fn size(&self) -> usize {
116 std::mem::size_of::<u32>() + self.header.data.get() as usize
118 }
119
120 fn set_timestamp(&mut self, timestamp: BootInstant, prev_timestamp: BootInstant) {
121 debug_assert!(timestamp >= prev_timestamp, "Timestamp must be >= prev_timestamp");
125 let nanos: u32 = (timestamp - prev_timestamp).into_nanos().try_into().unwrap_or(0);
126 self.header.set_time_delta(nanos);
127 }
128}
129
/// Mutable bookkeeping for one per-CPU ring buffer. All offsets are byte
/// offsets into the ring buffer and wrap at DEFAULT_RING_BUFFER_SIZE_BYTES.
struct TraceEventQueueMetadata {
    // Read cursor: `read()` hands out the page containing `head`, then
    // advances it by one page.
    head: u64,

    // Offset up to which events have been fully written; starts past the
    // first page header.
    commit: u64,

    // Offset where the next event will be reserved.
    tail: u64,

    // Largest event that fits in one page alongside the page header.
    max_event_size: u64,

    // Timestamp of the most recent event; base for the next event's delta
    // and for new page headers.
    prev_timestamp: BootInstant,

    // True once at least one full page is available for `read()`.
    is_readable: bool,

    // When full: overwrite (drop) the oldest page instead of failing.
    overwrite: bool,

    // Number of pages dropped due to overwriting; reported via inspect.
    dropped_pages: u64,

    // Mapped view of the ring buffer VMO; `None` while the queue is disabled.
    mapping: Option<SharedBuffer>,
}
167
168impl Drop for TraceEventQueueMetadata {
169 fn drop(&mut self) {
170 if let Some(ref mut buf) = self.mapping {
171 let (addr, size): (*mut u8, usize) = buf.as_ptr_len();
172 let addr = addr as usize;
173
174 unsafe {
179 let _ = vmar_root_self().unmap(addr, size);
180 }
181 }
182 }
183}
184
185impl TraceEventQueueMetadata {
186 fn new() -> Self {
187 Self {
188 head: 0,
189 commit: PAGE_HEADER_SIZE,
190 tail: PAGE_HEADER_SIZE,
191 max_event_size: *PAGE_SIZE - PAGE_HEADER_SIZE,
192 prev_timestamp: BootInstant::get(),
193 is_readable: false,
194 overwrite: true,
195 dropped_pages: 0,
196 mapping: None,
197 }
198 }
199
200 fn head_page_offset(&self) -> u64 {
202 self.head - (self.head % *PAGE_SIZE)
203 }
204
205 fn commit_page_offset(&self) -> u64 {
207 self.commit - (self.commit % *PAGE_SIZE)
208 }
209
210 fn tail_page_offset(&self) -> u64 {
212 self.tail - (self.tail % *PAGE_SIZE)
213 }
214
215 fn commit_field_offset(&self) -> u64 {
217 self.commit_page_offset() + COMMIT_FIELD_OFFSET
218 }
219
220 fn reserve(&mut self, event_size: u64) -> Result<u64, Errno> {
229 if event_size > self.max_event_size {
230 return error!(EINVAL);
231 }
232
233 let prev_tail_page = self.tail_page_offset();
234 let mut reserve_start = self.tail;
235 let maybe_new_tail = (self.tail + event_size as u64) % DEFAULT_RING_BUFFER_SIZE_BYTES;
236 let maybe_new_tail_page = maybe_new_tail - (maybe_new_tail % *PAGE_SIZE);
237
238 if prev_tail_page != maybe_new_tail_page {
239 if maybe_new_tail_page == self.head_page_offset() {
243 if self.overwrite {
244 self.head += *PAGE_SIZE;
245 self.dropped_pages += 1;
246 } else {
247 return error!(ENOMEM);
248 }
249 }
250
251 reserve_start = maybe_new_tail_page + PAGE_HEADER_SIZE;
253 }
254 self.tail = reserve_start + event_size as u64;
255
256 Ok(reserve_start)
257 }
258
259 fn commit(&mut self, event_size: u64) {
262 let prev_commit_page = self.commit_page_offset();
263 self.commit = (self.commit + event_size as u64) % DEFAULT_RING_BUFFER_SIZE_BYTES;
264
265 let new_commit_page = self.commit_page_offset();
266 if prev_commit_page != new_commit_page {
267 self.commit = new_commit_page + PAGE_HEADER_SIZE + event_size as u64;
268 self.is_readable = true;
270 }
271 }
272
273 fn read(&mut self) -> Result<u64, Errno> {
275 if !self.is_readable {
276 return error!(EAGAIN);
277 }
278
279 let head_page = self.head_page_offset();
280 self.head = (self.head + *PAGE_SIZE) % DEFAULT_RING_BUFFER_SIZE_BYTES;
281
282 if self.head_page_offset() == self.commit_page_offset() {
284 self.is_readable = false;
285 }
286
287 Ok(head_page)
288 }
289}
290
/// One per-CPU trace event ring buffer: a resizable VMO carved into pages,
/// each page holding a header plus committed events.
pub struct TraceEventQueue {
    // Cursors, mapping, and settings; guarded by a single lock.
    metadata: Mutex<TraceEventQueueMetadata>,

    // VMO backing the ring buffer; sized on `enable`, shrunk to 0 on `disable`.
    ring_buffer: MemoryObject,

    // Random async trace id for read-side annotations of this queue.
    pub async_id_read: fuchsia_trace::Id,

    // Random async trace id for write-side annotations of this queue.
    pub async_id_write: fuchsia_trace::Id,

    // CPU this queue belongs to; also selects the track names below.
    pub cpu_id: u32,
}
325
// Pre-built track names for the first 8 CPUs so the common case can hand out
// a borrowed `&'static str` instead of allocating with `format!`.
const STATIC_READ_TRACK_NAMES: [&str; 8] = [
    "Tracefs read 0",
    "Tracefs read 1",
    "Tracefs read 2",
    "Tracefs read 3",
    "Tracefs read 4",
    "Tracefs read 5",
    "Tracefs read 6",
    "Tracefs read 7",
];

const STATIC_WRITE_TRACK_NAMES: [&str; 8] = [
    "Tracefs write 0",
    "Tracefs write 1",
    "Tracefs write 2",
    "Tracefs write 3",
    "Tracefs write 4",
    "Tracefs write 5",
    "Tracefs write 6",
    "Tracefs write 7",
];
347
348impl<'a> TraceEventQueue {
349 fn new(cpu_id: u32) -> Result<Self, Errno> {
350 let metadata = TraceEventQueueMetadata::new();
351 let ring_buffer: MemoryObject = zx::Vmo::create_with_opts(zx::VmoOptions::RESIZABLE, 0)
352 .map_err(|_| errno!(ENOMEM))?
353 .into();
354 let ring_buffer = ring_buffer.with_zx_name(b"starnix:tracefs");
355 let async_id_read = fuchsia_trace::Id::random();
356 let async_id_write = fuchsia_trace::Id::random();
357
358 Ok(Self {
359 metadata: Mutex::new(metadata),
360 ring_buffer,
361 async_id_read,
362 async_id_write,
363 cpu_id,
364 })
365 }
366
367 pub fn read_track_name(&self) -> std::borrow::Cow<'static, str> {
368 if let Some(&name) = STATIC_READ_TRACK_NAMES.get(self.cpu_id as usize) {
369 std::borrow::Cow::Borrowed(name)
370 } else {
371 std::borrow::Cow::Owned(format!("Tracefs read {}", self.cpu_id))
372 }
373 }
374
375 pub fn write_track_name(&self) -> std::borrow::Cow<'static, str> {
376 if let Some(&name) = STATIC_WRITE_TRACK_NAMES.get(self.cpu_id as usize) {
377 std::borrow::Cow::Borrowed(name)
378 } else {
379 std::borrow::Cow::Owned(format!("Tracefs write {}", self.cpu_id))
380 }
381 }
382
383 fn enable(&self) -> Result<(), Errno> {
384 let mut metadata = self.metadata.lock();
387 metadata.prev_timestamp = BootInstant::get();
388 self.ring_buffer
389 .set_size(DEFAULT_RING_BUFFER_SIZE_BYTES)
390 .map_err(|e| from_status_like_fdio!(e))?;
391
392 let vmo = self.ring_buffer.as_vmo().expect("Trace FS's memory must be VMO backed.");
393 let addr = vmar_root_self()
394 .map(
395 0,
396 &vmo,
397 0,
398 DEFAULT_RING_BUFFER_SIZE_BYTES as usize,
399 zx::VmarFlags::PERM_READ | zx::VmarFlags::PERM_WRITE,
400 )
401 .map_err(|e| from_status_like_fdio!(e))?;
402
403 metadata.mapping = Some(unsafe {
413 SharedBuffer::new(addr as *mut u8, DEFAULT_RING_BUFFER_SIZE_BYTES as usize)
414 });
415
416 self.initialize_page(0, metadata.prev_timestamp)?;
417 Ok(())
418 }
419
420 fn disable(&self) -> Result<u64, Errno> {
423 let mut metadata = self.metadata.lock();
426 let dropped = metadata.dropped_pages;
427 *metadata = TraceEventQueueMetadata::new();
428 self.ring_buffer.set_size(0).map_err(|e| from_status_like_fdio!(e))?;
429
430 Ok(dropped)
431 }
432
433 pub fn read(&self, buf: &mut dyn OutputBuffer) -> Result<usize, Errno> {
438 let offset = {
440 let mut metadata = self.metadata.lock();
441 metadata.read()?
442 };
443
444 buf.write_all(
449 &self.ring_buffer.read_to_vec(offset, *PAGE_SIZE).map_err(|_| errno!(ENOMEM))?,
450 )
451 }
452
453 pub fn push_event(
460 &self,
461 mut event: TraceEvent,
462 data: &[u8],
463 ) -> Result<zx::Duration<BootTimeline>, Errno> {
464 let mut metadata = self.metadata.lock();
465 if metadata.mapping.is_none() {
466 return Err(errno!(ENOMEM));
467 };
468
469 let timestamp = BootInstant::get();
478
479 event.set_timestamp(timestamp, metadata.prev_timestamp);
480
481 let old_tail_page = metadata.tail_page_offset();
483 let offset = metadata.reserve(event.size() as u64)?;
484
485 let new_tail_page = metadata.tail_page_offset();
487 if new_tail_page != old_tail_page {
488 self.initialize_page(new_tail_page, metadata.prev_timestamp)?;
489 }
490
491 let bytes = event.as_bytes();
493 if let Some(ref mapping) = metadata.mapping {
494 mapping.write_at(offset as usize, bytes);
495 mapping.write_at(offset as usize + bytes.len(), data);
496 mapping.write_at(offset as usize + bytes.len() + data.len(), b"\n");
497 }
498 metadata.commit(event.size() as u64);
499
500 let new_commit = ((metadata.commit % *PAGE_SIZE) - PAGE_HEADER_SIZE).to_le_bytes();
502 let commit_offset = metadata.commit_field_offset() as usize;
503 if let Some(ref mapping) = metadata.mapping {
504 mapping.write_at(commit_offset, &new_commit);
505 }
506
507 let delta = timestamp - metadata.prev_timestamp;
508 metadata.prev_timestamp = timestamp;
509
510 Ok(delta)
511 }
512
513 #[cfg(test)]
514 fn prev_timestamp(&self) -> BootInstant {
516 self.metadata.lock().prev_timestamp
517 }
518
519 fn initialize_page(&self, offset: u64, prev_timestamp: BootInstant) -> Result<(), Errno> {
522 fuchsia_trace::duration!(starnix_logging::CATEGORY_TRACE_META, "initialize_page");
523 self.ring_buffer
524 .write(&prev_timestamp.into_nanos().to_le_bytes(), offset)
525 .map_err(|e| from_status_like_fdio!(e))?;
526 let timestamp_size = std::mem::size_of::<BootInstant>() as u64;
527 self.ring_buffer
528 .op_range(zx::VmoOp::ZERO, offset + timestamp_size, *PAGE_SIZE - timestamp_size)
529 .map_err(|e| from_status_like_fdio!(e))?;
530 Ok(())
531 }
532}
533
/// All per-CPU trace queues for a kernel, plus the global enabled flag and
/// the inspect node under which dropped-page counts are recorded.
pub struct TraceEventQueueList {
    pub queues: Vec<Arc<TraceEventQueue>>,
    // Global on/off switch; written on enable/disable, read via `is_enabled`.
    tracing_enabled: Arc<AtomicBool>,
    // "tracefs" inspect child node for diagnostics.
    tracefs_node: fuchsia_inspect::Node,
}
539
540impl TraceEventQueueList {
541 pub fn from(kernel: &Kernel) -> Arc<Self> {
542 kernel.expando.get_or_init(|| {
543 let num_cpus = zx::system_get_num_cpus();
544 let tracing_enabled = Arc::new(AtomicBool::new(false));
545 let tracefs_node = kernel.inspect_node.create_child("tracefs");
546
547 let mut queues = vec![];
548 for cpu in 0..num_cpus {
549 let queue = Arc::new(TraceEventQueue::new(cpu as u32).expect("create queue"));
550 queues.push(queue);
551 }
552 Self { queues, tracing_enabled, tracefs_node }
553 })
554 }
555
556 pub fn is_enabled(&self) -> bool {
557 self.tracing_enabled.load(Ordering::Acquire)
558 }
559
560 pub fn enable(&self) -> Result<(), Errno> {
561 let mut first_error = None;
562 for queue in &self.queues {
563 if let Err(e) = queue.enable() {
564 if first_error.is_none() {
565 first_error = Some(e);
566 }
567 }
568 }
569 if let Some(e) = first_error {
570 return Err(e);
571 }
572 self.tracing_enabled.store(true, Ordering::Release);
573 Ok(())
574 }
575
576 pub fn disable(&self) -> Result<(), Errno> {
577 self.tracing_enabled.store(false, Ordering::Release);
579 let mut first_error = None;
580 let mut total_dropped = 0;
581 for queue in &self.queues {
586 match queue.disable() {
587 Ok(dropped) => total_dropped += dropped,
588 Err(e) => {
589 if first_error.is_none() {
590 first_error = Some(e);
591 }
592 }
593 }
594 }
595
596 self.tracefs_node.record_uint(DROPPED_PAGES, total_dropped);
597
598 if let Some(e) = first_error {
599 return Err(e);
600 }
601 Ok(())
602 }
603}
604
#[cfg(test)]
mod tests {
    use super::{
        DEFAULT_RING_BUFFER_SIZE_BYTES, PAGE_HEADER_SIZE, TraceEvent, TraceEventQueue,
        TraceEventQueueMetadata,
    };
    use crate::vfs::OutputBuffer;
    use crate::vfs::buffers::VecOutputBuffer;
    use starnix_types::PAGE_SIZE;
    use starnix_uapi::error;

    // Reading with nothing committed fails EAGAIN; reserving an event that
    // can never fit in a page fails EINVAL.
    #[fuchsia::test]
    fn metadata_errors() {
        let mut metadata = TraceEventQueueMetadata::new();
        assert_eq!(metadata.read(), error!(EAGAIN));
        assert_eq!(metadata.reserve(*PAGE_SIZE), error!(EINVAL));
    }

    // A reserve/commit pair within the first page advances tail then commit,
    // leaving head untouched.
    #[fuchsia::test]
    fn metadata_push_event_simple() {
        let mut metadata = TraceEventQueueMetadata::new();
        let event_size = 30;
        let reserved_offset = metadata.reserve(event_size).expect("reserve failed");
        assert_eq!(reserved_offset, PAGE_HEADER_SIZE);
        assert_eq!(metadata.head, 0);
        assert_eq!(metadata.commit, PAGE_HEADER_SIZE);
        assert_eq!(metadata.tail, PAGE_HEADER_SIZE + event_size);

        metadata.commit(event_size);
        assert_eq!(metadata.head, 0);
        assert_eq!(metadata.commit, PAGE_HEADER_SIZE + event_size);
        assert_eq!(metadata.tail, PAGE_HEADER_SIZE + event_size);
    }

    // An event that would straddle a page boundary is relocated to just
    // after the next page's header, and commit mirrors the relocation.
    #[fuchsia::test]
    fn metadata_push_event_next_page() {
        let mut metadata = TraceEventQueueMetadata::new();
        metadata.commit = *PAGE_SIZE - 1;
        metadata.tail = *PAGE_SIZE - 1;

        let event_size = 30;
        let reserved_offset = metadata.reserve(event_size).expect("reserve failed");
        assert_eq!(reserved_offset, *PAGE_SIZE + PAGE_HEADER_SIZE);
        assert_eq!(metadata.head, 0);
        assert_eq!(metadata.commit, *PAGE_SIZE - 1);
        assert_eq!(metadata.tail, *PAGE_SIZE + PAGE_HEADER_SIZE + event_size);

        metadata.commit(event_size);
        assert_eq!(metadata.head, 0);
        assert_eq!(metadata.commit, *PAGE_SIZE + PAGE_HEADER_SIZE + event_size);
        assert_eq!(metadata.tail, *PAGE_SIZE + PAGE_HEADER_SIZE + event_size);
    }

    // When the write wraps onto the reader's page: fail with ENOMEM unless
    // overwrite is set, in which case the oldest page is dropped.
    #[fuchsia::test]
    fn metadata_reserve_full() {
        let mut metadata = TraceEventQueueMetadata::new();
        metadata.commit = DEFAULT_RING_BUFFER_SIZE_BYTES;
        metadata.tail = DEFAULT_RING_BUFFER_SIZE_BYTES;

        metadata.overwrite = false;
        assert_eq!(metadata.reserve(30), error!(ENOMEM));

        metadata.overwrite = true;
        assert_eq!(metadata.reserve(30), Ok(PAGE_HEADER_SIZE));
        assert_eq!(metadata.head_page_offset(), *PAGE_SIZE);
        assert_eq!(metadata.dropped_pages, 1);
    }

    // A readable queue hands out the head page and advances head one page.
    #[fuchsia::test]
    fn metadata_read_simple() {
        let mut metadata = TraceEventQueueMetadata::new();
        metadata.is_readable = true;

        assert_eq!(metadata.read(), Ok(0));
        assert_eq!(metadata.head, *PAGE_SIZE);
    }

    // Once head catches up with the commit page, the queue stops being
    // readable and further reads fail EAGAIN.
    #[fuchsia::test]
    fn metadata_read_meets_commit() {
        let mut metadata = TraceEventQueueMetadata::new();
        metadata.is_readable = true;
        metadata.commit = *PAGE_SIZE + PAGE_HEADER_SIZE + 30;

        assert_eq!(metadata.read(), Ok(0));
        assert_eq!(metadata.head, *PAGE_SIZE);
        assert!(!metadata.is_readable);
        assert_eq!(metadata.read(), error!(EAGAIN));
    }

    // Reading a never-enabled queue fails EAGAIN.
    #[fuchsia::test]
    fn read_empty_queue() {
        let queue = TraceEventQueue::new(0).expect("create queue");
        let mut buffer = VecOutputBuffer::new(*PAGE_SIZE as usize);
        assert_eq!(queue.read(&mut buffer), error!(EAGAIN));
    }

    // enable() grows the VMO, push_event advances commit, and disable()
    // shrinks the VMO back to zero and resets the metadata.
    #[fuchsia::test]
    fn enable_disable_queue() {
        let queue = TraceEventQueue::new(0).expect("create queue");
        assert_eq!(queue.ring_buffer.get_size(), 0);

        assert!(queue.enable().is_ok());
        assert_eq!(queue.ring_buffer.get_size(), DEFAULT_RING_BUFFER_SIZE_BYTES);

        let data = b"B|1234|slice_name";
        let event = TraceEvent::new(1234, data.len());
        let event_size = event.size() as u64;
        let result = queue.push_event(event, data);
        assert!(result.is_ok());
        assert!(result.ok().expect("delta").into_nanos() > 0);
        assert_eq!(queue.metadata.lock().commit, PAGE_HEADER_SIZE + event_size);

        assert!(queue.disable().is_ok());
        assert_eq!(queue.ring_buffer.get_size(), 0);
        assert_eq!(queue.metadata.lock().commit, PAGE_HEADER_SIZE);
    }

    // Size = 4 (time_delta) + 4 (data field) + 16 (print header)
    //      + 17 (payload) + 1 (newline) = 42.
    #[fuchsia::test]
    fn create_trace_event() {
        let event = TraceEvent::new(1234, b"B|1234|slice_name".len());
        let event_size = event.size();
        assert_eq!(event_size, 42);
    }

    // One event does not complete a page, so reads still fail EAGAIN.
    #[fuchsia::test]
    fn single_trace_event_fails_read() {
        let queue = TraceEventQueue::new(0).expect("create queue");
        queue.enable().expect("enable queue");
        let data = b"B|1234|slice_name";
        let event = TraceEvent::new(1234, data.len());

        let result = queue.push_event(event, data);
        assert!(result.is_ok());
        assert!(result.ok().expect("delta").into_nanos() > 0);

        let mut buffer = VecOutputBuffer::new(*PAGE_SIZE as usize);
        assert_eq!(queue.read(&mut buffer), error!(EAGAIN));
    }

    // 27 events overflow the first page (26 fit alongside the header), which
    // makes one full page readable whose header carries the enable timestamp
    // and the committed byte count for 26 events.
    #[fuchsia::test]
    fn page_overflow() {
        let queue = TraceEventQueue::new(0).expect("create queue");
        queue.enable().expect("enable queue");
        let queue_start_timestamp = queue.prev_timestamp();
        let pid = 1234;
        let data = b"B|1234|loooooooooooooooooooooooooooooooooooooooooooooooooooooooooo\
        ooooooooooooooooooooooooooooooooooooooooooooooooooooooooongevent";
        let expected_event = TraceEvent::new(pid, data.len());
        assert_eq!(expected_event.size(), 155);

        for _ in 0..27 {
            let event = TraceEvent::new(pid, data.len());
            let result = queue.push_event(event, data);
            assert!(result.is_ok());
            assert!(result.ok().expect("delta").into_nanos() > 0);
        }

        let mut buffer = VecOutputBuffer::new(*PAGE_SIZE as usize);
        assert_eq!(queue.read(&mut buffer), Ok(*PAGE_SIZE as usize));
        assert_eq!(buffer.bytes_written() as u64, *PAGE_SIZE);

        let mut expected_page_header: Vec<u8> = vec![];
        expected_page_header
            .extend_from_slice(&(queue_start_timestamp.into_nanos() as u64).to_le_bytes());
        expected_page_header.extend_from_slice(&(expected_event.size() * 26).to_le_bytes());

        assert!(buffer.data().starts_with(&expected_page_header));

        let mut buffer = VecOutputBuffer::new(*PAGE_SIZE as usize);
        assert_eq!(queue.read(&mut buffer), error!(EAGAIN));
    }
}
791}