1// Copyright 2023 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
45//! Utility for profiling nested/hierarchical durations and reporting their accumulated runtime
6//! in inspect. See caveats below before using.
7//!
8//! # Caveats
9//!
10//! Profiling data collected by this module has some runtime overhead depending on the level of
11//! nesting and frequency of entrances and exits. Do not call `start_self_profiling` in production.
12//!
13//! The method this tool uses to collect nested durations' runtimes prevents them from perfectly
14//! summing to 100% of the parent duration's runtime, even when the code is completely covered by
15//! adjoining durations. As a rule of thumb, the error should be limited to a few percent of the
16//! parent's runtime so if you see >10% unaccounted runtime you may need to add more durations.
17//!
18//! Runtimes are accumulated with atomics rather than with a single lock per duration to reduce
19//! error accumulation from lock contention, but this means that a given inspect snapshot may
20//! observe partially written results. This means that the tool is best suited for measuring
21//! workloads where durations occur at a high frequency (and as a result may be a poor fit for
22//! tracing) or where the timing of the inspect snapshot can be run after the profiled code has
23//! exited all durations of interest.
24//!
25//! This module does not yet integrate well with `async`/`.await` workloads.
26//!
27//! # Setup
28//!
29//! There are three steps required to collect profiling data with this module:
30//!
31//! 1. register a lazy node at the top of your component's inspect hierarchies using
32//! `ProfileDuration::lazy_node_callback()`
33//! 2. call `start_self_profiling()`
34//! 3. invoke `profile_duration!("...");` at the beginning of scopes of interest
35//!
36//! # Analysis
37//!
38//! To see a human-readable summary of the captured profiles, take a snapshot of your component's
39//! inspect and pass the results to `self_profiles_report::SelfProfilesReport`.
4041use fuchsia_inspect::{Inspector, Node};
42use fuchsia_sync::Mutex;
43use std::cell::RefCell;
44use std::collections::BTreeMap;
45use std::future::Future;
46use std::panic::Location;
47use std::pin::Pin;
48use std::sync::atomic::{AtomicBool, AtomicI64, AtomicU64, Ordering};
49use std::sync::{Arc, LazyLock};
50use zx::{self as zx, Task as _};
5152static PROFILING_ENABLED: AtomicBool = AtomicBool::new(false);
5354static ROOT_PROFILE_DURATION: LazyLock<Arc<ProfileDurationTree>> =
55 LazyLock::new(|| Arc::new(ProfileDurationTree::new(Location::caller())));
5657thread_local! {
58static CURRENT_PROFILE_DURATION: RefCell<Arc<ProfileDurationTree>> =
59 RefCell::new(ROOT_PROFILE_DURATION.clone());
60}
6162/// Profile the remainder of the invoking lexical scope under the provided name.
63///
64/// If you need to split a single lexical scope into multiple durations, consider using
65/// `ProfileDuration::enter` and `ProfileDuration::pivot`.
66#[macro_export]
67macro_rules! profile_duration {
68 ($name:expr) => {
69let _guard = $crate::ProfileDuration::enter($name);
70 };
71}
7273/// Start collecting profiling information for any durations entered after this call is made.
74pub fn start_self_profiling() {
75 PROFILING_ENABLED.store(true, Ordering::Relaxed);
76}
7778/// Stop collecting profiling information for any durations entered after this call is made.
79pub fn stop_self_profiling() {
80 PROFILING_ENABLED.store(false, Ordering::Relaxed);
81}
8283#[inline]
84fn profiling_enabled() -> bool {
85 PROFILING_ENABLED.load(Ordering::Relaxed)
86}
8788/// A guard value to manually control entry/exit of profile durations.
89pub struct ProfileDuration {
90 inner: Option<InnerGuard>,
91}
9293impl ProfileDuration {
94/// Function to be passed to `fuchsia_inspect::Node::record_lazy_child` to record profiling data.
95pub fn lazy_node_callback(
96 ) -> Pin<Box<dyn Future<Output = Result<Inspector, anyhow::Error>> + Send + 'static>> {
97 Box::pin(async move {
98let inspector = Inspector::default();
99 ROOT_PROFILE_DURATION.report(inspector.root());
100101// Make sure the analysis tooling can find this node regardless of the user-defined name.
102inspector.root().record_bool("__profile_durations_root", true);
103Ok(inspector)
104 })
105 }
106107/// Manually enter a profile duration. The duration will be exited when the returned value is
108 /// dropped.
109 ///
110 /// If using a single variable binding to hold guards for multiple durations, call
111 /// `ProfileDuration::pivot` instead of re-assigning to the binding.
112#[track_caller]
113pub fn enter(name: &'static str) -> Self {
114Self { inner: if profiling_enabled() { Some(InnerGuard::enter(name)) } else { None } }
115 }
116117/// End the current duration and enter a new one, re-using the same guard value. Allows
118 /// splitting a single lexical scope into multiple durations.
119#[track_caller]
120pub fn pivot(&mut self, name: &'static str) {
121if profiling_enabled() {
122 drop(self.inner.take());
123self.inner = Some(InnerGuard::enter(name));
124 }
125 }
126}
127128struct InnerGuard {
129 start_runtime: zx::TaskRuntimeInfo,
130 start_monotonic_ns: zx::MonotonicInstant,
131 parent_duration: Arc<ProfileDurationTree>,
132}
133134impl InnerGuard {
135#[track_caller]
136fn enter(name: &'static str) -> Self {
137let start_monotonic_ns = zx::MonotonicInstant::get();
138let start_runtime = current_thread_runtime();
139140// Get the location outside the below closure since it can't be track_caller on stable.
141let location = Location::caller();
142let parent_duration = CURRENT_PROFILE_DURATION.with(|current_duration| {
143let mut current_duration = current_duration.borrow_mut();
144let child_duration = current_duration.child(name, location);
145 std::mem::replace(&mut *current_duration, child_duration)
146 });
147148Self { start_runtime, start_monotonic_ns, parent_duration }
149 }
150}
151152impl Drop for InnerGuard {
153fn drop(&mut self) {
154 CURRENT_PROFILE_DURATION.with(|current_duration| {
155let mut current_duration = current_duration.borrow_mut();
156let completed_duration =
157 std::mem::replace(&mut *current_duration, self.parent_duration.clone());
158159let runtime_delta = current_thread_runtime() - self.start_runtime;
160let wall_time_delta = zx::MonotonicInstant::get() - self.start_monotonic_ns;
161162 completed_duration.count.fetch_add(1, Ordering::Relaxed);
163 completed_duration.wall_time.fetch_add(wall_time_delta.into_nanos(), Ordering::Relaxed);
164 completed_duration.cpu_time.fetch_add(runtime_delta.cpu_time, Ordering::Relaxed);
165 completed_duration.queue_time.fetch_add(runtime_delta.queue_time, Ordering::Relaxed);
166 completed_duration
167 .page_fault_time
168 .fetch_add(runtime_delta.page_fault_time, Ordering::Relaxed);
169 completed_duration
170 .lock_contention_time
171 .fetch_add(runtime_delta.lock_contention_time, Ordering::Relaxed);
172 });
173 }
174}
175176#[derive(Debug)]
177struct ProfileDurationTree {
178 location: &'static Location<'static>,
179 count: AtomicU64,
180 wall_time: AtomicI64,
181 cpu_time: AtomicI64,
182 queue_time: AtomicI64,
183 page_fault_time: AtomicI64,
184 lock_contention_time: AtomicI64,
185 children: Mutex<BTreeMap<&'static str, Arc<Self>>>,
186}
187188impl ProfileDurationTree {
189fn new(location: &'static Location<'static>) -> Self {
190Self {
191 location,
192 count: Default::default(),
193 wall_time: Default::default(),
194 cpu_time: Default::default(),
195 queue_time: Default::default(),
196 page_fault_time: Default::default(),
197 lock_contention_time: Default::default(),
198 children: Default::default(),
199 }
200 }
201202fn child(&self, name: &'static str, location: &'static Location<'static>) -> Arc<Self> {
203self.children.lock().entry(name).or_insert_with(|| Arc::new(Self::new(location))).clone()
204 }
205206fn report(&self, node: &Node) {
207let children = self.children.lock();
208 node.record_string("location", self.location.to_string());
209 node.record_uint("count", self.count.load(Ordering::Relaxed));
210 node.record_int("wall_time", self.wall_time.load(Ordering::Relaxed));
211 node.record_int("cpu_time", self.cpu_time.load(Ordering::Relaxed));
212 node.record_int("queue_time", self.queue_time.load(Ordering::Relaxed));
213 node.record_int("page_fault_time", self.page_fault_time.load(Ordering::Relaxed));
214 node.record_int("lock_contention_time", self.lock_contention_time.load(Ordering::Relaxed));
215for (name, child) in children.iter() {
216 node.record_child(*name, |n| child.report(n));
217 }
218 }
219}
220221fn current_thread_runtime() -> zx::TaskRuntimeInfo {
222 fuchsia_runtime::thread_self()
223 .get_runtime_info()
224 .expect("should always be able to read own thread's runtime info")
225}