Skip to main content

periodic_monitoring/
lib.rs

1// Copyright 2025 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4use anyhow::{Context, Result};
5use attribution_processing::digest::{BucketDefinition, Digest};
6use attribution_processing::summary::MemorySummary;
7use attribution_processing::{AttributionDataProvider, ProcessedAttributionData, attribute_vmos};
8use fuchsia_async::WakeupTime;
9use fuchsia_inspect::{ArrayProperty, Node, StringProperty};
10use fuchsia_inspect_contrib::nodes::BoundedListNode;
11use fuchsia_trace::duration;
12use futures::{TryFutureExt, join, try_join};
13use humansize::{BINARY, FormatSizeOptions, format_size};
14use stalls::StallProvider;
15use traces::CATEGORY_MEMORY_CAPTURE;
16
17use fidl_fuchsia_kernel as fkernel;
18use fidl_fuchsia_metrics as fmetrics;
19
20/// Periodically collect and report memory attribution data.
21///
22/// This produces a simplified schedule where, instead of having
23/// independent reports on their own cadence, we collect based on
24/// cobalt's frequency and produce all other reports based on that one
25/// collection, saving significant CPU at the expense of flexibility.
26pub async fn periodic_monitoring(
27    kernel_stats_proxy: fkernel::StatsProxy,
28    attribution_data_service: &impl AttributionDataProvider,
29    stall_provider: &impl StallProvider,
30    metric_event_logger: &fmetrics::MetricEventLoggerProxy,
31    bucket_definitions: &[BucketDefinition],
32    inspect_root: Node,
33) -> Result<()> {
34    let mut _current; // Ensure the inspect property is kept as long as necessary.
35    let mut bucket_list_node = std::cell::OnceCell::new();
36    let bucket_names = std::cell::OnceCell::new();
37    let bucket_codes = cobalt::prepare_bucket_codes(bucket_definitions);
38    loop {
39        {
40            duration!(CATEGORY_MEMORY_CAPTURE, c"periodic_monitoring");
41            let timestamp = zx::BootInstant::get();
42            // Retrieve (concurrently) the data necessary to perform the aggregation.
43            let (kmem_stats, kmem_stats_compression) = try_join!(
44                kernel_stats_proxy.get_memory_stats().map_err(anyhow::Error::from),
45                kernel_stats_proxy.get_memory_stats_compression().map_err(anyhow::Error::from)
46            )
47            .with_context(|| "Failed to get kernel memory stats")?;
48            // This is the very expensive operation.
49            let attribution_data = attribute_vmos(attribution_data_service.get_attribution_data()?);
50            let digest = Digest::compute(
51                &attribution_data,
52                &kmem_stats,
53                &kmem_stats_compression,
54                bucket_definitions,
55                false,
56            )?;
57            _current =
58                update_inspect_summary(attribution_data, timestamp, &kmem_stats, &inspect_root);
59            cobalt::upload_metrics(
60                timestamp,
61                &kmem_stats,
62                metric_event_logger,
63                &digest,
64                &bucket_codes,
65            )
66            .await?;
67            {
68                // Initialize the inspect property containing the buckets names, if necessary.
69                let _ = bucket_names.get_or_init(|| {
70                    // Create inspect node to store buckets related information.
71                    let bucket_names =
72                        inspect_root.create_string_array("buckets", digest.buckets.len());
73                    for (i, attribution_processing::digest::Bucket { name, .. }) in
74                        digest.buckets.iter().enumerate()
75                    {
76                        bucket_names.set(i, name);
77                    }
78                    bucket_names
79                });
80            }
81            update_inspect_history(
82                timestamp,
83                &digest,
84                stall_provider,
85                &mut bucket_list_node,
86                &inspect_root,
87            )?;
88        }
89        join!(
90            fuchsia_async::Task::local(async {
91                let _ = scudo::mallopt(scudo::M_PURGE_ALL, 0);
92            }),
93            zx::MonotonicDuration::from_minutes(5).into_timer()
94        );
95    }
96}
97
98fn update_inspect_summary(
99    attribution_data: ProcessedAttributionData,
100    timestamp: zx::BootInstant,
101    kmem_stats: &fkernel::MemoryStats,
102    inspect_root: &Node,
103) -> StringProperty {
104    inspect_root.create_string(
105        "current",
106        record_summary(attribution_data.summary(), timestamp, &kmem_stats),
107    )
108}
109
110/// Update inspect data with collected memory information.
111fn update_inspect_history(
112    timestamp: zx::BootInstant,
113    digest: &Digest,
114    stall_provider: &impl StallProvider,
115    bucket_list_node: &mut std::cell::OnceCell<BoundedListNode>,
116    inspect_root: &Node,
117) -> Result<()> {
118    let stall_values =
119        stall_provider.get_stall_info().with_context(|| "Unable to retrieve stall information")?;
120    // Add an entry for the current aggregation.
121    let _ = bucket_list_node
122        .get_or_init(|| BoundedListNode::new(inspect_root.create_child("measurements"), 100));
123    bucket_list_node.get_mut().unwrap().add_entry(|n| {
124        n.record_int("timestamp", timestamp.into_nanos());
125        {
126            let committed_sizes = n.create_uint_array("bucket_sizes", digest.buckets.len());
127            let populated_sizes =
128                n.create_uint_array("bucket_sizes_populated", digest.buckets.len());
129            for (i, b) in digest.buckets.iter().enumerate() {
130                committed_sizes.set(i, b.committed_size as u64);
131                populated_sizes.set(i, b.populated_size as u64);
132            }
133            n.record(committed_sizes);
134            n.record(populated_sizes);
135        }
136
137        n.record_child("stalls", |child| {
138            child.record_uint(
139                "some_ms",
140                stall_values.some.as_millis().try_into().unwrap_or(u64::MAX),
141            );
142            child.record_uint(
143                "full_ms",
144                stall_values.full.as_millis().try_into().unwrap_or(u64::MAX),
145            );
146        });
147    });
148    Ok(())
149}
150
151fn record_summary(
152    mut summary: MemorySummary,
153    timestamp: zx::Instant<zx::BootTimeline>,
154    kmem_stats: &fkernel::MemoryStats,
155) -> String {
156    let size_options = FormatSizeOptions::from(BINARY).space_after_value(false);
157    summary.principals.sort_by_key(|p| std::cmp::Reverse(p.populated_private));
158    format!(
159        "Time: {} VMO: {} Free: {}\n{}",
160        timestamp.into_nanos(),
161        kmem_stats
162            .vmo_bytes
163            .and_then(|b| Some(format_size(b, size_options)))
164            .unwrap_or_else(|| "?".to_string()),
165        kmem_stats
166            .free_bytes
167            .and_then(|b| Some(format_size(b, size_options)))
168            .unwrap_or_else(|| "?".to_string()),
169        summary
170            .principals
171            .iter_mut()
172            .filter_map(|principal| {
173                if principal.populated_total == 0 {
174                    return None;
175                }
176                let (populated_private, populated_scaled, populated_total) = match (|| {
177                    Some((
178                        format_size(principal.populated_private, size_options),
179                        format_size(principal.populated_scaled as u64, size_options),
180                        format_size(principal.populated_total, size_options),
181                    ))
182                })(
183                ) {
184                    Some(ok) => ok,
185                    None => return None,
186                };
187                let mut vmos = principal.vmos.iter().collect::<Vec<_>>();
188                vmos.sort_by_key(|(_, vmo)| {
189                    std::cmp::Reverse((vmo.committed_private, vmo.committed_scaled as u64))
190                });
191                let sizes = if populated_total == populated_private {
192                    format_args!("{}", populated_total)
193                } else {
194                    format_args!("{} {} {}", populated_private, populated_scaled, populated_total)
195                };
196                Some(format!(
197                    "{}: {}; {}",
198                    principal.name,
199                    sizes,
200                    vmos.iter()
201                        .filter_map(|(name, vmo)| {
202                            if vmo.committed_total == 0 {
203                                None
204                            } else {
205                                Some(format!(
206                                    "{} {} {} {}",
207                                    name,
208                                    format_size(vmo.populated_private, size_options),
209                                    format_size(vmo.populated_scaled as u64, size_options),
210                                    format_size(vmo.populated_total, size_options)
211                                ))
212                            }
213                        })
214                        .collect::<Vec<_>>()
215                        .join("; ")
216                ))
217            })
218            .collect::<Vec<_>>()
219            .join("\n")
220    )
221}
#[cfg(test)]
mod tests {
    use super::*;
    use attribution_processing::{
        Attribution, AttributionData, GlobalPrincipalIdentifier, Principal, PrincipalDescription,
        PrincipalType, Resource, ResourceReference, ZXName,
    };
    use diagnostics_assertions::{NonZeroIntProperty, assert_data_tree};
    use std::num::NonZero;
    use std::time::Duration;

    use fidl_fuchsia_memory_attribution_plugin as fplugin;

    /// Kernel statistics in which every field holds a distinct value, so that each number in
    /// the expected output can be traced back to the field it comes from.
    fn get_kernel_stats() -> (fkernel::MemoryStats, fkernel::MemoryStatsCompression) {
        let memory = fkernel::MemoryStats {
            total_bytes: Some(1),
            free_bytes: Some(2),
            wired_bytes: Some(3),
            total_heap_bytes: Some(4),
            free_heap_bytes: Some(5),
            vmo_bytes: Some(6),
            mmu_overhead_bytes: Some(7),
            ipc_bytes: Some(8),
            other_bytes: Some(9),
            free_loaned_bytes: Some(10),
            cache_bytes: Some(11),
            slab_bytes: Some(12),
            zram_bytes: Some(13),
            vmo_reclaim_total_bytes: Some(14),
            vmo_reclaim_newest_bytes: Some(15),
            vmo_reclaim_oldest_bytes: Some(16),
            vmo_reclaim_disabled_bytes: Some(17),
            vmo_discardable_locked_bytes: Some(18),
            vmo_discardable_unlocked_bytes: Some(19),
            ..Default::default()
        };
        let compression = fkernel::MemoryStatsCompression {
            uncompressed_storage_bytes: Some(20),
            compressed_storage_bytes: Some(21),
            compressed_fragmentation_bytes: Some(22),
            compression_time: Some(23),
            decompression_time: Some(24),
            total_page_compression_attempts: Some(25),
            failed_page_compression_attempts: Some(26),
            total_page_decompressions: Some(27),
            compressed_page_evictions: Some(28),
            eager_page_compressions: Some(29),
            memory_pressure_page_compressions: Some(30),
            critical_memory_page_compressions: Some(31),
            pages_decompressed_unit_ns: Some(32),
            pages_decompressed_within_log_time: Some([40, 41, 42, 43, 44, 45, 46, 47]),
            ..Default::default()
        };
        (memory, compression)
    }

    /// Attribution data made of one principal that attributes a single VMO (koid 10, named
    /// "resource") to itself: 1024 bytes committed and 2048 bytes populated.
    fn get_attribution_data() -> ProcessedAttributionData {
        fn principal_id() -> GlobalPrincipalIdentifier {
            GlobalPrincipalIdentifier(NonZero::new(1).unwrap())
        }

        let principal = Principal {
            identifier: principal_id(),
            description: Some(PrincipalDescription::Component("principal".to_owned())),
            principal_type: PrincipalType::Runnable,
            parent: None,
        };
        let vmo = fplugin::Vmo {
            parent: None,
            private_committed_bytes: Some(1024),
            private_populated_bytes: Some(2048),
            scaled_committed_bytes: Some(1024),
            scaled_populated_bytes: Some(2048),
            total_committed_bytes: Some(1024),
            total_populated_bytes: Some(2048),
            ..Default::default()
        };
        let resource =
            Resource { koid: 10, name_index: 0, resource_type: fplugin::ResourceType::Vmo(vmo) };
        let attribution = Attribution {
            source: principal_id(),
            subject: principal_id(),
            resources: vec![ResourceReference::KernelObject(10)],
        };

        attribute_vmos(AttributionData {
            principals_vec: vec![principal],
            resources_vec: vec![resource],
            resource_names: vec![ZXName::from_string_lossy("resource")],
            attributions: vec![attribution],
        })
    }

    /// Stall provider reporting fixed values: 10ms of `some` and 20ms of `full` stalls.
    #[derive(Clone)]
    struct FakeStallProvider {}
    impl StallProvider for FakeStallProvider {
        fn get_stall_info(&self) -> Result<stalls::MemoryStallMetrics, anyhow::Error> {
            let some = Duration::from_millis(10);
            let full = Duration::from_millis(20);
            Ok(stalls::MemoryStallMetrics { some, full })
        }
    }

    /// Expected content of a bucket size array. The committed and the populated arrays only
    /// differ by their first entry, which is passed as `undigested`.
    fn expected_bucket_sizes(undigested: u64) -> Vec<u64> {
        vec![
            undigested, // Undigested: matches the single unmatched VMO
            // Orphaned: vmo_bytes reported by the kernel but not covered by any
            // bucket => 6 - 1024 => 0 (saturating, cannot be negative)
            0u64,
            54u64, // Kernel: 3 wired + 4 heap + 7 mmu + 8 IPC + 9 other + 12 slab + 11 cache = 54
            2u64,  // Free
            14u64, // [Addl]PagerTotal
            15u64, // [Addl]PagerNewest
            16u64, // [Addl]PagerOldest
            18u64, // [Addl]DiscardableLocked
            19u64, // [Addl]DiscardableUnlocked
            21u64, // [Addl]ZramCompressedBytes
            6u64,  // [Addl]PopulatedAnonymousBytes
        ]
    }

    #[fuchsia::test]
    async fn test_update_inspect() -> Result<()> {
        let inspector = fuchsia_inspect::Inspector::default();
        let digest_node = inspector.root().create_child("logger");
        let timestamp = zx::BootInstant::get();
        let attribution_data = get_attribution_data();
        let (kernel_stats, kernel_stats_compression) = get_kernel_stats();
        let digest = Digest::compute(
            &attribution_data,
            &kernel_stats,
            &kernel_stats_compression,
            &vec![],
            false,
        )?;
        let mut bucket_list_node = std::cell::OnceCell::new();

        // The summary property has to stay alive until the tree is checked.
        let _summary =
            update_inspect_summary(attribution_data, timestamp, &kernel_stats, &digest_node);

        // Update inspect history twice, and ensure both instances are recorded.
        for _ in 0..2 {
            update_inspect_history(
                timestamp,
                &digest,
                &FakeStallProvider {},
                &mut bucket_list_node,
                &digest_node,
            )?;
        }

        assert_data_tree!(inspector, root: {
            logger: {
                measurements: {
                    // First update.
                    "0": {
                        timestamp: NonZeroIntProperty,
                        bucket_sizes: expected_bucket_sizes(1024),
                        bucket_sizes_populated: expected_bucket_sizes(2048),
                        stalls: {
                            some_ms: 10u64,
                            full_ms: 20u64,
                        },
                    },
                    // Second update.
                    "1": {
                        timestamp: NonZeroIntProperty,
                        bucket_sizes: expected_bucket_sizes(1024),
                        bucket_sizes_populated: expected_bucket_sizes(2048),
                        stalls: {
                            some_ms: 10u64,
                            full_ms: 20u64,
                        },
                    },
                },
                current: regex::Regex::new(r"^Time: \d+ VMO: 6B Free: 2B\nprincipal: 2KiB; resource 2KiB 2KiB 2KiB")?,
            },
        });
        Ok(())
    }
}