// persistence/lib.rs
// Copyright 2020 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

//! `diagnostics-persistence` component persists Inspect VMOs and serves them at the next boot.

7mod fetcher;
8mod file_handler;
9mod inspect_server;
10mod scheduler;
11
12use anyhow::{Context, Error, anyhow};
13use argh::FromArgs;
14use fidl::endpoints;
15use fidl::endpoints::ControlHandle;
16use fidl_fuchsia_component_sandbox as fsandbox;
17use fidl_fuchsia_diagnostics as fdiagnostics;
18use fidl_fuchsia_inspect as finspect;
19use fidl_fuchsia_process_lifecycle as flifecycle;
20use fidl_fuchsia_update as fupdate;
21use fuchsia_async as fasync;
22use fuchsia_component::server::ServiceFs;
23use fuchsia_inspect::component;
24use fuchsia_inspect::health::Reporter;
25use fuchsia_runtime::{HandleInfo, HandleType};
26use fuchsia_sync::Mutex;
27use futures::{FutureExt, StreamExt, TryStreamExt, select};
28use log::*;
29use persistence_build_config::Config;
30use sandbox::CapabilityRef;
31use scheduler::Scheduler;
32use serde::{Deserialize, Serialize};
33use std::pin::pin;
34use std::sync::{Arc, LazyLock};
35use zx::{BootInstant, HandleBased};
36
/// The name of the subcommand and the logs-tag.
pub const PROGRAM_NAME: &str = "persistence";
/// Inspect node under which persisted data from the previous boot is served
/// (see `publish_inspect_data` / `inspect_server::record_persist_node`).
pub const PERSIST_NODE_NAME: &str = "persist";
/// Added after persisted data is fully published; records the boot-time
/// instant (nanoseconds) of publication.
pub const PUBLISHED_TIME_KEY: &str = "published";

/// Key in escrowed dictionary to immutable state persisted across instances of
/// this component across the same boot.
const INSTANCE_STATE_KEY: &str = "InstanceState";
/// Key in escrowed dictionary to frozen Inspect VMO.
const FROZEN_INSPECT_VMO_KEY: &str = "FrozenInspectVMO";
48
/// Parsed CML structured configuration.
///
/// Populated once at startup into the `BUILD_CONFIG` static.
#[derive(Clone, Debug)]
pub(crate) struct BuildConfig {
    /// If true, don't wait for a successful update check before publishing
    /// previous boot's persisted Inspect data.
    skip_update_check: bool,
    /// Duration to wait for FIDL requests before stalling the connection.
    /// `INFINITE` when idling is disabled (negative timeout in config).
    stall_interval: zx::MonotonicDuration,
}
58
59/// Build config, as defined by the CML structured configuration.
60pub(crate) static BUILD_CONFIG: LazyLock<BuildConfig> = LazyLock::new(|| {
61    let config = Config::take_from_startup_handle();
62    component::inspector().root().record_child("config", |node| config.record_inspect(node));
63
64    let Config { skip_update_check, stop_on_idle_timeout_millis } = config;
65
66    if skip_update_check {
67        info!("Configured to skip update check");
68    }
69
70    let stall_interval = if stop_on_idle_timeout_millis >= 0 {
71        info!("Configured to idle after {stop_on_idle_timeout_millis}ms of inactivity");
72        zx::MonotonicDuration::from_millis(stop_on_idle_timeout_millis)
73    } else {
74        info!("Not configured to idle after inactivity");
75        zx::MonotonicDuration::INFINITE
76    };
77
78    BuildConfig { skip_update_check, stall_interval }
79});
80
/// Command line args for the `persistence` subcommand (currently takes none).
#[derive(FromArgs, Debug, PartialEq)]
#[argh(subcommand, name = "persistence")]
pub struct CommandLine {}
85
/// Progress of the first system update check, which gates publication of the
/// previous boot's persisted Inspect data.
///
/// NOTE(review): this is serialized (serde/ciborium) inside `PersistedState`
/// when state is escrowed, so variant names are part of the escrowed format —
/// renaming a variant breaks deserialization across instances.
#[derive(Debug, Clone, Serialize, Deserialize)]
enum UpdateCheckStage {
    /// Waiting for the first update check before publishing previous boot
    /// inspect data.
    Waiting,
    /// First update check was skipped, previous boot inspect data has been published.
    Skipped,
    /// First update check has completed, previous boot inspect data has been
    /// published.
    Done,
    /// Unable to subscribe to the first update check.
    Error,
}
/// State to be persisted between instances of this component across
/// the same boot.
///
/// Serialized with ciborium and stored in the escrowed dictionary under
/// `INSTANCE_STATE_KEY` (see `ComponentState::as_escrowed_dict`).
#[derive(Debug, Serialize, Deserialize)]
struct PersistedState {
    /// Persistence config loaded from disk.
    config: persistence_config::Config,
    /// Stage of the update check.
    update_stage: Mutex<UpdateCheckStage>,
}
108
/// Where this component's Inspect data currently lives.
enum InspectState {
    /// Inspect data is actively being served by this component instance.
    Active(inspect_runtime::PublishedInspectController),
    /// Inspect data is escrowed with the Component Framework.
    ///
    /// Archivist monitors this handle for OBJECT_PEER_CLOSED. If the handle is dropped, the
    /// Archivist removes the escrowed Inspect data. By preserving it here, we ensure data
    /// availability even when we restart but skip active republication.
    Escrowed(zx::NullableHandle),
}
119
/// All component-specific state.
///
/// Cheap to clone: all fields are either `Arc`-wrapped or (per its use in
/// `main`) expected to be shallowly cloneable.
#[derive(Clone)]
struct ComponentState {
    /// State persisted across instances of this component.
    persisted: Arc<PersistedState>,
    /// Listener for Sample; handles fuchsia.diagnostics SampleSink streams
    /// (see `IncomingRequest::SampleSink` dispatch in `main`).
    scheduler: Scheduler,
    /// Shared state for Inspect data. Becomes `None` once taken for escrow in
    /// `as_escrowed_dict`.
    inspect: Arc<Mutex<Option<InspectState>>>,
}
130
impl ComponentState {
    /// Load state from a previous instance if possible, otherwise initialize
    /// new state.
    ///
    /// A prior instance (same boot) may have escrowed a dictionary with the
    /// Component Framework before stopping; if the `EscrowedDictionary`
    /// startup handle is present, state is restored from it via
    /// `from_escrow`. Otherwise fresh state is built via `new`.
    async fn load(
        scope: fasync::ScopeHandle,
        store: &sandbox::CapabilityStore,
    ) -> Result<Self, Error> {
        if let Some(dictionary) =
            fuchsia_runtime::take_startup_handle(HandleInfo::new(HandleType::EscrowedDictionary, 0))
        {
            debug!("Loading component state from escrowed dictionary");
            return ComponentState::from_escrow(scope.clone(), store, dictionary)
                .await
                .context("Failed to load component state from escrowed dictionary");
        }

        debug!("No escrowed dictionary available; generating one");
        Self::new(scope.clone()).await.context("Failed to create component state")
    }

    /// Build fresh state for a first-of-boot instance.
    ///
    /// Publishes a new Inspect tree, loads the persistence config from disk
    /// (discarding stale persisted files), subscribes the `Scheduler`, and
    /// then either publishes last boot's data immediately (when configured to
    /// skip the update check) or registers a fuchsia.update Notifier to wait
    /// for the first update check.
    async fn new(scope: fasync::ScopeHandle) -> Result<Self, Error> {
        let inspect_controller = inspect_runtime::publish(
            component::inspector(),
            inspect_runtime::PublishOptions::default().custom_scope(scope.clone()),
        )
        .ok_or_else(|| anyhow!("failed to publish inspect"))?;

        let config =
            persistence_config::load_configuration_files().context("Error loading configs")?;
        file_handler::forget_old_data(&config).await?;

        let scheduler = Scheduler::new(&config);
        scheduler
            .subscribe(scope.clone(), &config)
            .await
            .context("Failed to subscribe to fuchsia.diagnostics.Sample")?;

        // The initial update stage determines whether previous-boot data is
        // published now or deferred until the Notifier fires.
        let persisted = {
            let update_stage = if BUILD_CONFIG.skip_update_check {
                UpdateCheckStage::Skipped
            } else {
                UpdateCheckStage::Waiting
            };
            Arc::new(PersistedState { config, update_stage: Mutex::new(update_stage) })
        };

        if BUILD_CONFIG.skip_update_check {
            publish_inspect_data().await;
        } else {
            // Listen for the first update check.
            let notifier_client = {
                let (notifier_client, notifier_request_stream) =
                    fidl::endpoints::create_request_stream::<fupdate::NotifierMarker>();
                let persisted = persisted.clone();
                scope.spawn(async move {
                    if let Err(e) = handle_update_done(notifier_request_stream, persisted).await {
                        error!("Failed to handle NotifierRequest: {e}");
                    }
                });
                notifier_client
            };

            match fuchsia_component::client::connect_to_protocol::<fupdate::ListenerMarker>() {
                Ok(proxy) => {
                    // Subscription failure leaves us in `Error`: previous-boot
                    // data is intentionally not published in that case.
                    if let Err(e) = proxy.notify_on_first_update_check(
                        fupdate::ListenerNotifyOnFirstUpdateCheckRequest {
                            notifier: Some(notifier_client),
                            ..Default::default()
                        },
                    ) {
                        error!("Error subscribing to first update check; not publishing: {e:?}");
                        *persisted.update_stage.lock() = UpdateCheckStage::Error;
                    }
                }
                Err(e) => {
                    // TODO(https://fxbug.dev/444526593): Consider bailing
                    // if the update checker is not available.
                    warn!(e:?; "Unable to connect to fuchsia.update.Listener; will publish immediately");
                    *persisted.update_stage.lock() = UpdateCheckStage::Done;
                }
            }
        }

        Ok(Self {
            persisted,
            scheduler,
            inspect: Arc::new(Mutex::new(Some(InspectState::Active(inspect_controller)))),
        })
    }

    /// Restore state escrowed by a previous instance during this boot.
    ///
    /// Imports the escrowed dictionary, deserializes `PersistedState` from
    /// `INSTANCE_STATE_KEY`, and recovers the frozen Inspect VMO token from
    /// `FROZEN_INSPECT_VMO_KEY`. Whether Inspect is re-published writable or
    /// left frozen depends on how far the update-check flow progressed.
    async fn from_escrow<'a>(
        scope: fasync::ScopeHandle,
        store: &'a sandbox::CapabilityStore,
        dictionary: zx::NullableHandle,
    ) -> Result<Self, Error> {
        let dict = store
            .import(fsandbox::DictionaryRef { token: dictionary.into() })
            .await
            .context("Error importing from component startup handle")?;

        let persisted_bytes = dict
            .get::<sandbox::Data<'a>>(INSTANCE_STATE_KEY)
            .await
            .context("Error getting instance state")?
            .export::<Vec<u8>>()
            .await
            .context("Error exporting as buffer")?;
        let persisted: PersistedState = ciborium::from_reader(&persisted_bytes[..])
            .context("Failed to deserialize InstanceState")?;
        let update_stage = persisted.update_stage.lock().clone();

        let escrow_token = dict
            .get::<sandbox::Handle<'a>>(FROZEN_INSPECT_VMO_KEY)
            .await
            .context("Failed to get frozen Inspect VMO")?
            .export::<zx::NullableHandle>()
            .await
            .context("Failed to export handle")?;

        let inspect = match update_stage {
            UpdateCheckStage::Waiting | UpdateCheckStage::Error => {
                // Create a new, writable Inspect tree. The previous instance of
                // Persistence did not receive the signal to persist data from
                // the last boot, but this instance might.
                let escrow_token =
                    finspect::EscrowToken { token: zx::EventPair::from(escrow_token) };

                // Swap escrowed Inspect data with a new Tree server.
                let inspect_runtime::FetchEscrowResult { vmo: _, server } =
                    inspect_runtime::fetch_escrow(
                        escrow_token,
                        inspect_runtime::FetchEscrowOptions::new().replace_with_tree(),
                    )
                    .await
                    .context("Failed to fetch escrowed Inspect data")?;

                let opts = inspect_runtime::PublishOptions::default()
                    .custom_scope(scope.clone())
                    .on_tree_server(server.context("FetchEscrow did not return a TreeHandle")?);

                let inspect_controller = inspect_runtime::publish(component::inspector(), opts)
                    .context("Failed to publish Inspect data")?;

                InspectState::Active(inspect_controller)
            }
            UpdateCheckStage::Done | UpdateCheckStage::Skipped => {
                // Persistence has already published persisted data from last
                // boot. By not republishing, the existing frozen Inspect data
                // remains published.
                //
                // Persistence needs to continue running to record data to
                // persist for the next boot.
                InspectState::Escrowed(escrow_token)
            }
        };

        // Do not spawn FIDL request handlers when returning from escrow. The
        // previous component instance escrowed its request streams, sending
        // them to the Component Framework. When an incoming request is received
        // on escrowed channels held by the Component Framework, it will be
        // routed to this instance's incoming namespace (via IncomingRequest)
        // then this instance will spawn new request handlers.

        Ok(Self {
            scheduler: Scheduler::new(&persisted.config),
            persisted: Arc::new(persisted),
            inspect: Arc::new(Mutex::new(Some(inspect))),
        })
    }

    /// Serialize component state into a dictionary suitable for escrowing
    /// with the Component Framework before this instance stops.
    ///
    /// Takes the `InspectState` out of the shared slot (leaving `None`); an
    /// `Active` tree is frozen via `escrow_frozen`, an already-`Escrowed`
    /// handle is passed through. On freeze failure the error is logged and
    /// the dictionary simply omits the VMO entry.
    async fn as_escrowed_dict(
        store: &sandbox::CapabilityStore,
        persisted: impl AsRef<PersistedState>,
        inspect: Arc<Mutex<Option<InspectState>>>,
    ) -> Result<fsandbox::DictionaryRef, Error> {
        let dict = store.create_dictionary().await?;

        // Save PersistedState
        let mut persisted_bytes: Vec<u8> = Vec::new();
        ciborium::into_writer(persisted.as_ref(), &mut persisted_bytes)
            .context("Failed to serialize InstanceState")?;
        let data = store.import(persisted_bytes).await?;
        dict.insert(INSTANCE_STATE_KEY, data).await?;

        // Save frozen Inspect VMO.
        // Note: the lock guard is dropped before any await below.
        let inspect = inspect.lock().take();
        match inspect {
            Some(InspectState::Active(inspect_controller)) => {
                match inspect_controller
                    .escrow_frozen(inspect_runtime::EscrowOptions::default())
                    .await
                {
                    Ok(escrow_token) => {
                        let handle = escrow_token.token.into_handle();
                        let data = store.import(handle).await?;
                        dict.insert(FROZEN_INSPECT_VMO_KEY, data).await?;
                    }
                    Err(e) => {
                        error!("Failed to escrow frozen Inspect VMO: {e:?}");
                    }
                }
            }
            Some(InspectState::Escrowed(handle)) => {
                let data = store.import(handle).await?;
                dict.insert(FROZEN_INSPECT_VMO_KEY, data).await?;
            }
            None => {}
        }

        dict.export().await.context("Failed to export escrowed dictionary")
    }
}
343
/// Handle fuchsia.update/Notifier requests. Notifies of when an update check
/// has been completed, signaling this component to publish persisted data to
/// Inspect.
///
/// At most one request is consumed. If no request arrives within
/// `BUILD_CONFIG.stall_interval`, the unread server endpoint is handed back
/// to the framework at `/escrow/fuchsia.update.Notifier` so a later instance
/// can resume serving it. The early `return Ok(())` paths below deliberately
/// skip that escrow step: the request was fully handled and the connection
/// shut down.
async fn handle_update_done(
    stream: fupdate::NotifierRequestStream,
    persisted: Arc<PersistedState>,
) -> Result<(), Error> {
    // Wrap the stream with stall detection; `stalled` resolves with the
    // server endpoint if the stream goes idle before yielding a request.
    let (stream, stalled) = detect_stall::until_stalled(stream, BUILD_CONFIG.stall_interval);
    let mut stream = pin!(stream);
    if let Ok(Some(request)) = stream.try_next().await {
        debug!("Received fuchsia.update.NotifierRequest");
        match request {
            fupdate::NotifierRequest::Notify { control_handle } => {
                debug!("Received notification that the update check has completed");
                let stage = persisted.update_stage.lock().clone();
                match stage {
                    UpdateCheckStage::Skipped | UpdateCheckStage::Error => {
                        // NOTE(review): this handler is only registered when a
                        // subscription was attempted, so these stages imply no
                        // Notify can arrive — confirm no escrowed-channel path
                        // can deliver one before relying on this panic.
                        unreachable!("Received impossible notification")
                    }
                    UpdateCheckStage::Waiting => {
                        *persisted.update_stage.lock() = UpdateCheckStage::Done;
                        info!("...Update check has completed; publishing previous boot data");
                        publish_inspect_data().await;
                        control_handle.shutdown();
                        return Ok(());
                    }
                    UpdateCheckStage::Done => {
                        debug!("Ignoring update check notification; already received one");
                        control_handle.shutdown();
                        return Ok(());
                    }
                }
            }
        }
    }
    if let Ok(Some(server_end)) = stalled.await {
        // Send the server endpoint back to the framework.
        debug!("Escrowing fuchsia.update.Notifier");
        fuchsia_component::client::connect_channel_to_protocol_at_path(
            server_end,
            "/escrow/fuchsia.update.Notifier",
        )
        .context("Failed to connect to fuchsia.update.Notifier")?;
    }
    Ok(())
}
390
391async fn publish_inspect_data() {
392    // TODO(https://fxbug.dev/444525059): Set health properly.
393    component::health().set_ok();
394    if let Err(e) = inspect_server::record_persist_node(PERSIST_NODE_NAME).await {
395        error!("Failed to serve persisted Inspect data from previous boot: {e}");
396    }
397    component::inspector().root().record_int(PUBLISHED_TIME_KEY, BootInstant::get().into_nanos());
398}
399
/// Protocol connections arriving on this component's outgoing `svc` directory.
enum IncomingRequest {
    /// fuchsia.update.Notifier — signals completion of the first update check.
    UpdateDone(fupdate::NotifierRequestStream),
    /// fuchsia.diagnostics SampleSink — dispatched to the `Scheduler`.
    SampleSink(fdiagnostics::SampleSinkRequestStream),
}
404
/// Component entry point: load (or restore) state, serve the outgoing
/// directory, and run until either a Lifecycle Stop request arrives or the
/// component goes idle — in the idle case, state is escrowed back to the
/// Component Framework before stopping.
pub async fn main(_args: CommandLine) -> Result<(), Error> {
    info!("Starting Diagnostics Persistence service");
    // initialize to 5MiB
    component::init_inspector_with_size(1024 * 1024 * 5);
    let scope = fasync::Scope::new();
    let store = sandbox::CapabilityStore::connect()?;
    let state = ComponentState::load(scope.to_handle(), &store)
        .await
        .context("Error getting escrowed state")?;
    component::health().set_starting_up();

    // Expose the two served protocols from the outgoing `svc` directory.
    let mut fs = ServiceFs::new();
    fs.dir("svc").add_fidl_service(IncomingRequest::UpdateDone);
    fs.dir("svc").add_fidl_service(IncomingRequest::SampleSink);
    fs.take_and_serve_directory_handle().expect("Failed to take service directory handle");

    // The Lifecycle startup handle is a server end through which the
    // framework sends Stop, and through which we send OnEscrow when idling.
    let lifecycle =
        fuchsia_runtime::take_startup_handle(HandleInfo::new(HandleType::Lifecycle, 0)).unwrap();
    let lifecycle: zx::Channel = lifecycle.into();
    let lifecycle: endpoints::ServerEnd<flifecycle::LifecycleMarker> = lifecycle.into();
    let (mut lifecycle_request_stream, lifecycle_control_handle) =
        lifecycle.into_stream_and_control_handle();
    // Completes only on a Stop request; stream errors and closure park
    // forever so the idle path below decides shutdown instead.
    let mut lifecycle_task = pin!(
        async move {
            match lifecycle_request_stream.next().await {
                Some(Ok(flifecycle::LifecycleRequest::Stop { .. })) => {
                    debug!("Received stop request");
                    // TODO(https://fxbug.dev/444529707): Teach `ServiceFs` and
                    // others to skip the `until_stalled` timeout when this happens
                    // so we can cleanly stop the component.
                }
                Some(Err(e)) => {
                    error!("Received FIDL error from Lifecycle: {e:?}");
                    std::future::pending::<()>().await
                }
                None => {
                    debug!("Lifecycle request stream closed");
                    std::future::pending::<()>().await
                }
            }
        }
        .fuse()
    );

    // Serve incoming connections; when ServiceFs detects a stall (no activity
    // for `stall_interval`), it yields the outgoing directory back so it can
    // be escrowed along with serialized state.
    let mut outgoing_dir_task =
        fs.until_stalled(BUILD_CONFIG.stall_interval).for_each_concurrent(None, move |item| {
            let lifecycle_control_handle = lifecycle_control_handle.clone();
            let state = state.clone();
            let store = store.clone();
            async move {
                match item {
                    fuchsia_component::server::Item::Request(req, _active_guard) => match req {
                        IncomingRequest::UpdateDone(stream) => {
                            if let Err(e) = handle_update_done(stream, state.persisted).await {
                                error!("Failed to handle NotifierRequest: {e}");
                            }
                        },
                        IncomingRequest::SampleSink(stream) => {
                            if let Err(e) = state.scheduler.handle_sample_sink(stream).await {
                                error!("Failed to handle SampleSinkRequest: {e}");
                            }
                        },
                    },
                    fuchsia_component::server::Item::Stalled(outgoing_directory) => {
                        // Serialization failure is non-fatal: we still escrow
                        // the outgoing directory, just without a dictionary.
                        let escrowed_dictionary = match ComponentState::as_escrowed_dict(
                            &store,
                            state.persisted,
                            state.inspect,
                        ).await {
                            Ok(dict) => Some(dict),
                            Err(e) => {
                                error!(
                                    "Failed to serialize PersistedState into component dictionary: {e}"
                                );
                                None
                            }
                        };
                        lifecycle_control_handle
                            .send_on_escrow(flifecycle::LifecycleOnEscrowRequest {
                                outgoing_dir: Some(outgoing_directory.into()),
                                escrowed_dictionary,
                                ..Default::default()
                            })
                            .unwrap();
                    }
                }
            }
        });

    // Stop: cancel outstanding work on explicit Stop; drain it (join) when
    // stopping due to idleness so escrow completes cleanly.
    select! {
        _ = lifecycle_task => {
            info!("Stopping due to lifecycle request");
            scope.cancel().await;
        },
        _ = outgoing_dir_task => {
            info!("Stopping due to idle activity");
            scope.join().await;
        },
    }

    Ok(())
}