persistence/lib.rs

// Copyright 2020 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

//! The `diagnostics-persistence` component persists Inspect VMOs and serves them at the next boot.

mod fetcher;
mod file_handler;
mod inspect_server;
mod scheduler;

use anyhow::{Context, Error, anyhow};
use argh::FromArgs;
use fidl::endpoints;
use fidl::endpoints::ControlHandle;
use fuchsia_component::server::ServiceFs;
use fuchsia_inspect::health::Reporter;
use fuchsia_inspect::{Inspector, InspectorConfig, component};
use fuchsia_runtime::{HandleInfo, HandleType};
use fuchsia_sync::Mutex;
use futures::{FutureExt, StreamExt, TryStreamExt, select};
use log::*;
use persistence_build_config::Config;
use sandbox::CapabilityRef;
use scheduler::Scheduler;
use serde::{Deserialize, Serialize};
use std::pin::pin;
use std::sync::{Arc, LazyLock};
use zx::{BootInstant, HandleBased};
use {
    fidl_fuchsia_component_sandbox as fsandbox, fidl_fuchsia_diagnostics as fdiagnostics,
    fidl_fuchsia_inspect as finspect, fidl_fuchsia_process_lifecycle as flifecycle,
    fidl_fuchsia_update as fupdate, fuchsia_async as fasync,
};

/// The name of the subcommand and the log tag.
pub const PROGRAM_NAME: &str = "persistence";
/// Name of the Inspect node under which previous-boot data is published.
pub const PERSIST_NODE_NAME: &str = "persist";
/// Inspect key recorded once persisted data has been fully published.
pub const PUBLISHED_TIME_KEY: &str = "published";

/// Key in the escrowed dictionary for the immutable state persisted across
/// instances of this component within the same boot.
const INSTANCE_STATE_KEY: &str = "InstanceState";
/// Key in the escrowed dictionary for the frozen Inspect VMO.
const FROZEN_INSPECT_VMO_KEY: &str = "FrozenInspectVMO";
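
// Together these keys describe the full shape of the escrowed dictionary that
// one instance hands to the next (a conceptual sketch, not additional code):
//
//     {
//         "InstanceState":    CBOR-serialized `PersistedState`,
//         "FrozenInspectVMO": escrow token for the frozen Inspect VMO,
//     }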

/// Parsed CML structured configuration.
#[derive(Clone, Debug)]
pub(crate) struct BuildConfig {
    /// If true, don't wait for a successful update check before publishing
    /// the previous boot's persisted Inspect data.
    skip_update_check: bool,
    /// How long a FIDL connection may sit idle before it is considered stalled.
    stall_interval: zx::MonotonicDuration,
}

/// Build config, as defined by the CML structured configuration.
pub(crate) static BUILD_CONFIG: LazyLock<BuildConfig> = LazyLock::new(|| {
    let config = Config::take_from_startup_handle();
    component::inspector().root().record_child("config", |node| config.record_inspect(node));

    let Config { skip_update_check, stop_on_idle_timeout_millis } = config;

    if skip_update_check {
        info!("Configured to skip update check");
    }

    let stall_interval = if stop_on_idle_timeout_millis >= 0 {
        info!("Configured to idle after {stop_on_idle_timeout_millis}ms of inactivity");
        zx::MonotonicDuration::from_millis(stop_on_idle_timeout_millis)
    } else {
        info!("Not configured to idle after inactivity");
        zx::MonotonicDuration::INFINITE
    };

    BuildConfig { skip_update_check, stall_interval }
});
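
// For reference, the structured-config fields consumed above correspond to a
// CML `config` schema along these lines (a sketch; the authoritative schema
// lives in this component's build definition):
//
//     config: {
//         skip_update_check: { type: "bool" },
//         stop_on_idle_timeout_millis: { type: "int64" },
//     }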

/// Command line args
#[derive(FromArgs, Debug, PartialEq)]
#[argh(subcommand, name = "persistence")]
pub struct CommandLine {}

#[derive(Debug, Clone, Serialize, Deserialize)]
enum UpdateCheckStage {
    /// Waiting for the first update check before publishing previous boot
    /// inspect data.
    Waiting,
    /// First update check was skipped; previous boot inspect data has been published.
    Skipped,
    /// First update check has completed; previous boot inspect data has been
    /// published.
    Done,
    /// Unable to subscribe to the first update check.
    Error,
}
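
// How the stage evolves (a summary of the logic in `ComponentState::new` and
// `handle_update_done`; no additional behavior is implied):
//
//     skip_update_check set        => Skipped  (data published at startup)
//     subscription request failed  => Error    (data not published)
//     listener unreachable         => Done     (logged; see `ComponentState::new`)
//     Waiting --first Notify-->       Done     (data published on notification)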

/// State to be persisted between instances of this component within the
/// same boot.
#[derive(Debug, Serialize, Deserialize)]
struct PersistedState {
    /// Persistence config loaded from disk.
    config: persistence_config::Config,
    /// Stage of the update check.
    update_stage: Mutex<UpdateCheckStage>,
}

/// All component-specific state.
#[derive(Clone)]
struct ComponentState {
    /// State persisted across instances of this component.
    persisted: Arc<PersistedState>,
    /// Scheduler that subscribes to fuchsia.diagnostics Sample data and
    /// handles SampleSink requests.
    scheduler: Scheduler,
    /// Controller to republish escrowed Inspect data, if available.
    inspect_controller: Arc<Mutex<Option<inspect_runtime::PublishedInspectController>>>,
}

impl ComponentState {
    /// Load state from a previous instance if possible, otherwise initialize
    /// new state.
    async fn load(
        scope: fasync::ScopeHandle,
        store: &sandbox::CapabilityStore,
    ) -> Result<Self, Error> {
        if let Some(dictionary) =
            fuchsia_runtime::take_startup_handle(HandleInfo::new(HandleType::EscrowedDictionary, 0))
        {
            debug!("Loading component state from escrowed dictionary");
            return ComponentState::from_escrow(scope.clone(), store, dictionary)
                .await
                .context("Failed to load component state from escrowed dictionary");
        }

        debug!("No escrowed dictionary available; generating one");
        Self::new(scope.clone()).await.context("Failed to create component state")
    }

    async fn new(scope: fasync::ScopeHandle) -> Result<Self, Error> {
        let inspect_controller = inspect_runtime::publish(
            component::inspector(),
            inspect_runtime::PublishOptions::default().custom_scope(scope.clone()),
        )
        .ok_or_else(|| anyhow!("failed to publish inspect"))?;

        let config =
            persistence_config::load_configuration_files().context("Error loading configs")?;
        file_handler::forget_old_data(&config).await?;

        let scheduler = Scheduler::new(&config);
        scheduler
            .subscribe(scope.clone(), &config)
            .await
            .context("Failed to subscribe to fuchsia.diagnostics.Sample")?;

        let persisted = {
            let update_stage = if BUILD_CONFIG.skip_update_check {
                UpdateCheckStage::Skipped
            } else {
                UpdateCheckStage::Waiting
            };
            Arc::new(PersistedState { config, update_stage: Mutex::new(update_stage) })
        };

        if BUILD_CONFIG.skip_update_check {
            publish_inspect_data().await;
        } else {
            // Listen for the first update check.
            let notifier_client = {
                let (notifier_client, notifier_request_stream) =
                    fidl::endpoints::create_request_stream::<fupdate::NotifierMarker>();
                let persisted = persisted.clone();
                scope.spawn(async move {
                    if let Err(e) = handle_update_done(notifier_request_stream, persisted).await {
                        error!("Failed to handle NotifierRequest: {e}");
                    }
                });
                notifier_client
            };

            match fuchsia_component::client::connect_to_protocol::<fupdate::ListenerMarker>() {
                Ok(proxy) => {
                    if let Err(e) = proxy.notify_on_first_update_check(
                        fupdate::ListenerNotifyOnFirstUpdateCheckRequest {
                            notifier: Some(notifier_client),
                            ..Default::default()
                        },
                    ) {
                        error!("Error subscribing to first update check; not publishing: {e:?}");
                        *persisted.update_stage.lock() = UpdateCheckStage::Error;
                    }
                }
                Err(e) => {
                    // TODO(https://fxbug.dev/444526593): Consider bailing
                    // if the update checker is not available.
                    warn!(e:?; "Unable to connect to fuchsia.update.Listener; will publish immediately");
                    *persisted.update_stage.lock() = UpdateCheckStage::Done;
                }
            }
        }

        Ok(Self {
            persisted,
            scheduler,
            inspect_controller: Arc::new(Mutex::new(Some(inspect_controller))),
        })
    }

    async fn from_escrow<'a>(
        scope: fasync::ScopeHandle,
        store: &'a sandbox::CapabilityStore,
        dictionary: zx::NullableHandle,
    ) -> Result<Self, Error> {
        let dict = store
            .import(fsandbox::DictionaryRef { token: dictionary.into() })
            .await
            .context("Error importing from component startup handle")?;

        let inspect_controller = {
            let escrow_token = dict
                .get::<sandbox::Handle<'a>>(FROZEN_INSPECT_VMO_KEY)
                .await
                .context("Failed to get frozen Inspect VMO")?
                .export::<zx::NullableHandle>()
                .await
                .context("Failed to export handle")?
                .into();

            // Swap escrowed Inspect data with a new Tree server.
            let token = finspect::EscrowToken { token: escrow_token };
            let inspect_runtime::FetchEscrowResult { vmo, server } = inspect_runtime::fetch_escrow(
                token,
                inspect_runtime::FetchEscrowOptions::new().replace_with_tree(),
            )
            .await
            .context("Failed to fetch escrowed Inspect data")?;

            let opts = inspect_runtime::PublishOptions::default()
                .custom_scope(scope.clone())
                .on_tree_server(server.context("FetchEscrow did not return a TreeHandle")?);

            // Check if Persistence has already published persisted data from last boot.
            let escrowed_inspector = Inspector::new(InspectorConfig::default().vmo(vmo));
            let escrowed_data = fuchsia_inspect::reader::read(&escrowed_inspector)
                .await
                .context("Failed to read escrowed Inspect data")?;

            if escrowed_data.get_property(PUBLISHED_TIME_KEY).is_some()
                && escrowed_data.get_child(PERSIST_NODE_NAME).is_some()
            {
                // Republish the read-only VMO. The VMO already contains
                // persisted data from the last boot; no more work is necessary.
                //
                // Persistence needs to continue running to record data to
                // persist for the next boot.
                inspect_runtime::publish(&escrowed_inspector, opts)
                    .context("Failed to publish escrowed (read-only) Inspect data")?
            } else {
                // Create a new, writable Inspect tree. The previous instance of
                // Persistence did not receive the signal to persist data from
                // the last boot, but this instance might.
                inspect_runtime::publish(component::inspector(), opts)
                    .context("Failed to publish Inspect data")?
            }
        };

        let persisted_bytes = dict
            .get::<sandbox::Data<'a>>(INSTANCE_STATE_KEY)
            .await
            .context("Error getting instance state")?
            .export::<Vec<u8>>()
            .await
            .context("Error exporting as buffer")?;
        let persisted: PersistedState = ciborium::from_reader(&persisted_bytes[..])
            .context("Failed to deserialize InstanceState")?;

        // Do not spawn FIDL request handlers when returning from escrow. The
        // previous component instance escrowed its request streams, sending
        // them to the Component Framework. When an incoming request arrives
        // on escrowed channels held by the Component Framework, it will be
        // routed to this instance's incoming namespace (via IncomingRequest),
        // and this instance will then spawn new request handlers.

        Ok(Self {
            scheduler: Scheduler::new(&persisted.config),
            persisted: Arc::new(persisted),
            inspect_controller: Arc::new(Mutex::new(Some(inspect_controller))),
        })
    }

    async fn as_escrowed_dict(
        store: &sandbox::CapabilityStore,
        persisted: impl AsRef<PersistedState>,
        inspect_controller: Arc<Mutex<Option<inspect_runtime::PublishedInspectController>>>,
    ) -> Result<fsandbox::DictionaryRef, Error> {
        let dict = store.create_dictionary().await?;

        // Save PersistedState.
        let mut persisted_bytes: Vec<u8> = Vec::new();
        ciborium::into_writer(persisted.as_ref(), &mut persisted_bytes)
            .context("Failed to serialize InstanceState")?;
        let data = store.import(persisted_bytes).await?;
        dict.insert(INSTANCE_STATE_KEY, data).await?;

        // Save the frozen Inspect VMO.
        let inspect_controller = inspect_controller.lock().take();
        if let Some(inspect_controller) = inspect_controller {
            match inspect_controller.escrow_frozen(inspect_runtime::EscrowOptions::default()).await
            {
                Some(escrow_token) => {
                    let handle = escrow_token.token.into_handle();
                    let data = store.import(handle).await?;
                    dict.insert(FROZEN_INSPECT_VMO_KEY, data).await?;
                }
                None => {
                    error!("Failed to escrow frozen Inspect VMO");
                }
            }
        }

        dict.export().await.context("Failed to export escrowed dictionary")
    }
}
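
// `as_escrowed_dict` and `from_escrow` form a round trip: the dictionary
// escrowed on stall is exactly what the next instance consumes at startup.
// Conceptually (a sketch of the flow with hypothetical bindings, not
// additional code):
//
//     let dict = ComponentState::as_escrowed_dict(&store, persisted, controller).await?;
//     // ...the Component Framework restarts the component and passes `dict`
//     // back as the EscrowedDictionary startup handle...
//     let state = ComponentState::from_escrow(scope, &store, handle).await?;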

/// Handles fuchsia.update/Notifier requests. A notification signals that an
/// update check has completed, at which point this component publishes its
/// persisted data to Inspect.
async fn handle_update_done(
    stream: fupdate::NotifierRequestStream,
    persisted: Arc<PersistedState>,
) -> Result<(), Error> {
    let (stream, stalled) = detect_stall::until_stalled(stream, BUILD_CONFIG.stall_interval);
    let mut stream = pin!(stream);
    if let Ok(Some(request)) = stream.try_next().await {
        debug!("Received fuchsia.update.NotifierRequest");
        match request {
            fupdate::NotifierRequest::Notify { control_handle } => {
                debug!("Received notification that the update check has completed");
                let stage = persisted.update_stage.lock().clone();
                match stage {
                    UpdateCheckStage::Skipped | UpdateCheckStage::Error => {
                        unreachable!("Received impossible notification")
                    }
                    UpdateCheckStage::Waiting => {
                        *persisted.update_stage.lock() = UpdateCheckStage::Done;
                        info!("Update check has completed; publishing previous boot data");
                        publish_inspect_data().await;
                        control_handle.shutdown();
                        return Ok(());
                    }
                    UpdateCheckStage::Done => {
                        debug!("Ignoring update check notification; already received one");
                        control_handle.shutdown();
                        return Ok(());
                    }
                }
            }
        }
    }
    if let Ok(Some(server_end)) = stalled.await {
        // Send the server endpoint back to the framework.
        debug!("Escrowing fuchsia.update.Notifier");
        fuchsia_component::client::connect_channel_to_protocol_at_path(
            server_end,
            "/escrow/fuchsia.update.Notifier",
        )
        .context("Failed to connect to fuchsia.update.Notifier")?;
    }
    Ok(())
}
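
// A note on the stall pattern above: `detect_stall::until_stalled` yields the
// request stream plus a future that resolves with the unread server endpoint
// once the stream has been idle for `stall_interval`. Passing that endpoint to
// the "/escrow/fuchsia.update.Notifier" capability hands the channel back to
// the Component Framework, so the connection is preserved and a later request
// on it can reach a new instance of this component.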

async fn publish_inspect_data() {
    // TODO(https://fxbug.dev/444525059): Set health properly.
    component::health().set_ok();
    if let Err(e) = inspect_server::record_persist_node(PERSIST_NODE_NAME).await {
        error!("Failed to serve persisted Inspect data from previous boot: {e}");
    }
    component::inspector().root().record_int(PUBLISHED_TIME_KEY, BootInstant::get().into_nanos());
}
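
// After publication the component's Inspect tree looks roughly like the
// following (a sketch; the exact layout of the `persist` node is determined
// by `inspect_server::record_persist_node`):
//
//     root:
//       config: { ...structured config... }
//       persist: { ...previous-boot persisted data... }
//       published: <boot timestamp in nanoseconds>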

enum IncomingRequest {
    UpdateDone(fupdate::NotifierRequestStream),
    SampleSink(fdiagnostics::SampleSinkRequestStream),
}

pub async fn main(_args: CommandLine) -> Result<(), Error> {
    info!("Starting Diagnostics Persistence service");
    let scope = fasync::Scope::new();
    let store = sandbox::CapabilityStore::connect()?;
    let state = ComponentState::load(scope.to_handle(), &store)
        .await
        .context("Error getting escrowed state")?;
    component::health().set_starting_up();

    let mut fs = ServiceFs::new();
    fs.dir("svc").add_fidl_service(IncomingRequest::UpdateDone);
    fs.dir("svc").add_fidl_service(IncomingRequest::SampleSink);
    fs.take_and_serve_directory_handle().expect("Failed to take service directory handle");

    let lifecycle =
        fuchsia_runtime::take_startup_handle(HandleInfo::new(HandleType::Lifecycle, 0)).unwrap();
    let lifecycle: zx::Channel = lifecycle.into();
    let lifecycle: endpoints::ServerEnd<flifecycle::LifecycleMarker> = lifecycle.into();
    let (mut lifecycle_request_stream, lifecycle_control_handle) =
        lifecycle.into_stream_and_control_handle();
    let mut lifecycle_task = pin!(
        async move {
            match lifecycle_request_stream.next().await {
                Some(Ok(flifecycle::LifecycleRequest::Stop { .. })) => {
                    debug!("Received stop request");
                    // TODO(https://fxbug.dev/444529707): Teach `ServiceFs` and
                    // others to skip the `until_stalled` timeout when this happens
                    // so we can cleanly stop the component.
                }
                Some(Err(e)) => {
                    error!("Received FIDL error from Lifecycle: {e:?}");
                    std::future::pending::<()>().await
                }
                None => {
                    debug!("Lifecycle request stream closed");
                    std::future::pending::<()>().await
                }
            }
        }
        .fuse()
    );

    let mut outgoing_dir_task =
        fs.until_stalled(BUILD_CONFIG.stall_interval).for_each_concurrent(None, move |item| {
            let lifecycle_control_handle = lifecycle_control_handle.clone();
            let state = state.clone();
            let store = store.clone();
            async move {
                match item {
                    fuchsia_component::server::Item::Request(req, _active_guard) => match req {
                        IncomingRequest::UpdateDone(stream) => {
                            if let Err(e) = handle_update_done(stream, state.persisted).await {
                                error!("Failed to handle NotifierRequest: {e}");
                            }
                        }
                        IncomingRequest::SampleSink(stream) => {
                            if let Err(e) = state.scheduler.handle_sample_sink(stream).await {
                                error!("Failed to handle SampleSinkRequest: {e}");
                            }
                        }
                    },
                    fuchsia_component::server::Item::Stalled(outgoing_directory) => {
                        let escrowed_dictionary = match ComponentState::as_escrowed_dict(
                            &store,
                            state.persisted,
                            state.inspect_controller,
                        )
                        .await
                        {
                            Ok(dict) => Some(dict),
                            Err(e) => {
                                error!(
                                    "Failed to serialize PersistedState into component dictionary: {e}"
                                );
                                None
                            }
                        };
                        lifecycle_control_handle
                            .send_on_escrow(flifecycle::LifecycleOnEscrowRequest {
                                outgoing_dir: Some(outgoing_directory.into()),
                                escrowed_dictionary,
                                ..Default::default()
                            })
                            .unwrap();
                    }
                }
            }
        });

    select! {
        _ = lifecycle_task => {
            info!("Stopping due to lifecycle request");
            scope.cancel().await;
        },
        _ = outgoing_dir_task => {
            info!("Stopping due to inactivity");
            scope.join().await;
        },
    }

    Ok(())
}