tor_hsservice/
status.rs

1//! Support for reporting the status of an onion service.
2
3use crate::internal_prelude::*;
4
5/// The current reported status of an onion service.
6#[derive(Debug, Clone, Eq, PartialEq)]
7pub struct OnionServiceStatus {
8    /// The current high-level state for the IPT manager.
9    ipt_mgr: ComponentStatus,
10
11    /// The current high-level state for the descriptor publisher.
12    publisher: ComponentStatus,
13    // TODO (#1194): Add key expiration
14    //
15    // NOTE: Do _not_ add general metrics (like failure/success rates , number
16    // of intro points, etc) here.
17}
18
19/// The current reported status of an onion service subsystem.
20#[derive(Debug, Clone)]
21pub(crate) struct ComponentStatus {
22    /// The current high-level state.
23    state: State,
24
25    /// The last error we have seen.
26    latest_error: Option<Problem>,
27}
28
29impl ComponentStatus {
30    /// Create a new ComponentStatus for a component that has not been bootstrapped.
31    fn new_shutdown() -> Self {
32        Self {
33            state: State::Shutdown,
34            latest_error: None,
35        }
36    }
37}
38
39impl PartialEq for ComponentStatus {
40    fn eq(&self, other: &Self) -> bool {
41        let Self {
42            state,
43            latest_error,
44        } = self;
45        let Self {
46            state: state_other,
47            latest_error: lastest_error_other,
48        } = other;
49
50        // NOTE: Errors are never equal. We _could_ add half-baked PartialEq implementations for
51        // all of our error types, but it doesn't seem worth it. If there is a state change, or if
52        // we've encountered an error (even if it's the same as the previous one), we'll notify the
53        // watchers.
54        state == state_other && latest_error.is_none() && lastest_error_other.is_none()
55    }
56}
57
58impl Eq for ComponentStatus {}
59
/// The high-level state of an onion service.
///
/// This is the coarsest summary of how an onion service is doing: each
/// variant describes one condition, together with what it implies for the
/// reachability of the service.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[non_exhaustive]
pub enum State {
    /// The service is not launched.
    ///
    /// This is the case if
    /// [`OnionService::launch`](crate::OnionService::launch) has not been
    /// called, or if the service has been shut down.
    ///
    /// ## Reachability
    ///
    /// The service is not reachable.
    Shutdown,
    /// The service is bootstrapping.
    ///
    /// Specifically, we have just initialized, or we have been offline:
    /// we are trying to build introduction points and publish a descriptor,
    /// and so far we haven't hit any significant problems.
    ///
    /// ## Reachability
    ///
    /// The service is not fully reachable, but may be reachable by some clients.
    Bootstrapping,
    /// The service is running in a degraded state, but is reachable.
    ///
    /// Specifically, we are not satisfied with our introduction points, but
    /// we do have a number of working introduction points,
    /// and our descriptor is up-to-date.
    ///
    /// ## Reachability
    ///
    /// The service is reachable.
    //
    // TODO: this variant is only used by the IptManager.
    // We should split this enum into IptManagerState and PublisherState.
    DegradedReachable,
    /// The service is running in a degraded state, and is probably unreachable.
    ///
    /// Specifically, we have a number of working introduction points,
    /// but we have failed to upload the descriptor to one or more HsDirs.
    ///
    /// ## Reachability
    ///
    /// The service is unlikely to be reachable.
    DegradedUnreachable,
    /// The service is running.
    ///
    /// Specifically, we are satisfied with our introduction points, and our
    /// descriptor is up-to-date.
    ///
    /// ## Reachability
    ///
    /// The service is believed to be fully reachable.
    Running,
    /// The service is trying to recover from a minor interruption.
    ///
    /// Specifically:
    ///   * We have encountered a problem (like a dead intro point or an
    ///     intermittent failure to upload a descriptor).
    ///   * We are trying to recover from the problem.
    ///   * We have not yet failed.
    ///
    /// ## Reachability
    ///
    /// The service is unlikely to be reachable.
    //
    // NOTE: this status is currently only set by `IptManager` whenever:
    //   * there are no good IPTs (so the service will be unreachable); or
    //   * there aren't enough good IPTs to publish (AFAICT in this case the service
    //     may be reachable, if the IPTs we _do_ have have previously been published).
    //
    // TODO (#1270): split this state into 2 different states (one for the "service is
    // still reachable" case, and another for the "unreachable" one).
    Recovering,
    /// The service is not working.
    ///
    /// Specifically, there is a problem with this onion service, and either
    /// we cannot recover from it, or we have tried for a while to recover
    /// and have failed.
    ///
    /// ## Reachability
    ///
    /// The service is not fully reachable. It may temporarily be reachable by some clients.
    Broken,
}
150
151impl State {
152    /// Check whether the service is *believed* to be fully reachable.
153    ///
154    /// This is at best an implication in one direction, even if this returns
155    /// `false`, the service may still be reachable.
156    pub fn is_fully_reachable(&self) -> bool {
157        matches!(self, Self::Running | Self::DegradedReachable)
158    }
159}
160
161/// An error type for descriptor upload failures with retries.
162#[derive(Clone, Debug, thiserror::Error)]
163#[non_exhaustive]
164pub enum DescUploadRetryError {
165    /// A fatal (non-transient) error occurred.
166    #[error("A fatal (non-transient) error occurred")]
167    FatalError(RetryError<DescUploadError>),
168
169    /// Ran out of retries.
170    #[error("Ran out of retries")]
171    MaxRetryCountExceeded(RetryError<DescUploadError>),
172
173    /// Exceeded the maximum allowed time.
174    #[error("Timeout exceeded")]
175    Timeout(RetryError<DescUploadError>),
176
177    /// Encountered an internal error.
178    #[error("Internal error")]
179    Bug(#[from] Bug),
180}
181
182/// A problem encountered by an onion service.
183#[derive(Clone, Debug, derive_more::From)]
184#[non_exhaustive]
185pub enum Problem {
186    /// A fatal error occurred.
187    Runtime(FatalError),
188
189    /// One or more descriptor uploads failed.
190    DescriptorUpload(Vec<DescUploadRetryError>),
191
192    /// We failed to establish one or more introduction points.
193    Ipt(Vec<IptError>),
194    // TODO: add variants for other transient errors?
195}
196
197impl OnionServiceStatus {
198    /// Create a new OnionServiceStatus for a service that has not been bootstrapped.
199    pub(crate) fn new_shutdown() -> Self {
200        Self {
201            ipt_mgr: ComponentStatus::new_shutdown(),
202            publisher: ComponentStatus::new_shutdown(),
203        }
204    }
205
206    /// Return the current high-level state of this onion service.
207    ///
208    /// The overall state is derived from the `State`s of its underlying components
209    /// (i.e. the IPT manager and descriptor publisher).
210    pub fn state(&self) -> State {
211        use State::*;
212
213        match (self.ipt_mgr.state, self.publisher.state) {
214            (Shutdown, _) | (_, Shutdown) => Shutdown,
215            (Bootstrapping, _) | (_, Bootstrapping) => Bootstrapping,
216            (Running, Running) => Running,
217            (Recovering, _) | (_, Recovering) => Recovering,
218            (Broken, _) | (_, Broken) => Broken,
219            (DegradedUnreachable, _) | (_, DegradedUnreachable) => DegradedUnreachable,
220            (DegradedReachable, Running)
221            | (Running, DegradedReachable)
222            | (DegradedReachable, DegradedReachable) => DegradedReachable,
223        }
224    }
225
226    /// Return the most severe current problem
227    pub fn current_problem(&self) -> Option<&Problem> {
228        match (&self.ipt_mgr.latest_error, &self.publisher.latest_error) {
229            (None, None) => None,
230            (Some(e), Some(_)) => {
231                // For now, assume IPT manager errors are always more severe
232                // TODO: decide which error is the more severe (or return both)
233                Some(e)
234            }
235            (_, Some(e)) | (Some(e), _) => Some(e),
236        }
237    }
238
239    /// Return a time before which the user must re-provision this onion service
240    /// with new keys.
241    ///
242    /// Returns `None` if the onion service is able to generate and sign new
243    /// keys as needed.
244    pub fn provisioned_key_expiration(&self) -> Option<SystemTime> {
245        None // TODO (#1194): Implement
246    }
247}
248
249/// A stream of OnionServiceStatus events, returned by an onion service.
250///
251/// Note that multiple status change events may be coalesced into one if the
252/// receiver does not read them as fast as they are generated.  Note also
253/// that it's possible for an item to arise in this stream without an underlying
254/// change having occurred.
255///
256//
257// We define this so that we aren't exposing postage in our public API.
258#[derive(Clone)]
259pub struct OnionServiceStatusStream(postage::watch::Receiver<OnionServiceStatus>);
260
261impl futures::Stream for OnionServiceStatusStream {
262    type Item = OnionServiceStatus;
263
264    fn poll_next(
265        mut self: std::pin::Pin<&mut Self>,
266        cx: &mut std::task::Context<'_>,
267    ) -> std::task::Poll<Option<Self::Item>> {
268        self.0.poll_next_unpin(cx)
269    }
270}
271
272/// A shared handle to a postage::watch::Sender that we can use to update an OnionServiceStatus.
273//
274// TODO: Possibly, we don't need this to be Clone: as we implement the code
275// that adjusts the status, we might find that only a single location needs to
276// hold the Sender.  If that turns out to be the case, we should remove the
277// `Arc<Mutex<.>>` here.  If not, we should remove this comment.
278#[derive(Clone)]
279pub(crate) struct StatusSender(Arc<Mutex<postage::watch::Sender<OnionServiceStatus>>>);
280
281/// A handle that can be used by the [`IptManager`]
282/// to update the [`OnionServiceStatus`].
283#[derive(Clone, derive_more::From)]
284pub(crate) struct IptMgrStatusSender(StatusSender);
285
286/// A handle that can be used by the [`Publisher`]
287/// to update the [`OnionServiceStatus`].
288#[derive(Clone, derive_more::From)]
289pub(crate) struct PublisherStatusSender(StatusSender);
290
/// Generate the status-reporting methods shared by [`PublisherStatusSender`]
/// and [`IptMgrStatusSender`].
///
/// `$sender` is the wrapper type that receives the methods, and `$field` is the
/// [`OnionServiceStatus`] field that those methods overwrite.
///
/// TODO: this macro is a bit repetitive, it would be nice if we could reduce duplication even
/// further (and auto-generate a `note_<state>` function for every `State` variant).
macro_rules! impl_status_sender {
    ($sender:ident, $field:ident) => {
        impl $sender {
            /// Update the underlying state and latest_error.
            ///
            /// If the new state is different, this updates the current status
            /// and notifies all listeners.
            pub(crate) fn send(&self, state: State, err: Option<Problem>) {
                let StatusSender(shared) = &self.0;
                let mut tx = shared.lock().expect("Poisoned lock");

                // Start from a copy of the current status, and overwrite only
                // the component this sender is responsible for.
                let mut updated = tx.borrow().clone();
                updated.$field = ComponentStatus {
                    state,
                    latest_error: err,
                };

                tx.maybe_send(|_| updated);
            }

            /// Update `latest_error` and set the underlying state to `Broken`.
            ///
            /// If the new state is different, this updates the current status
            /// and notifies all listeners.
            pub(crate) fn send_broken(&self, err: impl Into<Problem>) {
                let problem: Problem = err.into();
                self.send(State::Broken, Some(problem));
            }

            /// Update `latest_error` and set the underlying state to `Recovering`.
            ///
            /// If the new state is different, this updates the current status
            /// and notifies all listeners.
            #[allow(dead_code)] // NOTE: this is dead code in PublisherStatusSender
            pub(crate) fn send_recovering(&self, err: impl Into<Problem>) {
                let problem: Problem = err.into();
                self.send(State::Recovering, Some(problem));
            }

            /// Set `latest_error` to `None` and the underlying state to `Shutdown`.
            ///
            /// If the new state is different, this updates the current status
            /// and notifies all listeners.
            pub(crate) fn send_shutdown(&self) {
                self.send(State::Shutdown, None);
            }
        }
    };
}
338
339impl_status_sender!(IptMgrStatusSender, ipt_mgr);
340impl_status_sender!(PublisherStatusSender, publisher);
341
342impl StatusSender {
343    /// Create a new StatusSender with a given initial status.
344    pub(crate) fn new(initial_status: OnionServiceStatus) -> Self {
345        let (tx, _) = postage::watch::channel_with(initial_status);
346        StatusSender(Arc::new(Mutex::new(tx)))
347    }
348
349    /// Return a copy of the current status.
350    pub(crate) fn get(&self) -> OnionServiceStatus {
351        self.0.lock().expect("Poisoned lock").borrow().clone()
352    }
353
354    /// Return a new OnionServiceStatusStream to return events from this StatusSender.
355    pub(crate) fn subscribe(&self) -> OnionServiceStatusStream {
356        OnionServiceStatusStream(self.0.lock().expect("Poisoned lock").subscribe())
357    }
358}
359
#[cfg(test)]
impl PublisherStatusSender {
    /// Return a new OnionServiceStatusStream that yields events from the
    /// wrapped StatusSender.
    pub(crate) fn subscribe(&self) -> OnionServiceStatusStream {
        let Self(inner) = self;
        inner.subscribe()
    }
}
367
#[cfg(test)]
impl OnionServiceStatus {
    /// Return a copy of the current status of the publisher.
    pub(crate) fn publisher_status(&self) -> ComponentStatus {
        let Self { publisher, .. } = self;
        publisher.clone()
    }
}
375
#[cfg(test)]
impl ComponentStatus {
    /// Return the `State` this component is currently in.
    pub(crate) fn state(&self) -> State {
        let Self { state, .. } = self;
        *state
    }

    /// Return the latest error recorded for this component, if any.
    pub(crate) fn current_problem(&self) -> Option<&Problem> {
        let Self { latest_error, .. } = self;
        latest_error.as_ref()
    }
}