tor_hsservice/status.rs
1//! Support for reporting the status of an onion service.
2
3use crate::internal_prelude::*;
4
5/// The current reported status of an onion service.
6#[derive(Debug, Clone, Eq, PartialEq)]
7pub struct OnionServiceStatus {
8 /// The current high-level state for the IPT manager.
9 ipt_mgr: ComponentStatus,
10
11 /// The current high-level state for the descriptor publisher.
12 publisher: ComponentStatus,
13 // TODO (#1194): Add key expiration
14 //
15 // NOTE: Do _not_ add general metrics (like failure/success rates , number
16 // of intro points, etc) here.
17}
18
19/// The current reported status of an onion service subsystem.
20#[derive(Debug, Clone)]
21pub(crate) struct ComponentStatus {
22 /// The current high-level state.
23 state: State,
24
25 /// The last error we have seen.
26 latest_error: Option<Problem>,
27}
28
29impl ComponentStatus {
30 /// Create a new ComponentStatus for a component that has not been bootstrapped.
31 fn new_shutdown() -> Self {
32 Self {
33 state: State::Shutdown,
34 latest_error: None,
35 }
36 }
37}
38
39impl PartialEq for ComponentStatus {
40 fn eq(&self, other: &Self) -> bool {
41 let Self {
42 state,
43 latest_error,
44 } = self;
45 let Self {
46 state: state_other,
47 latest_error: lastest_error_other,
48 } = other;
49
50 // NOTE: Errors are never equal. We _could_ add half-baked PartialEq implementations for
51 // all of our error types, but it doesn't seem worth it. If there is a state change, or if
52 // we've encountered an error (even if it's the same as the previous one), we'll notify the
53 // watchers.
54 state == state_other && latest_error.is_none() && lastest_error_other.is_none()
55 }
56}
57
58impl Eq for ComponentStatus {}
59
/// The high-level state of an onion service.
///
/// This is a coarse summary: it conveys only the most basic information about
/// the status of an onion service.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[non_exhaustive]
pub enum State {
    /// The service has not been launched.
    ///
    /// Either [`OnionService::launch`](crate::OnionService::launch) has not
    /// been called, or the service has been shut down.
    ///
    /// ## Reachability
    ///
    /// The service is not reachable.
    Shutdown,
    /// The service is bootstrapping.
    ///
    /// Specifically, we have been offline, or we have just initialized:
    /// we are trying to build introduction points and publish a descriptor,
    /// and have not hit any significant problems yet.
    ///
    /// ## Reachability
    ///
    /// The service is not fully reachable, but may be reachable by some clients.
    Bootstrapping,
    /// The service is running in a degraded state, but is reachable.
    ///
    /// Specifically, we are not satisfied with our introduction points, but
    /// we do have a number of working introduction points,
    /// and our descriptor is up-to-date.
    ///
    /// ## Reachability
    ///
    /// The service is reachable.
    //
    // TODO: this variant is only used by the IptManager.
    // We should split this enum into IptManagerState and PublisherState.
    DegradedReachable,
    /// The service is running in a degraded state, and is probably not reachable.
    ///
    /// Specifically, we have a number of working introduction points,
    /// but we have failed to upload the descriptor to one or more HsDirs.
    ///
    /// ## Reachability
    ///
    /// The service is unlikely to be reachable.
    DegradedUnreachable,
    /// The service is running.
    ///
    /// Specifically, we are satisfied with our introduction points, and our
    /// descriptor is up-to-date.
    ///
    /// ## Reachability
    ///
    /// The service is believed to be fully reachable.
    Running,
    /// The service is trying to recover from a minor interruption.
    ///
    /// Specifically:
    ///   * We have encountered a problem (like a dead intro point or an
    ///     intermittent failure to upload a descriptor).
    ///   * We are trying to recover from the problem.
    ///   * We have not yet failed.
    ///
    /// ## Reachability
    ///
    /// The service is unlikely to be reachable.
    //
    // NOTE: this status is currently only set by `IptManager` whenever:
    //   * there are no good IPTs (so the service will be unreachable); or
    //   * there aren't enough good IPTs to publish (AFAICT in this case the service
    //     may be reachable, if the IPTs we _do_ have have previously been published).
    //
    // TODO (#1270): split this state into 2 different states (one for the "service is
    // still reachable" case, and another for the "unreachable" one).
    Recovering,
    /// The service is not working.
    ///
    /// Specifically, there is a problem with this onion service, and either it
    /// is one we cannot recover from, or we have tried for a while to recover
    /// and have failed.
    ///
    /// ## Reachability
    ///
    /// The service is not fully reachable. It may temporarily be reachable by some clients.
    Broken,
}

impl State {
    /// Check whether the service is *believed* to be fully reachable.
    ///
    /// Returns `true` for [`State::Running`] and [`State::DegradedReachable`],
    /// and `false` for every other state.
    ///
    /// This is at best an implication in one direction: even if this returns
    /// `false`, the service may still be reachable.
    pub fn is_fully_reachable(&self) -> bool {
        // Spelled out variant by variant, so that a newly added state has to
        // be classified here explicitly.
        match self {
            Self::Running | Self::DegradedReachable => true,
            Self::Shutdown
            | Self::Bootstrapping
            | Self::DegradedUnreachable
            | Self::Recovering
            | Self::Broken => false,
        }
    }
}
160
161/// An error type for descriptor upload failures with retries.
162#[derive(Clone, Debug, thiserror::Error)]
163#[non_exhaustive]
164pub enum DescUploadRetryError {
165 /// A fatal (non-transient) error occurred.
166 #[error("A fatal (non-transient) error occurred")]
167 FatalError(RetryError<DescUploadError>),
168
169 /// Ran out of retries.
170 #[error("Ran out of retries")]
171 MaxRetryCountExceeded(RetryError<DescUploadError>),
172
173 /// Exceeded the maximum allowed time.
174 #[error("Timeout exceeded")]
175 Timeout(RetryError<DescUploadError>),
176
177 /// Encountered an internal error.
178 #[error("Internal error")]
179 Bug(#[from] Bug),
180}
181
182/// A problem encountered by an onion service.
183#[derive(Clone, Debug, derive_more::From)]
184#[non_exhaustive]
185pub enum Problem {
186 /// A fatal error occurred.
187 Runtime(FatalError),
188
189 /// One or more descriptor uploads failed.
190 DescriptorUpload(Vec<DescUploadRetryError>),
191
192 /// We failed to establish one or more introduction points.
193 Ipt(Vec<IptError>),
194 // TODO: add variants for other transient errors?
195}
196
197impl OnionServiceStatus {
198 /// Create a new OnionServiceStatus for a service that has not been bootstrapped.
199 pub(crate) fn new_shutdown() -> Self {
200 Self {
201 ipt_mgr: ComponentStatus::new_shutdown(),
202 publisher: ComponentStatus::new_shutdown(),
203 }
204 }
205
206 /// Return the current high-level state of this onion service.
207 ///
208 /// The overall state is derived from the `State`s of its underlying components
209 /// (i.e. the IPT manager and descriptor publisher).
210 pub fn state(&self) -> State {
211 use State::*;
212
213 match (self.ipt_mgr.state, self.publisher.state) {
214 (Shutdown, _) | (_, Shutdown) => Shutdown,
215 (Bootstrapping, _) | (_, Bootstrapping) => Bootstrapping,
216 (Running, Running) => Running,
217 (Recovering, _) | (_, Recovering) => Recovering,
218 (Broken, _) | (_, Broken) => Broken,
219 (DegradedUnreachable, _) | (_, DegradedUnreachable) => DegradedUnreachable,
220 (DegradedReachable, Running)
221 | (Running, DegradedReachable)
222 | (DegradedReachable, DegradedReachable) => DegradedReachable,
223 }
224 }
225
226 /// Return the most severe current problem
227 pub fn current_problem(&self) -> Option<&Problem> {
228 match (&self.ipt_mgr.latest_error, &self.publisher.latest_error) {
229 (None, None) => None,
230 (Some(e), Some(_)) => {
231 // For now, assume IPT manager errors are always more severe
232 // TODO: decide which error is the more severe (or return both)
233 Some(e)
234 }
235 (_, Some(e)) | (Some(e), _) => Some(e),
236 }
237 }
238
239 /// Return a time before which the user must re-provision this onion service
240 /// with new keys.
241 ///
242 /// Returns `None` if the onion service is able to generate and sign new
243 /// keys as needed.
244 pub fn provisioned_key_expiration(&self) -> Option<SystemTime> {
245 None // TODO (#1194): Implement
246 }
247}
248
249/// A stream of OnionServiceStatus events, returned by an onion service.
250///
251/// Note that multiple status change events may be coalesced into one if the
252/// receiver does not read them as fast as they are generated. Note also
253/// that it's possible for an item to arise in this stream without an underlying
254/// change having occurred.
255///
256//
257// We define this so that we aren't exposing postage in our public API.
258#[derive(Clone)]
259pub struct OnionServiceStatusStream(postage::watch::Receiver<OnionServiceStatus>);
260
261impl futures::Stream for OnionServiceStatusStream {
262 type Item = OnionServiceStatus;
263
264 fn poll_next(
265 mut self: std::pin::Pin<&mut Self>,
266 cx: &mut std::task::Context<'_>,
267 ) -> std::task::Poll<Option<Self::Item>> {
268 self.0.poll_next_unpin(cx)
269 }
270}
271
272/// A shared handle to a postage::watch::Sender that we can use to update an OnionServiceStatus.
273//
274// TODO: Possibly, we don't need this to be Clone: as we implement the code
275// that adjusts the status, we might find that only a single location needs to
276// hold the Sender. If that turns out to be the case, we should remove the
277// `Arc<Mutex<.>>` here. If not, we should remove this comment.
278#[derive(Clone)]
279pub(crate) struct StatusSender(Arc<Mutex<postage::watch::Sender<OnionServiceStatus>>>);
280
281/// A handle that can be used by the [`IptManager`]
282/// to update the [`OnionServiceStatus`].
283#[derive(Clone, derive_more::From)]
284pub(crate) struct IptMgrStatusSender(StatusSender);
285
286/// A handle that can be used by the [`Publisher`]
287/// to update the [`OnionServiceStatus`].
288#[derive(Clone, derive_more::From)]
289pub(crate) struct PublisherStatusSender(StatusSender);
290
291/// A helper for implementing [`PublisherStatusSender`] and [`IptMgrStatusSender`].
292///
293/// TODO: this macro is a bit repetitive, it would be nice if we could reduce duplication even
294/// further (and auto-generate a `note_<state>` function for every `State` variant).
295macro_rules! impl_status_sender {
296 ($sender:ident, $field:ident) => {
297 impl $sender {
298 /// Update `latest_error` and set the underlying state to `Broken`.
299 ///
300 /// If the new state is different, this updates the current status
301 /// and notifies all listeners.
302 pub(crate) fn send_broken(&self, err: impl Into<Problem>) {
303 self.send(State::Broken, Some(err.into()));
304 }
305
306 /// Update `latest_error` and set the underlying state to `Recovering`.
307 ///
308 /// If the new state is different, this updates the current status
309 /// and notifies all listeners.
310 #[allow(dead_code)] // NOTE: this is dead code in PublisherStatusSender
311 pub(crate) fn send_recovering(&self, err: impl Into<Problem>) {
312 self.send(State::Recovering, Some(err.into()));
313 }
314
315 /// Set `latest_error` to `None` and the underlying state to `Shutdown`.
316 ///
317 /// If the new state is different, this updates the current status
318 /// and notifies all listeners.
319 pub(crate) fn send_shutdown(&self) {
320 self.send(State::Shutdown, None);
321 }
322
323 /// Update the underlying state and latest_error.
324 ///
325 /// If the new state is different, this updates the current status
326 /// and notifies all listeners.
327 pub(crate) fn send(&self, state: State, err: Option<Problem>) {
328 let sender = &self.0;
329 let mut tx = sender.0.lock().expect("Poisoned lock");
330 let mut svc_status = tx.borrow().clone();
331 svc_status.$field.state = state;
332 svc_status.$field.latest_error = err;
333 tx.maybe_send(|_| svc_status);
334 }
335 }
336 };
337}
338
339impl_status_sender!(IptMgrStatusSender, ipt_mgr);
340impl_status_sender!(PublisherStatusSender, publisher);
341
342impl StatusSender {
343 /// Create a new StatusSender with a given initial status.
344 pub(crate) fn new(initial_status: OnionServiceStatus) -> Self {
345 let (tx, _) = postage::watch::channel_with(initial_status);
346 StatusSender(Arc::new(Mutex::new(tx)))
347 }
348
349 /// Return a copy of the current status.
350 pub(crate) fn get(&self) -> OnionServiceStatus {
351 self.0.lock().expect("Poisoned lock").borrow().clone()
352 }
353
354 /// Return a new OnionServiceStatusStream to return events from this StatusSender.
355 pub(crate) fn subscribe(&self) -> OnionServiceStatusStream {
356 OnionServiceStatusStream(self.0.lock().expect("Poisoned lock").subscribe())
357 }
358}
359
#[cfg(test)]
impl PublisherStatusSender {
    /// Return a new [`OnionServiceStatusStream`] that yields events from the
    /// wrapped `StatusSender`.
    ///
    /// Only available in test builds.
    pub(crate) fn subscribe(&self) -> OnionServiceStatusStream {
        let Self(inner) = self;
        inner.subscribe()
    }
}
367
#[cfg(test)]
impl OnionServiceStatus {
    /// Return a copy of the current status of the publisher.
    ///
    /// Only available in test builds.
    pub(crate) fn publisher_status(&self) -> ComponentStatus {
        let Self { publisher, .. } = self;
        publisher.clone()
    }
}
375
#[cfg(test)]
impl ComponentStatus {
    /// Return the current [`State`] of this component.
    pub(crate) fn state(&self) -> State {
        let Self { state, .. } = self;
        *state
    }

    /// Return the error currently recorded for this component, if there is one.
    pub(crate) fn current_problem(&self) -> Option<&Problem> {
        Option::as_ref(&self.latest_error)
    }
}
387}