//! Support for reporting the status of an onion service.

use crate::internal_prelude::*;
/// The current reported status of an onion service.
6
#[derive(Debug, Clone, Eq, PartialEq)]
7
pub struct OnionServiceStatus {
8
    /// The current high-level state for the IPT manager.
9
    ipt_mgr: ComponentStatus,
10

            
11
    /// The current high-level state for the descriptor publisher.
12
    publisher: ComponentStatus,
13
    // TODO (#1194): Add key expiration
14
    //
15
    // NOTE: Do _not_ add general metrics (like failure/success rates , number
16
    // of intro points, etc) here.
17
}
/// The current reported status of an onion service subsystem.
20
#[derive(Debug, Clone)]
21
pub(crate) struct ComponentStatus {
22
    /// The current high-level state.
23
    state: State,
24

            
25
    /// The last error we have seen.
26
    latest_error: Option<Problem>,
27
}
impl ComponentStatus {
30
    /// Create a new ComponentStatus for a component that has not been bootstrapped.
31
32
    fn new_shutdown() -> Self {
32
32
        Self {
33
32
            state: State::Shutdown,
34
32
            latest_error: None,
35
32
        }
36
32
    }
37
}
impl PartialEq for ComponentStatus {
40
168
    fn eq(&self, other: &Self) -> bool {
41
168
        let Self {
42
168
            state,
43
168
            latest_error,
44
168
        } = self;
45
168
        let Self {
46
168
            state: state_other,
47
168
            latest_error: lastest_error_other,
48
168
        } = other;
49
168

            
50
168
        // NOTE: Errors are never equal. We _could_ add half-baked PartialEq implementations for
51
168
        // all of our error types, but it doesn't seem worth it. If there is a state change, or if
52
168
        // we've encountered an error (even if it's the same as the previous one), we'll notify the
53
168
        // watchers.
54
168
        state == state_other && latest_error.is_none() && lastest_error_other.is_none()
55
168
    }
56
}
// NOTE: the hand-written `PartialEq` impl above returns `false` whenever either side has a
// `latest_error`, so `==` is not reflexive for a status that carries an error.  This marker
// impl is nevertheless needed by the `#[derive(Eq)]` on `OnionServiceStatus`, whose fields
// are `ComponentStatus` values.
impl Eq for ComponentStatus {}
/// The high-level state of an onion service.
///
/// This type summarizes the most basic information about an onion service's
/// status.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
#[non_exhaustive]
pub enum State {
    /// The service is not launched.
    ///
    /// Either [`OnionService::launch`](crate::OnionService::launch) has not
    /// been called, or the service has been shut down.
    ///
    /// ## Reachability
    ///
    /// The service is not reachable.
    Shutdown,
    /// The service is bootstrapping.
    ///
    /// Specifically, we have been offline, or we just initialized:
    /// We are trying to build introduction points and publish a descriptor,
    /// and haven't hit any significant problems yet.
    ///
    /// ## Reachability
    ///
    /// The service is not fully reachable, but may be reachable by some clients.
    Bootstrapping,
    /// The service is running in a degraded state.
    ///
    /// Specifically, we are not satisfied with our introduction points, but
    /// we do have a number of working introduction points,
    /// and our descriptor is up-to-date.
    ///
    /// ## Reachability
    ///
    /// The service is reachable.
    //
    // TODO: this variant is only used by the IptManager.
    // We should split this enum into IptManagerState and PublisherState.
    DegradedReachable,
    /// The service is running in a degraded state.
    ///
    /// Specifically, we have a number of working introduction points,
    /// but we have failed to upload the descriptor to one or more HsDirs.
    ///
    /// ## Reachability
    ///
    /// The service is unlikely to be reachable.
    DegradedUnreachable,
    /// The service is running.
    ///
    /// Specifically, we are satisfied with our introduction points, and our
    /// descriptor is up-to-date.
    ///
    /// ## Reachability
    ///
    /// The service is believed to be fully reachable.
    Running,
    /// The service is trying to recover from a minor interruption.
    ///
    /// Specifically:
    ///   * We have encountered a problem (like a dead intro point or an
    ///     intermittent failure to upload a descriptor)
    ///   * We are trying to recover from the problem.
    ///   * We have not yet failed.
    ///
    /// ## Reachability
    ///
    /// The service is unlikely to be reachable.
    //
    // NOTE: this status is currently only set by `IptManager` whenever:
    //   * there are no good IPTs (so the service will be unreachable); or
    //   * there aren't enough good IPTs to publish (AFAICT in this case the service
    //     may be reachable, if the IPTs we _do_ have have previously been published).
    //
    // TODO (#1270): split this state into 2 different states (one for the "service is
    // still reachable" case, and another for the "unreachable" one).
    Recovering,
    /// The service is not working.
    ///
    /// Specifically, there is a problem with this onion service, and either it
    /// is one we cannot recover from, or we have tried for a while to recover
    /// and have failed.
    ///
    /// ## Reachability
    ///
    /// The service is not fully reachable. It may temporarily be reachable by some clients.
    Broken,
}
impl State {
152
    /// Check whether the service is *believed* to be fully reachable.
153
    ///
154
    /// This is at best an implication in one direction, even if this returns
155
    /// `false`, the service may still be reachable.
156
    pub fn is_fully_reachable(&self) -> bool {
157
        matches!(self, Self::Running | Self::DegradedReachable)
158
    }
159
}
/// An error type for descriptor upload failures with retries.
162
#[derive(Clone, Debug, thiserror::Error)]
163
#[non_exhaustive]
164
pub enum DescUploadRetryError {
165
    /// A fatal (non-transient) error occurred.
166
    #[error("A fatal (non-transient) error occurred")]
167
    FatalError(RetryError<DescUploadError>),
168

            
169
    /// Ran out of retries.
170
    #[error("Ran out of retries")]
171
    MaxRetryCountExceeded(RetryError<DescUploadError>),
172

            
173
    /// Exceeded the maximum allowed time.
174
    #[error("Timeout exceeded")]
175
    Timeout(RetryError<DescUploadError>),
176

            
177
    /// Encountered an internal error.
178
    #[error("Internal error")]
179
    Bug(#[from] Bug),
180
}
/// A problem encountered by an onion service.
183
#[derive(Clone, Debug, derive_more::From)]
184
#[non_exhaustive]
185
pub enum Problem {
186
    /// A fatal error occurred.
187
    Runtime(FatalError),
188

            
189
    /// One or more descriptor uploads failed.
190
    DescriptorUpload(Vec<DescUploadRetryError>),
191

            
192
    /// We failed to establish one or more introduction points.
193
    Ipt(Vec<IptError>),
194
    // TODO: add variants for other transient errors?
195
}
impl OnionServiceStatus {
198
    /// Create a new OnionServiceStatus for a service that has not been bootstrapped.
199
16
    pub(crate) fn new_shutdown() -> Self {
200
16
        Self {
201
16
            ipt_mgr: ComponentStatus::new_shutdown(),
202
16
            publisher: ComponentStatus::new_shutdown(),
203
16
        }
204
16
    }
205

            
206
    /// Return the current high-level state of this onion service.
207
    ///
208
    /// The overall state is derived from the `State`s of its underlying components
209
    /// (i.e. the IPT manager and descriptor publisher).
210
    pub fn state(&self) -> State {
211
        use State::*;
212

            
213
        match (self.ipt_mgr.state, self.publisher.state) {
214
            (Shutdown, _) | (_, Shutdown) => Shutdown,
215
            (Bootstrapping, _) | (_, Bootstrapping) => Bootstrapping,
216
            (Running, Running) => Running,
217
            (Recovering, _) | (_, Recovering) => Recovering,
218
            (Broken, _) | (_, Broken) => Broken,
219
            (DegradedUnreachable, _) | (_, DegradedUnreachable) => DegradedUnreachable,
220
            (DegradedReachable, Running)
221
            | (Running, DegradedReachable)
222
            | (DegradedReachable, DegradedReachable) => DegradedReachable,
223
        }
224
    }
225

            
226
    /// Return the most severe current problem
227
    pub fn current_problem(&self) -> Option<&Problem> {
228
        match (&self.ipt_mgr.latest_error, &self.publisher.latest_error) {
229
            (None, None) => None,
230
            (Some(e), Some(_)) => {
231
                // For now, assume IPT manager errors are always more severe
232
                // TODO: decide which error is the more severe (or return both)
233
                Some(e)
234
            }
235
            (_, Some(e)) | (Some(e), _) => Some(e),
236
        }
237
    }
238

            
239
    /// Return a time before which the user must re-provision this onion service
240
    /// with new keys.
241
    ///
242
    /// Returns `None` if the onion service is able to generate and sign new
243
    /// keys as needed.
244
    pub fn provisioned_key_expiration(&self) -> Option<SystemTime> {
245
        None // TODO (#1194): Implement
246
    }
247
}
/// A stream of OnionServiceStatus events, returned by an onion service.
250
///
251
/// Note that multiple status change events may be coalesced into one if the
252
/// receiver does not read them as fast as they are generated.  Note also
253
/// that it's possible for an item to arise in this stream without an underlying
254
/// change having occurred.
255
///
256
//
257
// We define this so that we aren't exposing postage in our public API.
258
#[derive(Clone)]
259
pub struct OnionServiceStatusStream(postage::watch::Receiver<OnionServiceStatus>);
impl futures::Stream for OnionServiceStatusStream {
262
    type Item = OnionServiceStatus;
263

            
264
16
    fn poll_next(
265
16
        mut self: std::pin::Pin<&mut Self>,
266
16
        cx: &mut std::task::Context<'_>,
267
16
    ) -> std::task::Poll<Option<Self::Item>> {
268
16
        self.0.poll_next_unpin(cx)
269
16
    }
270
}
/// A shared handle to a postage::watch::Sender that we can use to update an OnionServiceStatus.
273
//
274
// TODO: Possibly, we don't need this to be Clone: as we implement the code
275
// that adjusts the status, we might find that only a single location needs to
276
// hold the Sender.  If that turns out to be the case, we should remove the
277
// `Arc<Mutex<.>>` here.  If not, we should remove this comment.
278
#[derive(Clone)]
279
pub(crate) struct StatusSender(Arc<Mutex<postage::watch::Sender<OnionServiceStatus>>>);
/// A handle that can be used by the [`IptManager`]
282
/// to update the [`OnionServiceStatus`].
283
#[derive(Clone, derive_more::From)]
284
pub(crate) struct IptMgrStatusSender(StatusSender);
/// A handle that can be used by the [`Publisher`]
287
/// to update the [`OnionServiceStatus`].
288
#[derive(Clone, derive_more::From)]
289
pub(crate) struct PublisherStatusSender(StatusSender);
/// A helper for implementing [`PublisherStatusSender`] and [`IptMgrStatusSender`].
///
/// `$sender` is the sender type to implement the methods on, and `$field` is the
/// name of the `OnionServiceStatus` field that the sender updates.
///
/// TODO: this macro is a bit repetitive, it would be nice if we could reduce duplication even
/// further (and auto-generate a `note_<state>` function for every `State` variant).
macro_rules! impl_status_sender {
    ($sender:ident, $field:ident) => {
        impl $sender {
            /// Update `latest_error` and set the underlying state to `Broken`.
            ///
            /// If the new state is different, this updates the current status
            /// and notifies all listeners.
            pub(crate) fn send_broken(&self, err: impl Into<Problem>) {
                self.send(State::Broken, Some(err.into()));
            }

            /// Update `latest_error` and set the underlying state to `Recovering`.
            ///
            /// If the new state is different, this updates the current status
            /// and notifies all listeners.
            #[allow(dead_code)] // NOTE: this is dead code in PublisherStatusSender
            pub(crate) fn send_recovering(&self, err: impl Into<Problem>) {
                self.send(State::Recovering, Some(err.into()));
            }

            /// Set `latest_error` to `None` and the underlying state to `Shutdown`.
            ///
            /// If the new state is different, this updates the current status
            /// and notifies all listeners.
            pub(crate) fn send_shutdown(&self) {
                self.send(State::Shutdown, None);
            }

            /// Update the underlying state and latest_error.
            ///
            /// If the new state is different, this updates the current status
            /// and notifies all listeners.
            ///
            /// # Panics
            ///
            /// Panics if the lock protecting the underlying sender is poisoned.
            pub(crate) fn send(&self, state: State, err: Option<Problem>) {
                let sender = &self.0;
                let mut tx = sender.0.lock().expect("Poisoned lock");
                // Work on a copy of the current status, changing only this sender's component.
                let mut svc_status = tx.borrow().clone();
                svc_status.$field.state = state;
                svc_status.$field.latest_error = err;
                // NOTE(review): `maybe_send` is presumably what skips the notification
                // when the status is unchanged; confirm against its definition.
                tx.maybe_send(|_| svc_status);
            }
        }
    };
}
impl_status_sender!(IptMgrStatusSender, ipt_mgr);
340
impl_status_sender!(PublisherStatusSender, publisher);
impl StatusSender {
343
    /// Create a new StatusSender with a given initial status.
344
16
    pub(crate) fn new(initial_status: OnionServiceStatus) -> Self {
345
16
        let (tx, _) = postage::watch::channel_with(initial_status);
346
16
        StatusSender(Arc::new(Mutex::new(tx)))
347
16
    }
348

            
349
    /// Return a copy of the current status.
350
    pub(crate) fn get(&self) -> OnionServiceStatus {
351
        self.0.lock().expect("Poisoned lock").borrow().clone()
352
    }
353

            
354
    /// Return a new OnionServiceStatusStream to return events from this StatusSender.
355
8
    pub(crate) fn subscribe(&self) -> OnionServiceStatusStream {
356
8
        OnionServiceStatusStream(self.0.lock().expect("Poisoned lock").subscribe())
357
8
    }
358
}
#[cfg(test)]
impl PublisherStatusSender {
    /// Return a new OnionServiceStatusStream to return events from this StatusSender.
    ///
    /// This is a test-only helper that delegates to the wrapped `StatusSender`.
    pub(crate) fn subscribe(&self) -> OnionServiceStatusStream {
        self.0.subscribe()
    }
}
#[cfg(test)]
impl OnionServiceStatus {
    /// Return a copy of the current status of the publisher.
    pub(crate) fn publisher_status(&self) -> ComponentStatus {
        self.publisher.clone()
    }
}
#[cfg(test)]
impl ComponentStatus {
    /// The current `State` of this component.
    pub(crate) fn state(&self) -> State {
        self.state
    }

    /// The current error of this component.
    ///
    /// Returns `None` if no error has been recorded.
    pub(crate) fn current_problem(&self) -> Option<&Problem> {
        self.latest_error.as_ref()
    }
}