//! Declare an error type for the `tor-hsservice` crate.

use crate::internal_prelude::*;

pub use crate::rend_handshake::{EstablishSessionError, IntroRequestError};

/// An error which occurs trying to create and start up an onion service
///
/// This is only returned by startup methods.
/// After the service is created and started,
/// we will continue to try to keep the service alive,
/// retrying things as necessary.
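///
/// # Example
///
/// A minimal sketch of handling this error at startup; the `launch_service`
/// helper and the surrounding application code are hypothetical, not part of
/// this crate's API.
///
/// ```ignore
/// match launch_service(&config) {
///     Ok(service) => run(service),
///     // Launching twice is an API-usage problem; log it rather than aborting.
///     Err(StartupError::AlreadyLaunched) => warn!("onion service already launched"),
///     // Any other startup failure is reported to the caller.
///     Err(e) => return Err(e.into()),
/// }
/// ```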
#[derive(Clone, Debug, Error)]
#[non_exhaustive]
pub enum StartupError {
    /// A keystore operation failed.
    #[error("Keystore error while attempting to {action}")]
    Keystore {
        /// The action we were trying to perform.
        action: &'static str,
        /// The underlying error
        #[source]
        cause: tor_keymgr::Error,
    },

    /// Keystore corruption.
    #[error("The keystore is unrecoverably corrupt")]
    KeystoreCorrupted,

    /// Trouble reading on-disk state
    #[error("reading on-disk state")]
    // Not #[from] as that might allow call sites that were *storing* during startup
    // to accidentally use this variant.  (Such call sites probably shouldn't exist.)
    LoadState(#[source] tor_persist::Error),

    /// Unable to access on-disk state
    #[error("Unable to access on-disk state")]
    StateDirectoryInaccessible(#[source] tor_persist::Error),

    /// Unable to access on-disk state using underlying IO operations
    #[error("Unable to access on-disk state: {action} {}", path.display_lossy())]
    // TODO ideally we'd like to use StateDirectoryInaccessible and tor_persist::Error
    // for this too, but tor_persist::Error is quite awkward.
    StateDirectoryInaccessibleIo {
        /// What happened
        #[source]
        source: Arc<io::Error>,

        /// What filesystem path we were trying to access
        path: PathBuf,

        /// What we were trying to do to it
        //
        // TODO this should be an enum, not a static string, but see above
        action: &'static str,
    },

    /// Fatal error (during startup)
    #[error("fatal error")]
    Fatal(#[from] FatalError),

    /// Unable to spawn task
    //
    // TODO too many types have an open-coded version of FooError::Spawn
    // Instead we should:
    //  * Have tor_rtcompat provide a SpawnError struct which contains the task identifier
    //  * Have tor_rtcompat provide a spawn method that takes an identifier
    //    (and which passes that identifier to runtimes that support such a thing,
    //    including our own mock spawner)
    //  * Change every crate's task spawning and error handling to use the new things
    //    (breaking changes to the error type, unless we retain unused compat error variants)
    //
    // TODO HSS replace this with a conversion to StartupError::Fatal(FatalError::Spawn)?
    #[error("Unable to spawn {spawning}")]
    Spawn {
        /// What we were trying to spawn
        spawning: &'static str,
        /// What happened when we tried to spawn it.
        #[source]
        cause: Arc<SpawnError>,
    },

    /// Tried to launch an onion service that has already been launched.
    #[error("Onion service has already been launched")]
    AlreadyLaunched,
}

impl HasKind for StartupError {
    fn kind(&self) -> ErrorKind {
        use ErrorKind as EK;
        use StartupError as E;
        match self {
            E::Keystore { cause, .. } => cause.kind(),
            E::KeystoreCorrupted => EK::KeystoreCorrupted,
            E::Spawn { cause, .. } => cause.kind(),
            E::AlreadyLaunched => EK::BadApiUsage,
            E::LoadState(e) => e.kind(),
            E::StateDirectoryInaccessible(e) => e.kind(),
            E::StateDirectoryInaccessibleIo { .. } => EK::PersistentStateAccessFailed,
            E::Fatal(e) => e.kind(),
        }
    }
}

impl From<Bug> for StartupError {
    fn from(bug: Bug) -> StartupError {
        FatalError::from(bug).into()
    }
}

/// An error which occurs trying to communicate with a particular client.
///
/// This is returned by `RendRequest::accept` and `StreamRequest::accept`.
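///
/// # Example
///
/// A minimal sketch of handling this error when accepting a rendezvous
/// request; the surrounding request-handling code is illustrative only.
///
/// ```ignore
/// match rend_request.accept().await {
///     Ok(stream_requests) => handle_streams(stream_requests).await,
///     // A failure here affects only this one client; log it and keep serving others.
///     Err(e) => warn!("failed to accept rendezvous request: {e}"),
/// }
/// ```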
#[derive(Clone, Debug, Error)]
#[non_exhaustive]
pub enum ClientError {
    /// Failed to process an INTRODUCE2 request.
    #[error("Could not process INTRODUCE request")]
    BadIntroduce(#[source] IntroRequestError),

    /// Failed to complete a rendezvous request.
    #[error("Could not connect rendezvous circuit.")]
    EstablishSession(#[source] EstablishSessionError),

    /// Failed to send a CONNECTED message and get a stream.
    #[error("Could not accept stream from rendezvous circuit")]
    AcceptStream(#[source] tor_proto::Error),

    /// Failed to send an END message and reject a stream.
    #[error("Could not reject stream from rendezvous circuit")]
    RejectStream(#[source] tor_proto::Error),
}

impl HasKind for ClientError {
    fn kind(&self) -> ErrorKind {
        match self {
            ClientError::BadIntroduce(e) => e.kind(),
            ClientError::EstablishSession(e) => e.kind(),
            ClientError::AcceptStream(e) => e.kind(),
            ClientError::RejectStream(e) => e.kind(),
        }
    }
}

/// Latest time to retry a failed IPT store (eg, disk full)
//
// TODO (#1226): should we make this configurable? Probably not; it's not clear why a
// user would want disk failure errors to be retried on any particular interval.
// Instead it might make more sense to consider a unified strategy for handling
// state errors.
const IPT_STORE_RETRY_MAX: Duration = Duration::from_secs(60);

/// An error arising when trying to store introduction points
///
/// These don't escape the crate, except to be logged.
///
/// These errors might be fatal, or they might be something we should retry.
#[derive(Clone, Debug, Error)]
pub(crate) enum IptStoreError {
    /// Unable to store introduction points
    #[error("Unable to store introduction points")]
    Store(#[from] tor_persist::Error),

    /// Fatal error
    #[error("Fatal error")]
    Fatal(#[from] FatalError),
}

impl From<Bug> for IptStoreError {
    fn from(bug: Bug) -> IptStoreError {
        FatalError::from(bug).into()
    }
}

impl IptStoreError {
    /// Log this error, and report latest time to retry
    ///
    /// It's OK to retry this earlier, if we are prompted somehow by other work;
    /// this is the longest time we should wait, so that we poll periodically
    /// to see if the situation has improved.
    ///
    /// If the operation shouldn't be retried, the problem was a fatal error,
    /// which is simply returned.
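    ///
    /// # Example
    ///
    /// A minimal sketch of the intended calling pattern; the surrounding retry
    /// handling, `store_ipts`, and `sleep` are illustrative, not this crate's
    /// actual code.
    ///
    /// ```ignore
    /// if let Err(e) = store_ipts(&state) {
    ///     // Fatal errors propagate; otherwise wait at most `wait` before retrying.
    ///     let wait = e.log_retry_max(&nickname)?;
    ///     runtime.sleep(wait).await;
    /// }
    /// ```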
    // TODO: should this be a HasRetryTime impl instead?  But that has different semantics.
    pub(crate) fn log_retry_max(self, nick: &HsNickname) -> Result<Duration, FatalError> {
        use IptStoreError as ISE;
        let wait = match self {
            ISE::Store(_) => IPT_STORE_RETRY_MAX,
            ISE::Fatal(e) => return Err(e),
        };
        error_report!(self, "HS service {}: error", nick);
        Ok(wait)
    }
}

/// An error which means we cannot continue to try to operate an onion service.
///
/// These errors only occur during operation, and only for catastrophic reasons
/// (such as the async reactor shutting down).
//
// TODO where is FatalError emitted from this crate into the wider program?
// Perhaps there will be some kind of monitoring handle (TODO (#1083)) that can produce one of these.
#[derive(Clone, Debug, Error)]
#[non_exhaustive]
pub enum FatalError {
    /// Unable to spawn task
    #[error("Unable to spawn {spawning}")]
    Spawn {
        /// What we were trying to spawn
        spawning: &'static str,
        /// What happened when we tried to spawn it.
        #[source]
        cause: Arc<SpawnError>,
    },

    /// Failed to access the keystore.
    #[error("failed to access keystore")]
    Keystore(#[from] tor_keymgr::Error),

    /// Failed to access the keystore due to incompatible concurrent access.
    ///
    /// This can only happen if someone is modifying the contents of the keystore
    /// just as we are trying to access it.
    #[error("keystore {action} failed for {path} (someone else is writing to the keystore?!)")]
    KeystoreRace {
        /// What action we were trying to perform
        action: &'static str,
        /// The ArtiPath we were trying to access
        path: tor_keymgr::ArtiPath,
    },

    /// The identity keypair of the service could not be found in the keystore.
    #[error("Hidden service identity key not found: {0}")]
    MissingHsIdKeypair(HsNickname),

    /// IPT keys found for being-created IPT
    ///
    /// This could only happen if someone is messing with our RNG,
    /// or our code is completely wrong.
    #[error("IPT keys found for being-created IPT {0} (serious key management problems!)")]
    IptKeysFoundUnexpectedly(tor_keymgr::ArtiPath),

    /// The network directory provider is shutting down without giving us the
    /// netdir we asked for.
    #[error("{0}")]
    NetdirProviderShutdown(#[from] NetdirProviderShutdown),

    /// An error caused by a programming issue, or a failure in another
    /// library that we can't work around.
    #[error("Programming error")]
    Bug(#[from] Bug),
}

impl FatalError {
    /// Construct a new `FatalError` from a `SpawnError`.
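    ///
    /// A minimal usage sketch; the spawned future and its label are illustrative:
    ///
    /// ```ignore
    /// runtime
    ///     .spawn(reactor_future)
    ///     .map_err(|e| FatalError::from_spawn("IPT manager reactor", e))?;
    /// ```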
    //
    // TODO lots of our Errors have a function exactly like this.
    pub(super) fn from_spawn(spawning: &'static str, err: SpawnError) -> FatalError {
        FatalError::Spawn {
            spawning,
            cause: Arc::new(err),
        }
    }
}

impl HasKind for FatalError {
    fn kind(&self) -> ErrorKind {
        use ErrorKind as EK;
        use FatalError as FE;
        match self {
            FE::Spawn { cause, .. } => cause.kind(),
            FE::Keystore(e) => e.kind(),
            FE::MissingHsIdKeypair(_) => EK::Internal, // TODO (#1256) This is not always right.
            FE::KeystoreRace { .. } => EK::KeystoreAccessFailed,
            FE::IptKeysFoundUnexpectedly(_) => EK::Internal, // This is indeed quite bad.
            FE::NetdirProviderShutdown(e) => e.kind(),
            FE::Bug(e) => e.kind(),
        }
    }
}

/// Error occurring in [`IptManager::expire_old_ipts_external_persistent_state`](crate::ipt_mgr::IptManager::expire_old_ipts_external_persistent_state)
///
/// All that happens with these errors is that they are logged
/// (with a rate limit).
#[derive(Error, Clone, Debug)]
pub(crate) enum StateExpiryError {
    /// Key expiry failed
    #[error("key(s)")]
    Key(#[from] tor_keymgr::Error),
    /// Replay log expiry (or other things using `tor_persist`) failed
    #[error("replay log(s): failed to {operation} {}", path.display_lossy())]
    ReplayLog {
        /// The actual error
        #[source]
        source: Arc<io::Error>,
        /// The pathname
        path: PathBuf,
        /// What we were doing
        operation: &'static str,
    },
    /// Internal error
    #[error("internal error")]
    Bug(#[from] Bug),
}

impl HasKind for StateExpiryError {
    fn kind(&self) -> ErrorKind {
        use tor_error::ErrorKind as EK;
        use StateExpiryError as SEE;
        match self {
            SEE::Key(e) => e.kind(),
            SEE::ReplayLog { .. } => EK::PersistentStateAccessFailed,
            SEE::Bug(e) => e.kind(),
        }
    }
}