tor_socksproto/handshake/framework.rs
1//! Framework for helping implement a `handshake` function
2//!
3//! Each kind of handshake should:
4//!
5//! * `impl HandshakeImpl`, supplying a `handshake_impl` which does the actual work.
6//!
7//! * Provide the public `fn handshake` function,
8//! in terms of the provided method `HandshakeImpl::run_handshake`.
9//!
10//! * Derive [`Handshake`](derive_deftly_template_Handshake).
11//
// The contents of this module are not in handshake.rs,
// because putting them there would give the protocol implementations
// access to fields of our private types etc.
15//
16// TODO arguably, the handshake module is a redundant level of nesting.
17// We could consider moving its sub-modules into the toplevel,
18// and its handful of items elsewhere.
19
20use std::fmt::Debug;
21use std::mem;
22use std::num::{NonZeroUsize, TryFromIntError};
23
24use derive_deftly::define_derive_deftly;
25use educe::Educe;
26
27use tor_bytes::Reader;
28use tor_error::{internal, Bug};
29
30use crate::SOCKS_BUF_LEN;
31use crate::{Action, Error, Truncated};
32
/// Markers indicating whether we're allowing read-ahead.
///
/// The `P` type parameter on [`Buffer`] et al indicates
/// whether we are doing (only) precise reads:
/// `()` for normal operation, with readahead;
/// `PreciseReads` for reading small amounts as needed.
///
/// ## Normal operation, `P = ()`
///
/// When the SOCKS protocol implementation wants to see more data,
/// [`RecvStep::<()>::buf`] is all of the free space in the buffer.
///
/// The caller will typically read whatever data is available,
/// including possibly data sent by the peer *after* the end of the SOCKS handshake.
/// If so, that data will eventually be returned, after the handshake is complete,
/// by [`Finished::into_output_and_slice`] or [`Finished::into_output_and_vec`].
///
/// ## Avoiding read-ahead, `P = PreciseReads`
///
/// [`RecvStep::<PreciseReads>::buf()`] is only as long as the SOCKS protocol implementation
/// *knows* that it needs.
///
/// Typically this is a very small buffer, often only one byte.
/// This means that a single protocol exchange will involve many iterations
/// each returning a `RecvStep`,
/// and (depending on the caller) each implying one `recv(2)` call or similar.
/// This is not very performant.
/// But it does allow the implementation to avoid reading ahead.
///
/// In this mode, `Finished::into_output` is available,
/// which returns only the output.
pub trait ReadPrecision: ReadPrecisionSealed + Default + Copy + Debug {}
// The two markers: precise reads, and normal operation with read-ahead.
impl ReadPrecision for PreciseReads {}
impl ReadPrecision for () {}
67
/// Sealed, and adjustment of `RecvStep::buf`
///
/// This is a supertrait of [`ReadPrecision`].
pub trait ReadPrecisionSealed {
    /// Adjust `buf` to `deficit`, iff we're doing precise reads
    ///
    /// `buf` is the free space that could be offered to the caller;
    /// the return value is the part of it that actually is offered.
    fn recv_step_buf(buf: &mut [u8], deficit: NonZeroUsize) -> &mut [u8];
}
// Normal operation, with read-ahead: all of the space is offered to the caller.
impl ReadPrecisionSealed for () {
    /// Returns `buf` unchanged; the deficit is ignored.
    fn recv_step_buf(buf: &mut [u8], _deficit: NonZeroUsize) -> &mut [u8] {
        buf
    }
}
78impl ReadPrecisionSealed for PreciseReads {
79 fn recv_step_buf<'b>(buf: &mut [u8], deficit: NonZeroUsize) -> &mut [u8] {
80 &mut buf[0..deficit.into()]
81 }
82}
83
/// Marker indicating precise reads
///
/// Used as the `P` type parameter of [`Buffer`] and related types.
///
/// See [`ReadPrecision`].
#[derive(Default, Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd)]
#[allow(clippy::exhaustive_structs)] // unit marker type
pub struct PreciseReads;
90
/// An input buffer containing maybe some socks data
///
/// `Buffer` has a capacity set at creation time,
/// and records how much data it contains.
///
/// Data is consumed by [`step()`](Handshake::step), and
/// received data is appended using a [`RecvStep`] returned from `step`.
///
/// The `P` type parameter indicates whether we're allowing read-ahead,
/// or doing only precise reads.
/// See [`ReadPrecision`] for details.
//
// `P` prevents accidentally mixing `Finished.into_output`
// with reads into the whole buffer, not limited by the deficit.
#[derive(Educe)]
#[educe(Debug)]
pub struct Buffer<P: ReadPrecision = ()> {
    /// The actual buffer
    ///
    /// (Left out of the `Debug` output.)
    #[educe(Debug(ignore))]
    buf: Box<[u8]>,

    /// `[0..filled]` has data that's been read but not yet drained
    ///
    /// The remainder, `[filled..]`, is the space available for newly received data.
    filled: usize,

    /// Marker for the precision
    //
    // We don't need PhantomData, since P is always a Copy unit.
    #[allow(dead_code)]
    precision: P,
}
121
/// Next step to take in the handshake
///
/// Returned by [`Handshake::step`].
///
/// Instructions from the handshake implementation.
/// Caller should match on this and perform the requested action.
//
// This is an enum, rather than a struct with fields representing different components
// of an instruction, because an enum demands of the caller that they do precisely one thing.
// With a compound instruction struct, it would be quite easy for a caller to
// (sometimes) fail to execute some part(s).
#[derive(Debug)]
#[allow(clippy::exhaustive_enums)] // callers have no good response to unknown variants anyway
pub enum NextStep<'b, O, P: ReadPrecision> {
    /// Caller should send this data to the peer
    Send(Vec<u8>),

    /// Caller should read from the peer and call one of the `received` functions.
    ///
    /// See [`RecvStep`] for the ways of doing this.
    Recv(RecvStep<'b, P>),

    /// The handshake is complete
    ///
    /// The returned [`Finished`] can be used to obtain the handshake output.
    ///
    /// The `Handshake` should not be used any more after this.
    Finished(Finished<'b, O, P>),
}
149
/// A completed handshake
///
/// Represents:
///  * [`Handshake::Output`],
///    a value representing the meaning of the completed protocol exchange.
///  * Possibly, some data which was received, but didn't form part of the protocol.
//
// Returning this in `NextStep::Finished` means that the caller can access the output
// iff the handshake has finished.  Also, this type's API helps prevent accidental
// discard of any readahead that there might be.
#[derive(Debug)]
#[must_use]
pub struct Finished<'b, O, P: ReadPrecision> {
    /// The buffer
    ///
    /// Its filled part is whatever was received but is not part of the handshake.
    buffer: &'b mut Buffer<P>,

    /// Details of the completed handshake:
    output: O,
}
169
170impl<'b, O> Finished<'b, O, PreciseReads> {
171 /// Return (just) the output of the completed handshake
172 ///
173 /// Available only if the `Buffer` was constructed with [`Buffer::new_precise()`]
174 /// (or equivalent).
175 pub fn into_output(self) -> Result<O, Bug> {
176 if let Ok(nonzero) = NonZeroUsize::try_from(self.buffer.filled_slice().len()) {
177 Err(internal!(
178 "handshake complete, but we read too much earlier, and are now misframed by {nonzero} bytes!"
179 ))
180 } else {
181 Ok(self.output)
182 }
183 }
184}
185
186impl<'b, O, P: ReadPrecision> Finished<'b, O, P> {
187 /// Return the output, and the following already-read data as a slice
188 ///
189 /// (After callin gthis, the following already-read data
190 /// will no longer be in the `Buffer`.)
191 pub fn into_output_and_slice(self) -> (O, &'b [u8]) {
192 let filled = mem::take(&mut self.buffer.filled);
193 let data = &self.buffer.buf[0..filled];
194 (self.output, data)
195 }
196
197 /// Return the output, and the following already-read data as a `Vec`
198 ///
199 /// The `Vec` is quite likely to have a considerably larger capacity than contents.
200 /// (Its capacity is usually the original buffer size, when the `Buffer` was created.)
201 ///
202 /// The `Buffer` should not be discarded after calling this;
203 /// it will not be usable.
204 //
205 // Ideally, this would *consume* the Buffer. But that would mean that
206 // `step` would have to take and return the buffer,
207 // which would be quite inconvenient at call sites.
208 pub fn into_output_and_vec(self) -> (O, Vec<u8>) {
209 let mut data = mem::take(&mut self.buffer.buf).into_vec();
210 data.truncate(self.buffer.filled);
211 (self.output, data)
212 }
213
214 /// Return the output of the completed handshake, declaring any readahead a protocol error
215 ///
216 /// This function is appropriate when the peer is not supposed to send data
217 /// until the handshake is complete.
218 /// If data *did* arrive before then, and was read, we call it a protocol error,
219 /// [`Error::ForbiddenPipelining`].
220 pub fn into_output_forbid_pipelining(self) -> Result<O, Error> {
221 if !self.buffer.filled_slice().is_empty() {
222 Err(Error::ForbiddenPipelining)
223 } else {
224 Ok(self.output)
225 }
226 }
227}
228
/// Next step - details for reading from the peer
///
/// Value in [`NextStep::Recv`].
///
/// Caller should read from the peer and call one of the `received` functions.
/// Specifically, caller should do one of the following:
///
///  1. Read some data into the slice returned by [`.buf()`](RecvStep::buf),
///     and then call [`.note_received()`](RecvStep::note_received).
///
///  2. Determine the available buffer space with [`.buf()`](RecvStep::buf)`.len()`,
///     write some data into the buffer's [`unfilled_slice()`](Buffer::unfilled_slice),
///     and call [`Buffer::note_received`].
///     This allows the caller to
///     dispose of the [`RecvStep`] (which mutably borrows the `Buffer`)
///     while reading,
///     at the cost of slightly less correctness checking by the compiler.
///
/// The caller should *not* wait for enough data to fill the whole `buf`.
#[derive(Debug)]
pub struct RecvStep<'b, P: ReadPrecision> {
    /// The buffer
    buffer: &'b mut Buffer<P>,

    /// Lower bound on the number of bytes that the handshake needs to read to complete.
    ///
    /// Useful only for callers that want to avoid reading beyond the end of the handshake.
    /// Always `<= .buf().len()`.
    ///
    /// The value has the same semantics as
    /// the `deficit` field of [`tor_bytes::Error::Incomplete`].
    deficit: NonZeroUsize,
}
262
263impl<'b, P: ReadPrecision> RecvStep<'b, P> {
264 /// Returns the buffer slice the caller should write data into.
265 ///
266 /// For precise reads, returns the slice of the buffer of length `deficit`.
267 /// sol as to avoid reading ahead beyond the end of the handshake.
268 pub fn buf(&mut self) -> &mut [u8] {
269 P::recv_step_buf(self.buffer.unfilled_slice(), self.deficit)
270 }
271
272 /// Notes that `len` bytes have been received.
273 ///
274 /// The actual data must already have been written to the slice from `.buf()`.
275 ///
276 /// If `len == 0`, treats this as having received EOF (which is an error).
277 ///
278 /// # Panics
279 ///
280 /// `len` must be no more than `.buf().len()`.
281 pub fn note_received(self, len: usize) -> Result<(), Error> {
282 let len = len
283 .try_into()
284 .map_err(|_: TryFromIntError| Error::UnexpectedEof)?;
285 self.buffer.note_received(len);
286 Ok(())
287 }
288}
289
290impl<P: ReadPrecision> Default for Buffer<P> {
291 fn default() -> Self {
292 Buffer::with_size(SOCKS_BUF_LEN)
293 }
294}
295
296impl Buffer<()> {
297 /// Creates a new default `Buffer`
298 pub fn new() -> Self {
299 Self::default()
300 }
301}
302
303impl Buffer<PreciseReads> {
304 /// Creates a new `Buffer` for reeading precisely
305 ///
306 /// ```
307 /// use tor_socksproto::{Handshake as _, SocksProxyHandshake, SocksRequest};
308 ///
309 /// let mut hs = SocksProxyHandshake::new();
310 /// let mut buf = tor_socksproto::Buffer::new_precise();
311 /// ```
312 pub fn new_precise() -> Self {
313 Self::default()
314 }
315}
316
317impl<P: ReadPrecision> Buffer<P> {
318 /// Creates a new `Buffer` with a specified size
319 ///
320 /// Specify the `P` type parameter according to whether you wanted
321 /// a `Buffer` like from [`Buffer::new()`], which will read eagerly,
322 /// or one like from [`Buffer::new_precise()`], which will read eagerly,
323 /// See [`ReadPrecision`].
324 ///
325 /// ```
326 /// let mut buf = tor_socksproto::Buffer::<tor_socksproto::PreciseReads>::with_size(2048);
327 /// ```
328 pub fn with_size(size: usize) -> Self {
329 Buffer {
330 buf: vec![0xaa; size].into(),
331 filled: 0,
332 precision: P::default(),
333 }
334 }
335
336 /// Creates a new `Buffer` from a partially-filed buffer
337 ///
338 /// * `buf[..filled]` should contain data already read from the peer
339 /// * `buf[filled..]` should be zero (or other innocuous data),
340 /// and will not be used (except if there are bugs)
341 ///
342 /// Using this and `into_parts` to obtain a `Buffer`
343 /// with a differetn the read precision (different `P` type parameter)
344 /// can result in malfunctions.
345 pub fn from_parts(buf: Box<[u8]>, filled: usize) -> Self {
346 Buffer {
347 buf,
348 filled,
349 precision: P::default(),
350 }
351 }
352
353 /// Disassembles a `Buffer`, returning the pieces
354 pub fn into_parts(self) -> (Box<[u8]>, usize) {
355 let Buffer {
356 buf,
357 filled,
358 precision: _,
359 } = self;
360 (buf, filled)
361 }
362
363 /// The portion of the buffer that is available for writing new data.
364 ///
365 /// The caller may fill this (from the beginning) with more data,
366 /// and then call [`Buffer::note_received`].
367 /// Normally, the caller will do this after receiving a [`NextStep::Recv`] instruction.
368 ///
369 /// Where possible, prefer [`RecvStep::buf`] and [`RecvStep::note_received`].
370 pub fn unfilled_slice(&mut self) -> &mut [u8] {
371 &mut self.buf[self.filled..]
372 }
373
374 /// The portion of the buffer that contains already-read, but unprocessed, data.
375 ///
376 /// Callers will not normally want this.
377 pub fn filled_slice(&mut self) -> &[u8] {
378 &self.buf[..self.filled]
379 }
380
381 /// Notes that `len` bytes have been received.
382 ///
383 /// The actual data must already have been written to the slice from `.unfilled_slice()`.
384 /// Where possible, prefer [`RecvStep::buf`] and [`RecvStep::note_received`].
385 ///
386 /// (It doesn't make sense to call this with `len == 0`.
387 /// If the `0` came from a read call, this indicates EOF -
388 /// but that might not be an error if the protocol implemnetation doesn't need more data.
389 /// [`RecvStep::note_received`] handles this properly.)
390 ///
391 /// # Panics
392 ///
393 /// `len` must be no more than `.unfilled_slice().len()`.
394 pub fn note_received(&mut self, len: NonZeroUsize) {
395 let len = usize::from(len);
396 assert!(len <= self.unfilled_slice().len());
397 self.filled += len;
398 }
399}
400
define_derive_deftly! {
    /// Macro-generated components for a handshake outer state structure
    ///
    /// # Requirements
    ///
    ///  * Must be a struct containing `state: State`
    ///  * `State` must be in scope as a binding at the derivation site
    ///  * `State` must have a unit variant `Failed`
    ///  * One `Option` field must be decorated `#[deftly(handshake(output))]`
    ///
    /// # Generates
    ///
    ///  * Implementation of `Handshake`
    ///  * Implementation of `HasHandshakeState`
    ///  * Implementation of `HasHandshakeOutput`
    //
    // An alternative would be to have each handshake contain an enum
    // which we handle here ourselves, moving `Done` and `Failed` here.
    // But currently each handshake stores state outside `state`;
    // some more intermediate structs would be needed.
    Handshake for struct, expect items:

    impl $crate::handshake::framework::HasHandshakeState for $ttype {
        fn set_failed(&mut self) {
            self.state = State::Failed {};
        }
    }

    $(
        // This is supposed to happen precisely once
        ${when fmeta(handshake(output))}

        // This trick extracts the T from Option<T>
        ${define OUTPUT { <$ftype as IntoIterator>::Item }}

        impl $crate::handshake::framework::Handshake for $ttype {
            type Output = $OUTPUT;
        }

        impl $crate::handshake::framework::HasHandshakeOutput<$OUTPUT> for $ttype {
            fn take_output(&mut self) -> Option<$OUTPUT> {
                // using UFCS arranges that we check that $ftype really is Option
                Option::take(&mut self.$fname)
            }
        }
    )
}
448#[allow(unused_imports)] // false positives, rust#130570, see also derive-deftly #117
449#[allow(clippy::single_component_path_imports)] // false positive, see rust-clippy#13419
450use derive_deftly_template_Handshake; // for rustdoc's benefit
451
/// The internal (implementation-side) representation of the next step to take
///
/// `handshake_impl` must not both consume nothing from the `Reader`
/// and return `Reply { reply: vec![] }`,
/// since that would imply an infinite loop.
pub(crate) enum ImplNextStep {
    /// Send some data to the peer
    Reply {
        /// The message to send
        reply: Vec<u8>,
    },

    /// We're done.  The output is available.
    Finished,
}
467
/// `Handshake` structs that have a state that can be `Failed`
///
/// Derive this with
/// [`#[derive_deftly(Handshake)]`](derive_deftly_template_Handshake).
pub(super) trait HasHandshakeState {
    /// Set the state to `Failed`
    ///
    /// Called when the handshake has encountered an error.
    fn set_failed(&mut self);
}
476
/// `Handshake` structs whose output can be obtained
///
/// Derive this with
/// [`#[derive_deftly(Handshake)]`](derive_deftly_template_Handshake).
pub(super) trait HasHandshakeOutput<O> {
    /// Obtain the output from a handshake completed with [`.handshake`](Handshake::handshake)
    ///
    /// Call only if `Action` said `finished`, and then only once.
    /// Otherwise, will return `None`.
    ///
    /// ([`Handshake::step`] calls this itself, when the handshake finishes.)
    fn take_output(&mut self) -> Option<O>;
}
488
489/// `Handshake`s: `SocksClientHandshake` or `SocksProxyHandshake`
490pub(super) trait HandshakeImpl: HasHandshakeState {
491 /// Actual implementation, to be provided
492 ///
493 /// Does not need to handle setting the state to `Failed` on error.
494 /// But *does* need to handle setting the state to `Done` if applicable.
495 ///
496 /// May return the error from the `Reader`, in `Error::Decode`.
497 /// (For example,. `Error::Decode(tor_bytes::Error::Incomplete)`
498 /// if the message was incomplete and reading more data would help.)
499 fn handshake_impl(&mut self, r: &mut tor_bytes::Reader<'_>) -> crate::Result<ImplNextStep>;
500
501 /// Helper, used by public API implementations to call `handshake_impl`.
502 ///
503 /// Deals with:
504 /// * Setting up the `Reader`
505 /// * Determining the amount drained.
506 /// * Avoiding infinite loops (detect nothing drained, nothing replied)
507 ///
508 /// Return value is `(drain, Result<ImplNextStep>)`.
509 fn call_handshake_impl(&mut self, input: &[u8]) -> (usize, crate::Result<ImplNextStep>) {
510 let mut b = Reader::from_possibly_incomplete_slice(input);
511 let rv = self.handshake_impl(&mut b);
512 let drain = b.consumed();
513
514 // avoid infinite loop
515 match &rv {
516 Ok(ImplNextStep::Reply { reply }) if reply.is_empty() && drain == 0 => {
517 return (
518 0,
519 Err(
520 internal!("protocol implementation drained nothing, replied nothing")
521 .into(),
522 ),
523 )
524 }
525 _ => {}
526 };
527
528 (drain, rv)
529 }
530}
531
532/// Handshake
533#[allow(private_bounds)] // This is a sealed trait, that's expected
534pub trait Handshake: HandshakeImpl + HasHandshakeOutput<Self::Output> {
535 /// Output from the handshake: the meaning, as we understand it
536 type Output: Debug;
537
538 /// Drive a handshake forward, determining what the next step is
539 ///
540 /// ```no_run
541 /// # fn main() -> Result<(), anyhow::Error> {
542 /// use std::io::{Read as _, Write as _};
543 /// use tor_socksproto::{Handshake as _, SocksProxyHandshake, SocksRequest};
544 ///
545 /// let socket: std::net::TcpStream = todo!();
546 ///
547 /// let mut hs = SocksProxyHandshake::new();
548 /// let mut buf = tor_socksproto::Buffer::new();
549 /// let (request, data_read_ahead) = loop {
550 /// use tor_socksproto::NextStep;
551 /// match hs.step(&mut buf)? {
552 /// NextStep::Send(data) => socket.write_all(&data)?,
553 /// NextStep::Recv(recv) => {
554 /// let got = socket.read(recv.buf())?;
555 /// recv.note_received(got);
556 /// },
557 /// NextStep::Finished(request) => break request.into_output_and_vec(),
558 /// }
559 /// };
560 /// let _: SocksRequest = request;
561 /// let _: Vec<u8> = data_read_ahead;
562 ///
563 /// // Or, with precise reading:
564 ///
565 /// //...
566 /// let mut buf = tor_socksproto::Buffer::new_precise();
567 /// let request = loop {
568 /// use tor_socksproto::NextStep;
569 /// match hs.step(&mut buf)? {
570 /// //...
571 /// NextStep::Finished(request) => break request.into_output()?,
572 /// # _ => todo!(),
573 /// }
574 /// };
575 /// let _: SocksRequest = request;
576 /// # }
577 /// ```
578 ///
579 /// See `[ReadPrecision]` for information about read precision and the `P` type parameter.
580 fn step<'b, P: ReadPrecision>(
581 &mut self,
582 buffer: &'b mut Buffer<P>,
583 ) -> Result<NextStep<'b, <Self as Handshake>::Output, P>, Error> {
584 let (drain, rv) = self.call_handshake_impl(buffer.filled_slice());
585
586 if let Err(Error::Decode(tor_bytes::Error::Incomplete { deficit, .. })) = rv {
587 let deficit = deficit.into_inner();
588 return if usize::from(deficit) > buffer.unfilled_slice().len() {
589 Err(Error::MessageTooLong {
590 limit: buffer.buf.len(),
591 })
592 } else {
593 Ok(NextStep::Recv(RecvStep { buffer, deficit }))
594 };
595 };
596
597 let rv = rv?;
598
599 buffer.buf.copy_within(drain..buffer.filled, 0);
600 buffer.filled -= drain;
601
602 Ok(match rv {
603 ImplNextStep::Reply { reply } => NextStep::Send(reply),
604 ImplNextStep::Finished => {
605 let output = self.take_output().ok_or_else(|| internal!("no output!"))?;
606 NextStep::Finished(Finished { buffer, output })
607 }
608 })
609 }
610
611 /// Try to advance the handshake, given some peer input in
612 /// `input`.
613 ///
614 /// If there isn't enough input, gives a [`Truncated`].
615 /// In this case, *the caller must retain the input*, and pass it to a later
616 /// invocation of `handshake`. Input should only be regarded as consumed when
617 /// the `Action::drain` field is nonzero.
618 ///
619 /// Other errors (besides `Truncated`) indicate a failure.
620 ///
621 /// On success, return an Action describing what to tell the peer,
622 /// and how much of its input to consume.
623 //
624 // When removing this API, also remove `Action`.
625 #[deprecated = "use the new Handshake::step API instead"]
626 fn handshake(&mut self, input: &[u8]) -> crate::TResult<Action> {
627 let (drain, rv) = self.call_handshake_impl(input);
628 match rv {
629 #[allow(deprecated)]
630 Err(Error::Decode(
631 tor_bytes::Error::Incomplete { .. } | tor_bytes::Error::Truncated,
632 )) => Err(Truncated::new()),
633 Err(e) => {
634 self.set_failed();
635 Ok(Err(e))
636 }
637 Ok(ImplNextStep::Reply { reply }) => Ok(Ok(Action {
638 drain,
639 reply,
640 finished: false,
641 })),
642 Ok(ImplNextStep::Finished) => Ok(Ok(Action {
643 drain,
644 reply: vec![],
645 finished: true,
646 })),
647 }
648 }
649
650 /// [`Handshake::handshake`] for tests
651 ///
652 /// This wrapper function allows us to avoid writing many (or broad) allows in our tests.
653 #[cfg(test)]
654 #[allow(deprecated)]
655 fn handshake_for_tests(&mut self, input: &[u8]) -> crate::TResult<Action> {
656 self.handshake(input)
657 }
658}