tor_socksproto/handshake/
framework.rs

1//! Framework for helping implement a `handshake` function
2//!
3//! Each kind of handshake should:
4//!
5//!  * `impl HandshakeImpl`, supplying a `handshake_impl` which does the actual work.
6//!
7//!  * Provide the public `fn handshake` function,
8//!    in terms of the provided method `HandshakeImpl::run_handshake`.
9//!
10//!  * Derive [`Handshake`](derive_deftly_template_Handshake).
11//
// The contents of this module are not in handshake.rs,
// because putting them there would give the protocol implementations
14// access to fields of our private types etc.
15//
16// TODO arguably, the handshake module is a redundant level of nesting.
17// We could consider moving its sub-modules into the toplevel,
18// and its handful of items elsewhere.
19
20use std::fmt::Debug;
21use std::mem;
22use std::num::{NonZeroUsize, TryFromIntError};
23
24use derive_deftly::define_derive_deftly;
25use educe::Educe;
26
27use tor_bytes::Reader;
28use tor_error::{internal, Bug};
29
30use crate::SOCKS_BUF_LEN;
31use crate::{Action, Error, Truncated};
32
/// Markers indicating whether we're allowing read-ahead.
///
/// The `P` type parameter on [`Buffer`] et al indicates
/// whether we are doing (only) precise reads:
/// `()` for normal operation, with readahead;
/// `PreciseReads` for reading small amounts as needed.
///
/// ## Normal operation, `P = ()`
///
/// When the SOCKS protocol implementation wants to see more data,
/// [`RecvStep::<()>::buf`] is all of the free space in the buffer.
///
/// The caller will typically read whatever data is available,
/// including possibly data sent by the peer *after* the end of the SOCKS handshake.
/// If so, that data will eventually be returned, after the handshake is complete,
/// by [`Finished::into_output_and_slice`] or [`Finished::into_output_and_vec`].
///
/// ## Avoiding read-ahead, `P = PreciseReads`
///
/// [`RecvStep::<PreciseReads>::buf()`] is only as long as the SOCKS protocol implementation
/// *knows* that it needs.
///
/// Typically this is a very small buffer, often only one byte.
/// This means that a single protocol exchange will involve many iterations
/// each returning a `RecvStep`,
/// and (depending on the caller) each implying one `recv(2)` call or similar.
/// This is not very performant.
/// But it does allow the implementation to avoid reading ahead.
///
/// In this mode, `Finished::into_output` is available,
/// which returns only the output.
//
// The actual behaviour lives in the supertrait `ReadPrecisionSealed`;
// this trait itself has no items, and is implemented only for the two markers below.
pub trait ReadPrecision: ReadPrecisionSealed + Default + Copy + Debug {}
impl ReadPrecision for PreciseReads {}
impl ReadPrecision for () {}
67
/// Sealed, and adjustment of `RecvStep::buf`
///
/// Supertrait of [`ReadPrecision`].
/// Each read-precision marker type supplies its own `recv_step_buf`,
/// which is how `RecvStep::buf` decides how much of the free space to expose.
pub trait ReadPrecisionSealed {
    /// Adjust `buf` to `deficit`, iff we're doing precise reads
    ///
    /// `buf` is the unfilled part of the buffer;
    /// `deficit` is the number of bytes the protocol implementation reported it still needs.
    /// Returns the (sub)slice that the caller should read into.
    fn recv_step_buf(buf: &mut [u8], deficit: NonZeroUsize) -> &mut [u8];
}
// Normal operation (read-ahead allowed): the deficit is ignored.
impl ReadPrecisionSealed for () {
    /// Returns all of `buf` unchanged, so the caller may read as much as fits
    fn recv_step_buf(buf: &mut [u8], _deficit: NonZeroUsize) -> &mut [u8] {
        buf
    }
}
78impl ReadPrecisionSealed for PreciseReads {
79    fn recv_step_buf<'b>(buf: &mut [u8], deficit: NonZeroUsize) -> &mut [u8] {
80        &mut buf[0..deficit.into()]
81    }
82}
83
/// Marker indicating precise reads
///
/// Used as the `P` type parameter of [`Buffer`] and related types,
/// to request that no more data is read than the handshake is known to need.
///
/// See [`ReadPrecision`].
#[derive(Default, Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd)]
#[allow(clippy::exhaustive_structs)]
pub struct PreciseReads;
90
/// An input buffer containing maybe some socks data
///
/// `Buffer` has a capacity set at creation time,
/// and records how much data it contains.
///
/// Data is consumed by [`step()`](Handshake::step), and
/// received data is appended using a [`RecvStep`] returned from `step`.
///
/// The `P` type parameter indicates whether we're allowing read-ahead,
/// or doing only precise reads.
/// See [`ReadPrecision`] for details.
//
// `P` prevents accidentally mixing `Finished.into_output`
// with reads into the whole buffer, not limited by the deficit.
#[derive(Educe)]
#[educe(Debug)]
pub struct Buffer<P: ReadPrecision = ()> {
    /// The actual buffer
    ///
    /// Its length is the buffer's total capacity.
    /// (Excluded from the `Debug` output.)
    #[educe(Debug(ignore))]
    buf: Box<[u8]>,

    /// `[0..filled]` has data that's been read but not yet drained
    ///
    /// The rest of `buf`, `[filled..]`, is free space for newly received data.
    filled: usize,

    /// Marker for the precision
    //
    // We don't need PhantomData, since P is always a Copy unit.
    #[allow(dead_code)]
    precision: P,
}
121
/// Next step to take in the handshake
///
/// Returned by [`Handshake::step`].
///
/// Instructions from the handshake implementation.
/// Caller should match on this and perform the requested action.
//
// This is an enum, rather than a struct with fields representing different components
// of an instruction, because an enum demands of the caller that they do precisely one thing.
// With a compound instruction struct, it would be quite easy for a caller to
// (sometimes) fail to execute some part(s).
#[derive(Debug)]
#[allow(clippy::exhaustive_enums)] // callers have no good response to unknown variants anyway
pub enum NextStep<'b, O, P: ReadPrecision> {
    /// Caller should send this data to the peer
    Send(Vec<u8>),

    /// Caller should read from the peer and call one of the `received` functions.
    ///
    /// See [`RecvStep`] for the ways of doing this.
    Recv(RecvStep<'b, P>),

    /// The handshake is complete
    ///
    /// The returned [`Finished`] can be used to obtain the handshake output.
    ///
    /// The `Handshake` should not be used any more after this.
    Finished(Finished<'b, O, P>),
}
149
/// A completed handshake
///
/// Represents:
///  * [`Handshake::Output`],
///    a value representing the meaning of the completed protocol exchange.
///  * Possibly, some data which was received, but didn't form part of the protocol.
//
// Returning this in `NextStep::Finished` means that the caller can access the output
// iff the handshake has finished.  Also, this type's API helps prevent accidental
// discard of any readahead that there might be.
#[derive(Debug)]
#[must_use]
pub struct Finished<'b, O, P: ReadPrecision> {
    /// The buffer
    ///
    /// Any data still in it was read from the peer but not consumed by the handshake.
    buffer: &'b mut Buffer<P>,

    /// Details of the completed handshake:
    output: O,
}
169
170impl<'b, O> Finished<'b, O, PreciseReads> {
171    /// Return (just) the output of the completed handshake
172    ///
173    /// Available only if the `Buffer` was constructed with [`Buffer::new_precise()`]
174    /// (or equivalent).
175    pub fn into_output(self) -> Result<O, Bug> {
176        if let Ok(nonzero) = NonZeroUsize::try_from(self.buffer.filled_slice().len()) {
177            Err(internal!(
178 "handshake complete, but we read too much earlier, and are now misframed by {nonzero} bytes!"
179            ))
180        } else {
181            Ok(self.output)
182        }
183    }
184}
185
186impl<'b, O, P: ReadPrecision> Finished<'b, O, P> {
187    /// Return the output, and the following already-read data as a slice
188    ///
189    /// (After callin gthis, the following already-read data
190    /// will no longer be in the `Buffer`.)
191    pub fn into_output_and_slice(self) -> (O, &'b [u8]) {
192        let filled = mem::take(&mut self.buffer.filled);
193        let data = &self.buffer.buf[0..filled];
194        (self.output, data)
195    }
196
197    /// Return the output, and the following already-read data as a `Vec`
198    ///
199    /// The `Vec` is quite likely to have a considerably larger capacity than contents.
200    /// (Its capacity is usually the original buffer size, when the `Buffer` was created.)
201    ///
202    /// The `Buffer` should not be discarded after calling this;
203    /// it will not be usable.
204    //
205    // Ideally, this would *consume* the Buffer.  But that would mean that
206    // `step` would have to take and return the buffer,
207    // which would be quite inconvenient at call sites.
208    pub fn into_output_and_vec(self) -> (O, Vec<u8>) {
209        let mut data = mem::take(&mut self.buffer.buf).into_vec();
210        data.truncate(self.buffer.filled);
211        (self.output, data)
212    }
213
214    /// Return the output of the completed handshake, declaring any readahead a protocol error
215    ///
216    /// This function is appropriate when the peer is not supposed to send data
217    /// until the handshake is complete.
218    /// If data *did* arrive before then, and was read, we call it a protocol error,
219    /// [`Error::ForbiddenPipelining`].
220    pub fn into_output_forbid_pipelining(self) -> Result<O, Error> {
221        if !self.buffer.filled_slice().is_empty() {
222            Err(Error::ForbiddenPipelining)
223        } else {
224            Ok(self.output)
225        }
226    }
227}
228
/// Next step - details for reading from the peer
///
/// Value in [`NextStep::Recv`].
///
/// Caller should read from the peer and call one of the `received` functions.
/// Specifically, caller should do one of the following:
///
///  1. Read some data into the slice returned by [`.buf()`](RecvStep::buf),
///     and then call [`.note_received()`](RecvStep::note_received).
///
///  2. Determine the available buffer space with [`.buf()`](RecvStep::buf)`.len()`,
///     write some data into the buffer's [`unfilled_slice()`](Buffer::unfilled_slice),
///     and call [`Buffer::note_received`].
///     This allows the caller to
///     dispose of the [`RecvStep`] (which mutably borrows the `Buffer`)
///     while reading,
///     at the cost of slightly less correctness checking by the compiler.
///
/// The caller should *not* wait for enough data to fill the whole `buf`.
#[derive(Debug)]
pub struct RecvStep<'b, P: ReadPrecision> {
    /// The buffer
    buffer: &'b mut Buffer<P>,

    /// Lower bound on the number of bytes that the handshake needs to read to complete.
    ///
    /// Useful only for callers that want to avoid reading beyond the end of the handshake.
    /// Always `<= .buf().len()`.
    ///
    /// The value has the same semantics as
    /// the `deficit` field of `tor_bytes::Error::Incomplete`.
    deficit: NonZeroUsize,
}
262
263impl<'b, P: ReadPrecision> RecvStep<'b, P> {
264    /// Returns the buffer slice the caller should write data into.
265    ///
266    /// For precise reads, returns the slice of the buffer of length `deficit`.
267    /// sol as to avoid reading ahead beyond the end of the handshake.
268    pub fn buf(&mut self) -> &mut [u8] {
269        P::recv_step_buf(self.buffer.unfilled_slice(), self.deficit)
270    }
271
272    /// Notes that `len` bytes have been received.
273    ///
274    /// The actual data must already have been written to the slice from `.buf()`.
275    ///
276    /// If `len == 0`, treats this as having received EOF (which is an error).
277    ///
278    /// # Panics
279    ///
280    /// `len` must be no more than `.buf().len()`.
281    pub fn note_received(self, len: usize) -> Result<(), Error> {
282        let len = len
283            .try_into()
284            .map_err(|_: TryFromIntError| Error::UnexpectedEof)?;
285        self.buffer.note_received(len);
286        Ok(())
287    }
288}
289
290impl<P: ReadPrecision> Default for Buffer<P> {
291    fn default() -> Self {
292        Buffer::with_size(SOCKS_BUF_LEN)
293    }
294}
295
296impl Buffer<()> {
297    /// Creates a new default `Buffer`
298    pub fn new() -> Self {
299        Self::default()
300    }
301}
302
303impl Buffer<PreciseReads> {
304    /// Creates a new `Buffer` for reeading precisely
305    ///
306    /// ```
307    /// use tor_socksproto::{Handshake as _, SocksProxyHandshake, SocksRequest};
308    ///
309    /// let mut hs = SocksProxyHandshake::new();
310    /// let mut buf = tor_socksproto::Buffer::new_precise();
311    /// ```
312    pub fn new_precise() -> Self {
313        Self::default()
314    }
315}
316
317impl<P: ReadPrecision> Buffer<P> {
318    /// Creates a new `Buffer` with a specified size
319    ///
320    /// Specify the `P` type parameter according to whether you wanted
321    /// a `Buffer` like from [`Buffer::new()`], which will read eagerly,
322    /// or one like from [`Buffer::new_precise()`], which will read eagerly,
323    /// See [`ReadPrecision`].
324    ///
325    /// ```
326    /// let mut buf = tor_socksproto::Buffer::<tor_socksproto::PreciseReads>::with_size(2048);
327    /// ```
328    pub fn with_size(size: usize) -> Self {
329        Buffer {
330            buf: vec![0xaa; size].into(),
331            filled: 0,
332            precision: P::default(),
333        }
334    }
335
336    /// Creates a new `Buffer` from a partially-filed buffer
337    ///
338    ///  * `buf[..filled]` should contain data already read from the peer
339    ///  * `buf[filled..]` should be zero (or other innocuous data),
340    ///    and will not be used (except if there are bugs)
341    ///
342    /// Using this and `into_parts` to obtain a `Buffer`
343    /// with a differetn the read precision (different `P` type parameter)
344    /// can result in malfunctions.
345    pub fn from_parts(buf: Box<[u8]>, filled: usize) -> Self {
346        Buffer {
347            buf,
348            filled,
349            precision: P::default(),
350        }
351    }
352
353    /// Disassembles a `Buffer`, returning the pieces
354    pub fn into_parts(self) -> (Box<[u8]>, usize) {
355        let Buffer {
356            buf,
357            filled,
358            precision: _,
359        } = self;
360        (buf, filled)
361    }
362
363    /// The portion of the buffer that is available for writing new data.
364    ///
365    /// The caller may fill this (from the beginning) with more data,
366    /// and then call [`Buffer::note_received`].
367    /// Normally, the caller will do this after receiving a [`NextStep::Recv`] instruction.
368    ///
369    /// Where possible, prefer [`RecvStep::buf`] and [`RecvStep::note_received`].
370    pub fn unfilled_slice(&mut self) -> &mut [u8] {
371        &mut self.buf[self.filled..]
372    }
373
374    /// The portion of the buffer that contains already-read, but unprocessed, data.
375    ///
376    /// Callers will not normally want this.
377    pub fn filled_slice(&mut self) -> &[u8] {
378        &self.buf[..self.filled]
379    }
380
381    /// Notes that `len` bytes have been received.
382    ///
383    /// The actual data must already have been written to the slice from `.unfilled_slice()`.
384    /// Where possible, prefer [`RecvStep::buf`] and [`RecvStep::note_received`].
385    ///
386    /// (It doesn't make sense to call this with `len == 0`.
387    /// If the `0` came from a read call, this indicates EOF -
388    /// but that might not be an error if the protocol implemnetation doesn't need more data.
389    /// [`RecvStep::note_received`] handles this properly.)
390    ///
391    /// # Panics
392    ///
393    /// `len` must be no more than `.unfilled_slice().len()`.
394    pub fn note_received(&mut self, len: NonZeroUsize) {
395        let len = usize::from(len);
396        assert!(len <= self.unfilled_slice().len());
397        self.filled += len;
398    }
399}
400
define_derive_deftly! {
    /// Macro-generated components for a handshake outer state structure
    ///
    /// # Requirements
    ///
    ///  * Must be a struct containing `state: State`
    ///  * `State` must be in scope as a binding at the derivation site
    ///  * `State` must have a unit variant `Failed`
    ///  * One `Option` field must be decorated `#[deftly(handshake(output))]`
    ///
    /// # Generates
    ///
    ///  * Implementation of `Handshake`
    ///  * Implementation of `HasHandshakeState`
    ///  * Implementation of `HasHandshakeOutput`
    //
    // An alternative would be to have each handshake contain an enum
    // which we handle here ourselves, moving `Done` and `Failed` here.
    // But currently each handshake stores state outside `state`;
    // some more intermediate structs would be needed.
    Handshake for struct, expect items:

    impl $crate::handshake::framework::HasHandshakeState for $ttype {
        fn set_failed(&mut self) {
            self.state = State::Failed {};
        }
    }

  $(
    // This is supposed to happen precisely once
    ${when fmeta(handshake(output))}

    // This trick extracts the T from Option<T>
    ${define OUTPUT { <$ftype as IntoIterator>::Item }}

    impl $crate::handshake::framework::Handshake for $ttype {
        type Output = $OUTPUT;
    }

    impl $crate::handshake::framework::HasHandshakeOutput<$OUTPUT> for $ttype {
        fn take_output(&mut self) -> Option<$OUTPUT> {
            // using UFCS arranges that we check that $ftype really is Option
            Option::take(&mut self.$fname)
        }
    }
  )
}
448#[allow(unused_imports)] // false positives, rust#130570, see also derive-deftly #117
449#[allow(clippy::single_component_path_imports)] // false positive, see rust-clippy#13419
450use derive_deftly_template_Handshake; // for rustdoc's benefit
451
/// The internal (implementation-side) representation of the next step to take
///
/// Returned by `HandshakeImpl::handshake_impl`.
///
/// `handshake_impl` may not consume nothing from the `Reader`
/// and return `Reply { reply: vec![] }`,
/// since that would imply an infinite loop.
/// (`HandshakeImpl::call_handshake_impl` detects this and reports an internal error.)
pub(crate) enum ImplNextStep {
    /// Send some data to the peer
    Reply {
        /// The message to send
        reply: Vec<u8>,
    },

    /// We're done.  The output is available.
    Finished,
}
467
/// `Handshake` structs that have a state that can be `Failed`
///
/// Supertrait of `HandshakeImpl`.
///
/// Derive this with
/// [`#[derive_deftly(Handshake)]`](derive_deftly_template_Handshake).
pub(super) trait HasHandshakeState {
    /// Set the state to `Failed`
    ///
    /// Called by the framework when the handshake implementation reports a fatal error,
    /// so that the implementation does not have to do this itself.
    fn set_failed(&mut self);
}
476
/// `Handshake` structs whose output can be obtained
///
/// Supertrait of [`Handshake`], with `O` being [`Handshake::Output`].
///
/// Derive this with
/// [`#[derive_deftly(Handshake)]`](derive_deftly_template_Handshake).
pub(super) trait HasHandshakeOutput<O> {
    /// Obtain the output from a handshake completed with [`.handshake`](Handshake::handshake)
    ///
    /// Call only if `Action` said `finished`, and then only once.
    /// Otherwise, will return `None`.
    ///
    /// ([`Handshake::step`] calls this itself when the handshake finishes,
    /// and returns the output inside [`Finished`].)
    fn take_output(&mut self) -> Option<O>;
}
488
489/// `Handshake`s: `SocksClientHandshake` or `SocksProxyHandshake`
490pub(super) trait HandshakeImpl: HasHandshakeState {
491    /// Actual implementation, to be provided
492    ///
493    /// Does not need to handle setting the state to `Failed` on error.
494    /// But *does* need to handle setting the state to `Done` if applicable.
495    ///
496    /// May return the error from the `Reader`, in `Error::Decode`.
497    /// (For example,. `Error::Decode(tor_bytes::Error::Incomplete)`
498    /// if the message was incomplete and reading more data would help.)
499    fn handshake_impl(&mut self, r: &mut tor_bytes::Reader<'_>) -> crate::Result<ImplNextStep>;
500
501    /// Helper, used by public API implementations to call `handshake_impl`.
502    ///
503    /// Deals with:
504    ///  * Setting up the `Reader`
505    ///  * Determining the amount drained.
506    ///  * Avoiding infinite loops (detect nothing drained, nothing replied)
507    ///
508    /// Return value is `(drain, Result<ImplNextStep>)`.
509    fn call_handshake_impl(&mut self, input: &[u8]) -> (usize, crate::Result<ImplNextStep>) {
510        let mut b = Reader::from_possibly_incomplete_slice(input);
511        let rv = self.handshake_impl(&mut b);
512        let drain = b.consumed();
513
514        // avoid infinite loop
515        match &rv {
516            Ok(ImplNextStep::Reply { reply }) if reply.is_empty() && drain == 0 => {
517                return (
518                    0,
519                    Err(
520                        internal!("protocol implementation drained nothing, replied nothing")
521                            .into(),
522                    ),
523                )
524            }
525            _ => {}
526        };
527
528        (drain, rv)
529    }
530}
531
/// Handshake
///
/// The public interface for driving a SOCKS handshake.
/// New code should use [`step`](Handshake::step);
/// [`handshake`](Handshake::handshake) is the older, deprecated, entrypoint.
#[allow(private_bounds)] // This is a sealed trait, that's expected
pub trait Handshake: HandshakeImpl + HasHandshakeOutput<Self::Output> {
    /// Output from the handshake: the meaning, as we understand it
    type Output: Debug;

    /// Drive a handshake forward, determining what the next step is
    ///
    /// ```no_run
    /// # fn main() -> Result<(), anyhow::Error> {
    /// use std::io::{Read as _, Write as _};
    /// use tor_socksproto::{Handshake as _, SocksProxyHandshake, SocksRequest};
    ///
    /// let mut socket: std::net::TcpStream = todo!();
    ///
    /// let mut hs = SocksProxyHandshake::new();
    /// let mut buf = tor_socksproto::Buffer::new();
    /// let (request, data_read_ahead) = loop {
    ///     use tor_socksproto::NextStep;
    ///     match hs.step(&mut buf)? {
    ///         NextStep::Send(data) => socket.write_all(&data)?,
    ///         NextStep::Recv(mut recv) => {
    ///             let got = socket.read(recv.buf())?;
    ///             // Don't ignore this: it reports EOF (a zero-length read) as an error.
    ///             recv.note_received(got)?;
    ///         },
    ///         NextStep::Finished(request) => break request.into_output_and_vec(),
    ///     }
    /// };
    /// let _: SocksRequest = request;
    /// let _: Vec<u8> = data_read_ahead;
    ///
    /// // Or, with precise reading:
    ///
    /// //...
    /// let mut buf = tor_socksproto::Buffer::new_precise();
    /// let request = loop {
    ///     use tor_socksproto::NextStep;
    ///     match hs.step(&mut buf)? {
    ///         //...
    ///         NextStep::Finished(request) => break request.into_output()?,
    /// #       _ => todo!(),
    ///     }
    /// };
    /// let _: SocksRequest = request;
    /// # }
    /// ```
    ///
    /// See [`ReadPrecision`] for information about read precision and the `P` type parameter.
    ///
    /// # Errors
    ///
    /// Returns [`Error::MessageTooLong`] if the handshake needs more data
    /// than there is free space for in `buffer`.
    /// Other errors from the protocol implementation are passed through.
    fn step<'b, P: ReadPrecision>(
        &mut self,
        buffer: &'b mut Buffer<P>,
    ) -> Result<NextStep<'b, <Self as Handshake>::Output, P>, Error> {
        // Run the protocol implementation over whatever input is buffered so far.
        let (drain, rv) = self.call_handshake_impl(buffer.filled_slice());

        // Not enough input yet: ask the caller to read more,
        // provided the missing bytes still fit in the buffer's free space.
        // (Nothing is drained from the buffer in this case.)
        if let Err(Error::Decode(tor_bytes::Error::Incomplete { deficit, .. })) = rv {
            let deficit = deficit.into_inner();
            return if usize::from(deficit) > buffer.unfilled_slice().len() {
                Err(Error::MessageTooLong {
                    limit: buffer.buf.len(),
                })
            } else {
                Ok(NextStep::Recv(RecvStep { buffer, deficit }))
            };
        };

        // NOTE(review): unlike the deprecated `handshake`, this error path
        // does not call `set_failed()` - confirm that this is intended.
        let rv = rv?;

        // Discard the consumed prefix, moving any remaining data to the front.
        buffer.buf.copy_within(drain..buffer.filled, 0);
        buffer.filled -= drain;

        Ok(match rv {
            ImplNextStep::Reply { reply } => NextStep::Send(reply),
            ImplNextStep::Finished => {
                // The implementation said it finished, so it must have stored its output.
                let output = self.take_output().ok_or_else(|| internal!("no output!"))?;
                NextStep::Finished(Finished { buffer, output })
            }
        })
    }

    /// Try to advance the handshake, given some peer input in
    /// `input`.
    ///
    /// If there isn't enough input, gives a [`Truncated`].
    /// In this case, *the caller must retain the input*, and pass it to a later
    /// invocation of `handshake`.  Input should only be regarded as consumed when
    /// the `Action::drain` field is nonzero.
    ///
    /// Other errors (besides `Truncated`) indicate a failure.
    ///
    /// On success, return an Action describing what to tell the peer,
    /// and how much of its input to consume.
    //
    // When removing this API, also remove `Action`.
    #[deprecated = "use the new Handshake::step API instead"]
    fn handshake(&mut self, input: &[u8]) -> crate::TResult<Action> {
        let (drain, rv) = self.call_handshake_impl(input);
        match rv {
            // Need more input: not a failure, so the state is left alone.
            #[allow(deprecated)]
            Err(Error::Decode(
                tor_bytes::Error::Incomplete { .. } | tor_bytes::Error::Truncated,
            )) => Err(Truncated::new()),
            // Any other error is fatal for this handshake.
            Err(e) => {
                self.set_failed();
                Ok(Err(e))
            }
            Ok(ImplNextStep::Reply { reply }) => Ok(Ok(Action {
                drain,
                reply,
                finished: false,
            })),
            Ok(ImplNextStep::Finished) => Ok(Ok(Action {
                drain,
                reply: vec![],
                finished: true,
            })),
        }
    }

    /// [`Handshake::handshake`] for tests
    ///
    /// This wrapper function allows us to avoid writing many (or broad) allows in our tests.
    #[cfg(test)]
    #[allow(deprecated)]
    fn handshake_for_tests(&mut self, input: &[u8]) -> crate::TResult<Action> {
        self.handshake(input)
    }
}