Tor 0.4.9.0-alpha-dev
connection.c
Go to the documentation of this file.
1/* Copyright (c) 2001 Matej Pfajfar.
2 * Copyright (c) 2001-2004, Roger Dingledine.
3 * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
4 * Copyright (c) 2007-2021, The Tor Project, Inc. */
5/* See LICENSE for licensing information */
6
7/**
8 * \file connection.c
9 * \brief General high-level functions to handle reading and writing
10 * on connections.
11 *
12 * Each connection (ideally) represents a TLS connection, a TCP socket, a unix
13 * socket, or a UDP socket on which reads and writes can occur. (But see
14 * connection_edge.c for cases where connections can also represent streams
15 * that do not have a corresponding socket.)
16 *
17 * The module implements the abstract type, connection_t. The subtypes are:
18 * <ul>
19 * <li>listener_connection_t, implemented here in connection.c
20 * <li>dir_connection_t, implemented in directory.c
21 * <li>or_connection_t, implemented in connection_or.c
22 * <li>edge_connection_t, implemented in connection_edge.c, along with
23 * its subtype(s):
24 * <ul><li>entry_connection_t, also implemented in connection_edge.c
25 * </ul>
26 * <li>control_connection_t, implemented in control.c
27 * </ul>
28 *
29 * The base type implemented in this module is responsible for basic
30 * rate limiting, flow control, and marshalling bytes onto and off of the
31 * network (either directly or via TLS).
32 *
33 * Connections are registered with the main loop with connection_add(). As
34 * they become able to read or write, they register that fact with the main
35 * event loop by calling connection_watch_events(), connection_start_reading(),
36 * or connection_start_writing(). When they no longer want to read or write,
37 * they call connection_stop_reading() or connection_stop_writing().
38 *
39 * To queue data to be written on a connection, call
40 * connection_buf_add(). When data arrives, the
41 * connection_process_inbuf() callback is invoked, which dispatches to a
42 * type-specific function (such as connection_edge_process_inbuf() for
43 * example). Connection types that need notice of when data has been written
44 * receive notification via connection_flushed_some() and
45 * connection_finished_flushing(). These functions all delegate to
46 * type-specific implementations.
47 *
48 * Additionally, beyond the core of connection_t, this module also implements:
49 * <ul>
50 * <li>Listeners, which wait for incoming sockets and launch connections
51 * <li>Outgoing SOCKS proxy support
52 * <li>Outgoing HTTP proxy support
53 * <li>An out-of-sockets handler for dealing with socket exhaustion
54 * </ul>
55 **/
56
57#define CONNECTION_PRIVATE
58#include "core/or/or.h"
60#include "lib/buf/buffers.h"
61#include "lib/tls/buffers_tls.h"
62#include "lib/err/backtrace.h"
63
64/*
65 * Define this so we get channel internal functions, since we're implementing
66 * part of a subclass (channel_tls_t).
67 */
68#define CHANNEL_OBJECT_PRIVATE
69#include "app/config/config.h"
74#include "core/or/channel.h"
75#include "core/or/channeltls.h"
77#include "core/or/circuitlist.h"
78#include "core/or/circuituse.h"
81#include "core/or/dos.h"
82#include "core/or/policies.h"
83#include "core/or/reasons.h"
84#include "core/or/relay.h"
85#include "core/or/status.h"
86#include "core/or/crypt_path.h"
87#include "core/proto/proto_haproxy.h"
100#include "feature/hs/hs_common.h"
101#include "feature/hs/hs_ident.h"
106#include "feature/relay/dns.h"
112#include "feature/stats/bwhist.h"
115#include "lib/geoip/geoip.h"
116
117#include "lib/cc/ctassert.h"
118#include "lib/sandbox/sandbox.h"
119#include "lib/net/buffers_net.h"
120#include "lib/net/address.h"
121#include "lib/tls/tortls.h"
124
125#ifdef HAVE_PWD_H
126#include <pwd.h>
127#endif
128
129#ifdef HAVE_UNISTD_H
130#include <unistd.h>
131#endif
132#ifdef HAVE_SYS_STAT_H
133#include <sys/stat.h>
134#endif
135
136#ifdef HAVE_SYS_UN_H
137#include <sys/socket.h>
138#include <sys/un.h>
139#endif
140
146#include "core/or/port_cfg_st.h"
149
151
152/**
153 * On Windows and Linux we cannot reliably bind() a socket to an
154 * address and port if: 1) There's already a socket bound to wildcard
155 * address (0.0.0.0 or ::) with the same port; 2) We try to bind()
156 * to wildcard address and there's another socket bound to a
157 * specific address and the same port.
158 *
159 * To address this problem on these two platforms we implement a
160 * routine that:
161 * 1) Checks if first attempt to bind() a new socket failed with
162 * EADDRINUSE.
163 * 2) If so, it will close the appropriate old listener connection and
164 * 3) Attempts bind()'ing the new listener socket again.
165 *
166 * Just to be safe, we are enabling listener rebind code on all platforms,
167 * to account for unexpected cases where it may be needed.
168 */
169#define ENABLE_LISTENER_REBIND
170
172 const struct sockaddr *listensockaddr,
173 socklen_t listensocklen, int type,
174 const char *address,
175 const port_cfg_t *portcfg,
176 int *addr_in_use);
178 const port_cfg_t *port,
179 int *defer, int *addr_in_use);
180static void connection_init(time_t now, connection_t *conn, int type,
181 int socket_family);
182static int connection_handle_listener_read(connection_t *conn, int new_type);
184static int connection_flushed_some(connection_t *conn);
186static int connection_reached_eof(connection_t *conn);
188 ssize_t *max_to_read,
189 int *socket_error);
191static void set_constrained_socket_buffers(tor_socket_t sock, int size);
192
193static const char *connection_proxy_state_to_string(int state);
196static const char *proxy_type_to_string(int proxy_type);
197static int conn_get_proxy_type(const connection_t *conn);
199 const or_options_t *options, unsigned int conn_type);
200static void reenable_blocked_connection_init(const or_options_t *options);
202
203/** The last addresses that our network interface seemed to have been
204 * binding to. We use this as one way to detect when our IP changes.
205 *
206 * XXXX+ We should really use the entire list of interfaces here.
207 **/
209/** The last IPv6 address that our network interface seemed to have been binding to. */
210static tor_addr_t *last_interface_ipv6 = NULL;
211/** A list of tor_addr_t for addresses we've used in outgoing connections.
212 * Used to detect IP address changes. */
214
/** Expands to the <code>case</code> labels of every listener connection
 * type, for use inside a <code>switch</code> on a connection type.
 *
 * The last label intentionally has no trailing colon: the use site supplies
 * it, so the macro reads like an ordinary label
 * (<code>CASE_ANY_LISTENER_TYPE:</code>). */
#define CASE_ANY_LISTENER_TYPE                 \
  case CONN_TYPE_OR_LISTENER:                  \
  case CONN_TYPE_EXT_OR_LISTENER:              \
  case CONN_TYPE_AP_LISTENER:                  \
  case CONN_TYPE_DIR_LISTENER:                 \
  case CONN_TYPE_CONTROL_LISTENER:             \
  case CONN_TYPE_AP_TRANS_LISTENER:            \
  case CONN_TYPE_AP_NATD_LISTENER:             \
  case CONN_TYPE_AP_DNS_LISTENER:              \
  case CONN_TYPE_AP_HTTP_CONNECT_LISTENER:     \
  case CONN_TYPE_METRICS_LISTENER
226
227/**************************************************************/
228
229/**
230 * Cast a `connection_t *` to a `listener_connection_t *`.
231 *
232 * Exit with an assertion failure if the input is not a
233 * `listener_connection_t`.
234 **/
237{
238 tor_assert(c->magic == LISTENER_CONNECTION_MAGIC);
240}
241
242/**
243 * Cast a `const connection_t *` to a `const listener_connection_t *`.
244 *
245 * Exit with an assertion failure if the input is not a
246 * `listener_connection_t`.
247 **/
250{
251 return TO_LISTENER_CONN((connection_t *)c);
252}
253
254size_t
255connection_get_inbuf_len(const connection_t *conn)
256{
257 return conn->inbuf ? buf_datalen(conn->inbuf) : 0;
258}
259
260size_t
261connection_get_outbuf_len(const connection_t *conn)
262{
263 return conn->outbuf ? buf_datalen(conn->outbuf) : 0;
264}
265
266/**
267 * Return the human-readable name for the connection type <b>type</b>
268 */
269const char *
271{
272 static char buf[64];
273 switch (type) {
274 case CONN_TYPE_OR_LISTENER: return "OR listener";
275 case CONN_TYPE_OR: return "OR";
276 case CONN_TYPE_EXIT: return "Exit";
277 case CONN_TYPE_AP_LISTENER: return "Socks listener";
279 return "Transparent pf/netfilter listener";
280 case CONN_TYPE_AP_NATD_LISTENER: return "Transparent natd listener";
281 case CONN_TYPE_AP_DNS_LISTENER: return "DNS listener";
282 case CONN_TYPE_AP: return "Socks";
283 case CONN_TYPE_DIR_LISTENER: return "Directory listener";
284 case CONN_TYPE_DIR: return "Directory";
285 case CONN_TYPE_CONTROL_LISTENER: return "Control listener";
286 case CONN_TYPE_CONTROL: return "Control";
287 case CONN_TYPE_EXT_OR: return "Extended OR";
288 case CONN_TYPE_EXT_OR_LISTENER: return "Extended OR listener";
289 case CONN_TYPE_AP_HTTP_CONNECT_LISTENER: return "HTTP tunnel listener";
290 case CONN_TYPE_METRICS_LISTENER: return "Metrics listener";
291 case CONN_TYPE_METRICS: return "Metrics";
292 default:
293 log_warn(LD_BUG, "unknown connection type %d", type);
294 tor_snprintf(buf, sizeof(buf), "unknown [%d]", type);
295 return buf;
296 }
297}
298
299/**
300 * Return the human-readable name for the connection state <b>state</b>
301 * for the connection type <b>type</b>
302 */
303const char *
304conn_state_to_string(int type, int state)
305{
306 static char buf[96];
307 switch (type) {
308 CASE_ANY_LISTENER_TYPE:
309 if (state == LISTENER_STATE_READY)
310 return "ready";
311 break;
312 case CONN_TYPE_OR:
313 switch (state) {
314 case OR_CONN_STATE_CONNECTING: return "connect()ing";
315 case OR_CONN_STATE_PROXY_HANDSHAKING: return "handshaking (proxy)";
316 case OR_CONN_STATE_TLS_HANDSHAKING: return "handshaking (TLS)";
318 return "renegotiating (TLS, v2 handshake)";
320 return "waiting for renegotiation or V3 handshake";
322 return "handshaking (Tor, v2 handshake)";
324 return "handshaking (Tor, v3 handshake)";
325 case OR_CONN_STATE_OPEN: return "open";
326 }
327 break;
328 case CONN_TYPE_EXT_OR:
329 switch (state) {
331 return "waiting for authentication type";
333 return "waiting for client nonce";
335 return "waiting for client hash";
336 case EXT_OR_CONN_STATE_OPEN: return "open";
337 case EXT_OR_CONN_STATE_FLUSHING: return "flushing final OKAY";
338 }
339 break;
340 case CONN_TYPE_EXIT:
341 switch (state) {
342 case EXIT_CONN_STATE_RESOLVING: return "waiting for dest info";
343 case EXIT_CONN_STATE_CONNECTING: return "connecting";
344 case EXIT_CONN_STATE_OPEN: return "open";
345 case EXIT_CONN_STATE_RESOLVEFAILED: return "resolve failed";
346 }
347 break;
348 case CONN_TYPE_AP:
349 switch (state) {
350 case AP_CONN_STATE_SOCKS_WAIT: return "waiting for socks info";
351 case AP_CONN_STATE_NATD_WAIT: return "waiting for natd dest info";
352 case AP_CONN_STATE_RENDDESC_WAIT: return "waiting for rendezvous desc";
353 case AP_CONN_STATE_CONTROLLER_WAIT: return "waiting for controller";
354 case AP_CONN_STATE_CIRCUIT_WAIT: return "waiting for circuit";
355 case AP_CONN_STATE_CONNECT_WAIT: return "waiting for connect response";
356 case AP_CONN_STATE_RESOLVE_WAIT: return "waiting for resolve response";
357 case AP_CONN_STATE_OPEN: return "open";
358 }
359 break;
360 case CONN_TYPE_DIR:
361 switch (state) {
362 case DIR_CONN_STATE_CONNECTING: return "connecting";
363 case DIR_CONN_STATE_CLIENT_SENDING: return "client sending";
364 case DIR_CONN_STATE_CLIENT_READING: return "client reading";
365 case DIR_CONN_STATE_CLIENT_FINISHED: return "client finished";
366 case DIR_CONN_STATE_SERVER_COMMAND_WAIT: return "waiting for command";
367 case DIR_CONN_STATE_SERVER_WRITING: return "writing";
368 }
369 break;
371 switch (state) {
372 case CONTROL_CONN_STATE_OPEN: return "open (protocol v1)";
374 return "waiting for authentication (protocol v1)";
375 }
376 break;
377 }
378
379 if (state == 0) {
380 return "uninitialized";
381 }
382
383 log_warn(LD_BUG, "unknown connection state %d (type %d)", state, type);
384 tor_snprintf(buf, sizeof(buf),
385 "unknown state [%d] on unknown [%s] connection",
386 state, conn_type_to_string(type));
387 tor_assert_nonfatal_unreached_once();
388 return buf;
389}
390
391/**
392 * Helper: describe the peer or address of connection @a conn in a
393 * human-readable manner.
394 *
395 * Returns a pointer to a static buffer; future calls to
396 * connection_describe_peer_internal() will invalidate this buffer.
397 *
398 * If <b>include_preposition</b> is true, include a preposition before the
399 * peer address.
400 *
401 * Nobody should parse the output of this function; it can and will change in
402 * future versions of tor.
403 **/
404static const char *
406 bool include_preposition)
407{
408 IF_BUG_ONCE(!conn) {
409 return "null peer";
410 }
411
412 static char peer_buf[256];
413 const tor_addr_t *addr = &conn->addr;
414 const char *address = NULL;
415 const char *prep;
416 bool scrub = false;
417 char extra_buf[128];
418 extra_buf[0] = 0;
419
420 /* First, figure out the preposition to use */
421 switch (conn->type) {
422 CASE_ANY_LISTENER_TYPE:
423 prep = "on";
424 break;
425 case CONN_TYPE_EXIT:
426 prep = "to";
427 break;
429 case CONN_TYPE_AP:
430 case CONN_TYPE_EXT_OR:
431 prep = "from";
432 break;
433 default:
434 prep = "with";
435 break;
436 }
437
438 /* Now figure out the address. */
439 if (conn->socket_family == AF_UNIX) {
440 /* For unix sockets, we always use the `address` string. */
441 address = conn->address ? conn->address : "unix socket";
442 } else if (conn->type == CONN_TYPE_OR) {
443 /* For OR connections, we have a lot to do. */
444 const or_connection_t *or_conn = CONST_TO_OR_CONN(conn);
445 /* We report the IDs we're talking to... */
446 if (fast_digest_is_zero(or_conn->identity_digest)) {
447 // This could be a client, so scrub it. No identity to report.
448 scrub = true;
449 } else {
450 const ed25519_public_key_t *ed_id =
452 char ed_id_buf[ED25519_BASE64_LEN+1];
453 char rsa_id_buf[HEX_DIGEST_LEN+1];
454 if (ed_id) {
455 ed25519_public_to_base64(ed_id_buf, ed_id);
456 } else {
457 strlcpy(ed_id_buf, "<none>", sizeof(ed_id_buf));
458 }
459 base16_encode(rsa_id_buf, sizeof(rsa_id_buf),
460 or_conn->identity_digest, DIGEST_LEN);
461 tor_snprintf(extra_buf, sizeof(extra_buf),
462 " ID=%s RSA_ID=%s", ed_id_buf, rsa_id_buf);
463 }
464 if (! scrub && (! tor_addr_eq(addr, &or_conn->canonical_orport.addr) ||
465 conn->port != or_conn->canonical_orport.port)) {
466 /* We report canonical address, if it's different */
467 char canonical_addr_buf[TOR_ADDR_BUF_LEN];
468 if (tor_addr_to_str(canonical_addr_buf, &or_conn->canonical_orport.addr,
469 sizeof(canonical_addr_buf), 1)) {
470 tor_snprintf(extra_buf+strlen(extra_buf),
471 sizeof(extra_buf)-strlen(extra_buf),
472 " canonical_addr=%s:%"PRIu16,
473 canonical_addr_buf,
474 or_conn->canonical_orport.port);
475 }
476 }
477 } else if (conn->type == CONN_TYPE_EXIT) {
478 scrub = true; /* This is a client's request; scrub it with SafeLogging. */
479 if (tor_addr_is_null(addr)) {
480 address = conn->address;
481 strlcpy(extra_buf, " (DNS lookup pending)", sizeof(extra_buf));
482 }
483 }
484
485 char addr_buf[TOR_ADDR_BUF_LEN];
486 if (address == NULL) {
487 if (tor_addr_family(addr) == 0) {
488 address = "<unset>";
489 } else {
490 address = tor_addr_to_str(addr_buf, addr, sizeof(addr_buf), 1);
491 if (!address) {
492 address = "<can't format!>";
493 tor_assert_nonfatal_unreached_once();
494 }
495 }
496 }
497
498 char portbuf[7];
499 portbuf[0]=0;
500 if (scrub && get_options()->SafeLogging_ != SAFELOG_SCRUB_NONE) {
501 address = "[scrubbed]";
502 } else {
503 /* Only set the port if we're not scrubbing the address. */
504 if (conn->port != 0) {
505 tor_snprintf(portbuf, sizeof(portbuf), ":%d", conn->port);
506 }
507 }
508
509 const char *sp = include_preposition ? " " : "";
510 if (! include_preposition)
511 prep = "";
512
513 tor_snprintf(peer_buf, sizeof(peer_buf),
514 "%s%s%s%s%s", prep, sp, address, portbuf, extra_buf);
515 return peer_buf;
516}
517
518/**
519 * Describe the peer or address of connection @a conn in a
520 * human-readable manner.
521 *
522 * Returns a pointer to a static buffer; future calls to
523 * connection_describe_peer() or connection_describe() will invalidate this
524 * buffer.
525 *
526 * Nobody should parse the output of this function; it can and will change in
527 * future versions of tor.
528 **/
529const char *
531{
532 return connection_describe_peer_internal(conn, false);
533}
534
535/**
536 * Describe a connection for logging purposes.
537 *
538 * Returns a pointer to a static buffer; future calls to connection_describe()
539 * will invalidate this buffer.
540 *
541 * Nobody should parse the output of this function; it can and will change in
542 * future versions of tor.
543 **/
544const char *
546{
547 IF_BUG_ONCE(!conn) {
548 return "null connection";
549 }
550 static char desc_buf[256];
551 const char *peer = connection_describe_peer_internal(conn, true);
552 tor_snprintf(desc_buf, sizeof(desc_buf),
553 "%s connection (%s) %s",
555 conn_state_to_string(conn->type, conn->state),
556 peer);
557 return desc_buf;
558}
559
560/** Allocate and return a new dir_connection_t, initialized as by
561 * connection_init(). */
563dir_connection_new(int socket_family)
564{
565 dir_connection_t *dir_conn = tor_malloc_zero(sizeof(dir_connection_t));
566 connection_init(time(NULL), TO_CONN(dir_conn), CONN_TYPE_DIR, socket_family);
567 return dir_conn;
568}
569
570/** Allocate and return a new or_connection_t, initialized as by
571 * connection_init().
572 *
573 * Initialize active_circuit_pqueue.
574 *
575 * Set active_circuit_pqueue_last_recalibrated to current cell_ewma tick.
576 */
578or_connection_new(int type, int socket_family)
579{
580 or_connection_t *or_conn = tor_malloc_zero(sizeof(or_connection_t));
581 time_t now = time(NULL);
582 tor_assert(type == CONN_TYPE_OR || type == CONN_TYPE_EXT_OR);
583 connection_init(now, TO_CONN(or_conn), type, socket_family);
584
586 connection_or_set_canonical(or_conn, 0);
587
588 if (type == CONN_TYPE_EXT_OR) {
589 /* If we aren't told an address for this connection, we should
590 * presume it isn't local, and should be rate-limited. */
591 TO_CONN(or_conn)->always_rate_limit_as_remote = 1;
592 }
593
594 return or_conn;
595}
596
597/** Allocate and return a new entry_connection_t, initialized as by
598 * connection_init().
599 *
600 * Allocate space to store the socks_request.
601 */
603entry_connection_new(int type, int socket_family)
604{
605 entry_connection_t *entry_conn = tor_malloc_zero(sizeof(entry_connection_t));
606 tor_assert(type == CONN_TYPE_AP);
607 connection_init(time(NULL), ENTRY_TO_CONN(entry_conn), type, socket_family);
608 entry_conn->socks_request = socks_request_new();
609 /* If this is coming from a listener, we'll set it up based on the listener
610 * in a little while. Otherwise, we're doing this as a linked connection
611 * of some kind, and we should set it up here based on the socket family */
612 if (socket_family == AF_INET)
613 entry_conn->entry_cfg.ipv4_traffic = 1;
614 else if (socket_family == AF_INET6)
615 entry_conn->entry_cfg.ipv6_traffic = 1;
616
617 /* Initialize the read token bucket to the maximum value which is the same as
618 * no rate limiting. */
619 token_bucket_rw_init(&ENTRY_TO_EDGE_CONN(entry_conn)->bucket, INT32_MAX,
620 INT32_MAX, monotime_coarse_get_stamp());
621 return entry_conn;
622}
623
624/** Allocate and return a new edge_connection_t, initialized as by
625 * connection_init(). */
627edge_connection_new(int type, int socket_family)
628{
629 edge_connection_t *edge_conn = tor_malloc_zero(sizeof(edge_connection_t));
630 tor_assert(type == CONN_TYPE_EXIT);
631 connection_init(time(NULL), TO_CONN(edge_conn), type, socket_family);
632 /* Initialize the read token bucket to the maximum value which is the same as
633 * no rate limiting. */
634 token_bucket_rw_init(&edge_conn->bucket, INT32_MAX, INT32_MAX,
636 return edge_conn;
637}
638
639/** Allocate and return a new control_connection_t, initialized as by
640 * connection_init(). */
642control_connection_new(int socket_family)
643{
644 control_connection_t *control_conn =
645 tor_malloc_zero(sizeof(control_connection_t));
646 connection_init(time(NULL),
647 TO_CONN(control_conn), CONN_TYPE_CONTROL, socket_family);
648 return control_conn;
649}
650
651/** Allocate and return a new listener_connection_t, initialized as by
652 * connection_init(). */
654listener_connection_new(int type, int socket_family)
655{
656 listener_connection_t *listener_conn =
657 tor_malloc_zero(sizeof(listener_connection_t));
658 connection_init(time(NULL), TO_CONN(listener_conn), type, socket_family);
659 /* Listener connections aren't accounted for with note_connection() so do
660 * this explicitly so to count them. */
661 rep_hist_note_conn_opened(false, type, socket_family);
662 return listener_conn;
663}
664
665/** Allocate, initialize, and return a new connection_t subtype of <b>type</b>
666 * to make or receive connections of address family <b>socket_family</b>. The
667 * type should be one of the CONN_TYPE_* constants. */
669connection_new(int type, int socket_family)
670{
671 switch (type) {
672 case CONN_TYPE_OR:
673 case CONN_TYPE_EXT_OR:
674 return TO_CONN(or_connection_new(type, socket_family));
675
676 case CONN_TYPE_EXIT:
677 return TO_CONN(edge_connection_new(type, socket_family));
678
679 case CONN_TYPE_AP:
680 return ENTRY_TO_CONN(entry_connection_new(type, socket_family));
681
682 case CONN_TYPE_DIR:
683 return TO_CONN(dir_connection_new(socket_family));
684
686 return TO_CONN(control_connection_new(socket_family));
687
688 CASE_ANY_LISTENER_TYPE:
689 return TO_CONN(listener_connection_new(type, socket_family));
690
691 default: {
692 connection_t *conn = tor_malloc_zero(sizeof(connection_t));
693 connection_init(time(NULL), conn, type, socket_family);
694 return conn;
695 }
696 }
697}
698
699/** Initializes conn. (you must call connection_add() to link it into the main
700 * array).
701 *
702 * Set conn->magic to the correct value.
703 *
704 * Set conn->type to <b>type</b>. Set conn->s and conn->conn_array_index to
705 * -1 to signify they are not yet assigned.
706 *
707 * Initialize conn's timestamps to now.
708 */
709static void
710connection_init(time_t now, connection_t *conn, int type, int socket_family)
711{
712 static uint64_t n_connections_allocated = 1;
713
714 switch (type) {
715 case CONN_TYPE_OR:
716 case CONN_TYPE_EXT_OR:
717 conn->magic = OR_CONNECTION_MAGIC;
718 break;
719 case CONN_TYPE_EXIT:
720 conn->magic = EDGE_CONNECTION_MAGIC;
721 break;
722 case CONN_TYPE_AP:
723 conn->magic = ENTRY_CONNECTION_MAGIC;
724 break;
725 case CONN_TYPE_DIR:
726 conn->magic = DIR_CONNECTION_MAGIC;
727 break;
729 conn->magic = CONTROL_CONNECTION_MAGIC;
730 break;
731 CASE_ANY_LISTENER_TYPE:
732 conn->magic = LISTENER_CONNECTION_MAGIC;
733 break;
734 default:
735 conn->magic = BASE_CONNECTION_MAGIC;
736 break;
737 }
738
739 conn->s = TOR_INVALID_SOCKET; /* give it a default of 'not used' */
740 conn->conn_array_index = -1; /* also default to 'not used' */
741 conn->global_identifier = n_connections_allocated++;
742
743 conn->type = type;
744 conn->socket_family = socket_family;
745 if (!connection_is_listener(conn)) {
746 /* listeners never use their buf */
747 conn->inbuf = buf_new();
748 conn->outbuf = buf_new();
749 }
750
751 conn->timestamp_created = now;
752 conn->timestamp_last_read_allowed = now;
754}
755
756/** Create a link between <b>conn_a</b> and <b>conn_b</b>. */
757void
759{
760 tor_assert(! SOCKET_OK(conn_a->s));
761 tor_assert(! SOCKET_OK(conn_b->s));
762
763 conn_a->linked = 1;
764 conn_b->linked = 1;
765 conn_a->linked_conn = conn_b;
766 conn_b->linked_conn = conn_a;
767}
768
769/** Return true iff the provided connection listener type supports AF_UNIX
770 * sockets. */
771int
773{
774 /* For now only control ports or SOCKS ports can be Unix domain sockets
775 * and listeners at the same time */
776 switch (type) {
779 return 1;
780 default:
781 return 0;
782 }
783}
784
785/** Deallocate memory used by <b>conn</b>. Deallocate its buffers if
786 * necessary, close its socket if necessary, and mark the directory as dirty
787 * if <b>conn</b> is an OR or OP connection.
788 */
789STATIC void
791{
792 void *mem;
793 size_t memlen;
794 if (!conn)
795 return;
796
797 switch (conn->type) {
798 case CONN_TYPE_OR:
799 case CONN_TYPE_EXT_OR:
800 tor_assert(conn->magic == OR_CONNECTION_MAGIC);
801 mem = TO_OR_CONN(conn);
802 memlen = sizeof(or_connection_t);
803 break;
804 case CONN_TYPE_AP:
805 tor_assert(conn->magic == ENTRY_CONNECTION_MAGIC);
806 mem = TO_ENTRY_CONN(conn);
807 memlen = sizeof(entry_connection_t);
808 break;
809 case CONN_TYPE_EXIT:
810 tor_assert(conn->magic == EDGE_CONNECTION_MAGIC);
811 mem = TO_EDGE_CONN(conn);
812 memlen = sizeof(edge_connection_t);
813 break;
814 case CONN_TYPE_DIR:
815 tor_assert(conn->magic == DIR_CONNECTION_MAGIC);
816 mem = TO_DIR_CONN(conn);
817 memlen = sizeof(dir_connection_t);
818 break;
820 tor_assert(conn->magic == CONTROL_CONNECTION_MAGIC);
821 mem = TO_CONTROL_CONN(conn);
822 memlen = sizeof(control_connection_t);
823 break;
824 CASE_ANY_LISTENER_TYPE:
825 tor_assert(conn->magic == LISTENER_CONNECTION_MAGIC);
826 mem = TO_LISTENER_CONN(conn);
827 memlen = sizeof(listener_connection_t);
828 break;
829 default:
830 tor_assert(conn->magic == BASE_CONNECTION_MAGIC);
831 mem = conn;
832 memlen = sizeof(connection_t);
833 break;
834 }
835
836 if (conn->linked) {
837 log_info(LD_GENERAL, "Freeing linked %s connection [%s] with %d "
838 "bytes on inbuf, %d on outbuf.",
840 conn_state_to_string(conn->type, conn->state),
841 (int)connection_get_inbuf_len(conn),
842 (int)connection_get_outbuf_len(conn));
843 }
844
845 if (!connection_is_listener(conn)) {
846 buf_free(conn->inbuf);
847 buf_free(conn->outbuf);
848 } else {
849 if (conn->socket_family == AF_UNIX) {
850 /* For now only control and SOCKS ports can be Unix domain sockets
851 * and listeners at the same time */
853
854 if (unlink(conn->address) < 0 && errno != ENOENT) {
855 log_warn(LD_NET, "Could not unlink %s: %s", conn->address,
856 strerror(errno));
857 }
858 }
859 }
860
862
863 if (connection_speaks_cells(conn)) {
864 or_connection_t *or_conn = TO_OR_CONN(conn);
865 if (or_conn->tls) {
866 if (! SOCKET_OK(conn->s)) {
867 /* The socket has been closed by somebody else; we must tell the
868 * TLS object not to close it. */
869 tor_tls_release_socket(or_conn->tls);
870 } else {
871 /* The tor_tls_free() call below will close the socket; we must tell
872 * the code below not to close it a second time. */
874 conn->s = TOR_INVALID_SOCKET;
875 }
876 tor_tls_free(or_conn->tls);
877 or_conn->tls = NULL;
878 }
879 or_handshake_state_free(or_conn->handshake_state);
880 or_conn->handshake_state = NULL;
882 if (or_conn->chan) {
883 /* Owww, this shouldn't happen, but... */
884 channel_t *base_chan = TLS_CHAN_TO_BASE(or_conn->chan);
885 tor_assert(base_chan);
886 log_info(LD_CHANNEL,
887 "Freeing orconn at %p, saw channel %p with ID "
888 "%"PRIu64 " left un-NULLed",
889 or_conn, base_chan,
890 base_chan->global_identifier);
891 if (!CHANNEL_FINISHED(base_chan)) {
892 channel_close_for_error(base_chan);
893 }
894
895 or_conn->chan->conn = NULL;
896 or_conn->chan = NULL;
897 }
898 }
899 if (conn->type == CONN_TYPE_AP) {
900 entry_connection_t *entry_conn = TO_ENTRY_CONN(conn);
903 if (entry_conn->socks_request)
904 socks_request_free(entry_conn->socks_request);
905 if (entry_conn->pending_optimistic_data) {
906 buf_free(entry_conn->pending_optimistic_data);
907 }
908 if (entry_conn->sending_optimistic_data) {
909 buf_free(entry_conn->sending_optimistic_data);
910 }
911 }
912 if (CONN_IS_EDGE(conn)) {
913 hs_ident_edge_conn_free(TO_EDGE_CONN(conn)->hs_ident);
914 }
915 if (conn->type == CONN_TYPE_CONTROL) {
916 control_connection_t *control_conn = TO_CONTROL_CONN(conn);
917 tor_free(control_conn->safecookie_client_hash);
918 tor_free(control_conn->incoming_cmd);
919 tor_free(control_conn->current_cmd);
920 if (control_conn->ephemeral_onion_services) {
921 SMARTLIST_FOREACH(control_conn->ephemeral_onion_services, char *, cp, {
922 memwipe(cp, 0, strlen(cp));
923 tor_free(cp);
924 });
925 smartlist_free(control_conn->ephemeral_onion_services);
926 }
927 }
928
929 /* Probably already freed by connection_free. */
930 tor_event_free(conn->read_event);
931 tor_event_free(conn->write_event);
932 conn->read_event = conn->write_event = NULL;
933
934 if (conn->type == CONN_TYPE_DIR) {
935 dir_connection_t *dir_conn = TO_DIR_CONN(conn);
936 tor_free(dir_conn->requested_resource);
937
938 tor_compress_free(dir_conn->compress_state);
939 dir_conn_clear_spool(dir_conn);
940
941 hs_ident_dir_conn_free(dir_conn->hs_ident);
942 if (dir_conn->guard_state) {
943 /* Cancel before freeing, if it's still there. */
945 }
946 circuit_guard_state_free(dir_conn->guard_state);
947 }
948
949 if (SOCKET_OK(conn->s)) {
950 log_debug(LD_NET,"closing fd %d.",(int)conn->s);
951 tor_close_socket(conn->s);
952 conn->s = TOR_INVALID_SOCKET;
953 }
954
955 if (conn->type == CONN_TYPE_OR &&
956 !tor_digest_is_zero(TO_OR_CONN(conn)->identity_digest)) {
957 log_warn(LD_BUG, "called on OR conn with non-zeroed identity_digest");
959 }
960 if (conn->type == CONN_TYPE_OR || conn->type == CONN_TYPE_EXT_OR) {
961 tor_free(TO_OR_CONN(conn)->ext_or_auth_correct_client_hash);
962 tor_free(TO_OR_CONN(conn)->ext_or_transport);
963 }
964
965 memwipe(mem, 0xCC, memlen); /* poison memory */
966 tor_free(mem);
967}
968
969/** Make sure <b>conn</b> isn't in any of the global conn lists; then free it.
970 */
971MOCK_IMPL(void,
973{
974 if (!conn)
975 return;
978 if (BUG(conn->linked_conn)) {
979 conn->linked_conn->linked_conn = NULL;
980 if (! conn->linked_conn->marked_for_close &&
983 conn->linked_conn = NULL;
984 }
985 if (connection_speaks_cells(conn)) {
986 if (!tor_digest_is_zero(TO_OR_CONN(conn)->identity_digest)) {
988 }
989 }
990 if (conn->type == CONN_TYPE_CONTROL) {
992 }
993#if 1
994 /* DEBUGGING */
995 if (conn->type == CONN_TYPE_AP) {
996 connection_ap_warn_and_unmark_if_pending_circ(TO_ENTRY_CONN(conn),
997 "connection_free");
998 }
999#endif /* 1 */
1000
1001 /* Notify the circuit creation DoS mitigation subsystem that an OR client
1002 * connection has been closed. And only do that if we track it. */
1003 if (conn->type == CONN_TYPE_OR) {
1004 dos_close_client_conn(TO_OR_CONN(conn));
1005 }
1006
1009}
1010
1011/**
1012 * Called when we're about to finally unlink and free a connection:
1013 * perform necessary accounting and cleanup
1014 * - Directory conns that failed to fetch a rendezvous descriptor
1015 * need to inform pending rendezvous streams.
1016 * - OR conns need to call rep_hist_note_*() to record status.
1017 * - AP conns need to send a socks reject if necessary.
1018 * - Exit conns need to call connection_dns_remove() if necessary.
1019 * - AP and Exit conns need to send an end cell if they can.
1020 * - DNS conns need to fail any resolves that are pending on them.
1021 * - OR and edge connections need to be unlinked from circuits.
1022 */
1023void
1025{
1027
1028 switch (conn->type) {
1029 case CONN_TYPE_DIR:
1031 break;
1032 case CONN_TYPE_OR:
1033 case CONN_TYPE_EXT_OR:
1035 break;
1036 case CONN_TYPE_AP:
1038 break;
1039 case CONN_TYPE_EXIT:
1041 break;
1042 }
1043}
1044
/** Return true iff connection_close_immediate() has been called on this
 * connection.
 *
 * For a linked connection this is its linked_conn_is_closed flag; otherwise
 * it is whether the connection no longer has a valid socket.
 *
 * Every use of the argument is parenthesized so that expressions such as
 * <code>p + 1</code> expand correctly. The argument is still evaluated more
 * than once, so it must not have side effects. */
#define CONN_IS_CLOSED(c) \
  ((c)->linked ? ((c)->linked_conn_is_closed) : (! SOCKET_OK((c)->s)))
1049
1050/** Close the underlying socket for <b>conn</b>, so we don't try to
1051 * flush it. Must be used in conjunction with (right before)
1052 * connection_mark_for_close().
1053 */
1054void
1056{
1057 assert_connection_ok(conn,0);
1058 if (CONN_IS_CLOSED(conn)) {
1059 log_err(LD_BUG,"Attempt to close already-closed connection.");
1061 return;
1062 }
1063 if (connection_get_outbuf_len(conn)) {
1064 log_info(LD_NET,"fd %d, type %s, state %s, %"TOR_PRIuSZ" bytes on outbuf.",
1065 (int)conn->s, conn_type_to_string(conn->type),
1066 conn_state_to_string(conn->type, conn->state),
1067 buf_datalen(conn->outbuf));
1068 }
1069
1071
1072 /* Prevent the event from getting unblocked. */
1073 conn->read_blocked_on_bw = 0;
1074 conn->write_blocked_on_bw = 0;
1075
1076 if (SOCKET_OK(conn->s))
1077 tor_close_socket(conn->s);
1078 conn->s = TOR_INVALID_SOCKET;
1079 if (conn->linked)
1080 conn->linked_conn_is_closed = 1;
1081 if (conn->outbuf)
1082 buf_clear(conn->outbuf);
1083}
1084
1085/** Mark <b>conn</b> to be closed next time we loop through
1086 * conn_close_if_marked() in main.c. */
1087void
1088connection_mark_for_close_(connection_t *conn, int line, const char *file)
1089{
1090 assert_connection_ok(conn,0);
1091 tor_assert(line);
1092 tor_assert(line < 1<<16); /* marked_for_close can only fit a uint16_t. */
1093 tor_assert(file);
1094
1095 if (conn->type == CONN_TYPE_OR) {
1096 /*
1097 * An or_connection should have been closed through one of the channel-
1098 * aware functions in connection_or.c. We'll assume this is an error
1099 * close and do that, and log a bug warning.
1100 */
1101 log_warn(LD_CHANNEL | LD_BUG,
1102 "Something tried to close an or_connection_t without going "
1103 "through channels at %s:%d",
1104 file, line);
1106 } else {
1107 /* Pass it down to the real function */
1108 connection_mark_for_close_internal_(conn, line, file);
1109 }
1110}
1111
/** Mark <b>conn</b> to be closed next time we loop through
 * conn_close_if_marked() in main.c.
 *
 * This _internal version bypasses the CONN_TYPE_OR checks; this should be
 * called when you either are sure that if this is an or_connection_t the
 * controlling channel has been notified (e.g. with
 * connection_or_notify_error()), or you actually are the
 * connection_or_close_for_error() or connection_or_close_normally() function.
 * For all other cases, use connection_mark_and_flush() which checks for
 * or_connection_t properly, instead. See below.
 *
 * We want to keep this function simple and quick, since it can be called from
 * quite deep in the call chain, and hence it should avoid having side effects
 * that interfere with its caller's view of the connection.
 */
1127MOCK_IMPL(void,
1129 int line, const char *file))
1130{
1131 assert_connection_ok(conn,0);
1132 tor_assert(line);
1133 tor_assert(line < 1<<16); /* marked_for_close can only fit a uint16_t. */
1134 tor_assert(file);
1135
1136 if (conn->marked_for_close) {
1137 log_warn(LD_BUG,"Duplicate call to connection_mark_for_close at %s:%d"
1138 " (first at %s:%d)", file, line, conn->marked_for_close_file,
1139 conn->marked_for_close);
1141 return;
1142 }
1143
1144 if (conn->type == CONN_TYPE_OR) {
1145 /*
1146 * Bad news if this happens without telling the controlling channel; do
1147 * this so we can find things that call this wrongly when the asserts hit.
1148 */
1149 log_debug(LD_CHANNEL,
1150 "Calling connection_mark_for_close_internal_() on an OR conn "
1151 "at %s:%d",
1152 file, line);
1153 }
1154
1155 conn->marked_for_close = line;
1156 conn->marked_for_close_file = file;
1158
1159 /* in case we're going to be held-open-til-flushed, reset
1160 * the number of seconds since last successful write, so
1161 * we get our whole 15 seconds */
1162 conn->timestamp_last_write_allowed = time(NULL);
1163
1164 /* Note the connection close. */
1166 conn->socket_family);
1167}
1168
1169/** Find each connection that has hold_open_until_flushed set to
1170 * 1 but hasn't written in the past 15 seconds, and set
1171 * hold_open_until_flushed to 0. This means it will get cleaned
1172 * up in the next loop through close_if_marked() in main.c.
1173 */
1174void
1176{
1177 time_t now;
1179
1180 now = time(NULL);
1181
1182 SMARTLIST_FOREACH_BEGIN(conns, connection_t *, conn) {
1183 /* If we've been holding the connection open, but we haven't written
1184 * for 15 seconds...
1185 */
1186 if (conn->hold_open_until_flushed) {
1188 if (now - conn->timestamp_last_write_allowed >= 15) {
1189 int severity;
1190 if (conn->type == CONN_TYPE_EXIT ||
1191 (conn->type == CONN_TYPE_DIR &&
1192 conn->purpose == DIR_PURPOSE_SERVER))
1193 severity = LOG_INFO;
1194 else
1195 severity = LOG_NOTICE;
1196 log_fn(severity, LD_NET,
1197 "Giving up on marked_for_close conn that's been flushing "
1198 "for 15s (fd %d, type %s, state %s).",
1199 (int)conn->s, conn_type_to_string(conn->type),
1200 conn_state_to_string(conn->type, conn->state));
1201 conn->hold_open_until_flushed = 0;
1202 }
1203 }
1204 } SMARTLIST_FOREACH_END(conn);
1205}
1206
1207#if defined(HAVE_SYS_UN_H) || defined(RUNNING_DOXYGEN)
1208/** Create an AF_UNIX listenaddr struct.
1209 * <b>listenaddress</b> provides the path to the Unix socket.
1210 *
1211 * Eventually <b>listenaddress</b> will also optionally contain user, group,
1212 * and file permissions for the new socket. But not yet. XXX
1213 * Also, since we do not create the socket here the information doesn't help
1214 * here.
1215 *
1216 * If not NULL <b>readable_address</b> will contain a copy of the path part of
1217 * <b>listenaddress</b>.
1218 *
1219 * The listenaddr struct has to be freed by the caller.
1220 */
1221static struct sockaddr_un *
1222create_unix_sockaddr(const char *listenaddress, char **readable_address,
1223 socklen_t *len_out)
1224{
1225 struct sockaddr_un *sockaddr = NULL;
1226
1227 sockaddr = tor_malloc_zero(sizeof(struct sockaddr_un));
1228 sockaddr->sun_family = AF_UNIX;
1229 if (strlcpy(sockaddr->sun_path, listenaddress, sizeof(sockaddr->sun_path))
1230 >= sizeof(sockaddr->sun_path)) {
1231 log_warn(LD_CONFIG, "Unix socket path '%s' is too long to fit.",
1232 escaped(listenaddress));
1233 tor_free(sockaddr);
1234 return NULL;
1235 }
1236
1237 if (readable_address)
1238 *readable_address = tor_strdup(listenaddress);
1239
1240 *len_out = sizeof(struct sockaddr_un);
1241 return sockaddr;
1242}
1243#else /* !(defined(HAVE_SYS_UN_H) || defined(RUNNING_DOXYGEN)) */
1244static struct sockaddr *
1245create_unix_sockaddr(const char *listenaddress, char **readable_address,
1246 socklen_t *len_out)
1247{
1248 (void)listenaddress;
1249 (void)readable_address;
1251 "Unix domain sockets not supported, yet we tried to create one.");
1252 *len_out = 0;
1254 return NULL;
1255}
1256#endif /* defined(HAVE_SYS_UN_H) || defined(RUNNING_DOXYGEN) */
1257
1258/* Log a rate-limited warning about resource exhaustion */
1259static void
1260warn_about_resource_exhaution(void)
1261{
1262#define WARN_TOO_MANY_CONNS_INTERVAL (6*60*60)
1263 static ratelim_t last_warned = RATELIM_INIT(WARN_TOO_MANY_CONNS_INTERVAL);
1264 char *m;
1265 if ((m = rate_limit_log(&last_warned, approx_time()))) {
1266 int n_conns = get_n_open_sockets();
1267 log_warn(LD_NET,"Failing because we have %d connections already. Please "
1268 "read doc/TUNING for guidance.%s", n_conns, m);
1269 tor_free(m);
1270 control_event_general_status(LOG_WARN, "TOO_MANY_CONNECTIONS CURRENT=%d",
1271 n_conns);
1272 }
1273}
1274
1275/**
1276 * A socket failed from file descriptor exhaustion.
1277 *
1278 * Note down file descriptor exhaustion and log a warning. */
1279static inline void
1281{
1282 rep_hist_note_overload(OVERLOAD_FD_EXHAUSTED);
1283 warn_about_resource_exhaution();
1284}
1285
1286/**
1287 * A socket failed from TCP port exhaustion.
1288 *
1289 * Note down TCP port exhaustion and log a warning. */
1290static inline void
1292{
1294 warn_about_resource_exhaution();
1295}
1296
1297#ifdef HAVE_SYS_UN_H
1298
#define UNIX_SOCKET_PURPOSE_CONTROL_SOCKET 0
#define UNIX_SOCKET_PURPOSE_SOCKS_SOCKET 1

/** Return 1 if <b>purpose</b> is one of the unix socket purposes we know
 * what to do with (UNIX_SOCKET_PURPOSE_CONTROL_SOCKET or
 * UNIX_SOCKET_PURPOSE_SOCKS_SOCKET), and 0 for any other value. */
static int
is_valid_unix_socket_purpose(int purpose)
{
  return (purpose == UNIX_SOCKET_PURPOSE_CONTROL_SOCKET ||
          purpose == UNIX_SOCKET_PURPOSE_SOCKS_SOCKET) ? 1 : 0;
}
1318
1319/** Return a string description of a unix socket purpose */
1320static const char *
1321unix_socket_purpose_to_string(int purpose)
1322{
1323 const char *s = "unknown-purpose socket";
1324
1325 switch (purpose) {
1326 case UNIX_SOCKET_PURPOSE_CONTROL_SOCKET:
1327 s = "control socket";
1328 break;
1329 case UNIX_SOCKET_PURPOSE_SOCKS_SOCKET:
1330 s = "SOCKS socket";
1331 break;
1332 }
1333
1334 return s;
1335}
1336
1337/** Check whether we should be willing to open an AF_UNIX socket in
1338 * <b>path</b>. Return 0 if we should go ahead and -1 if we shouldn't. */
1339static int
1340check_location_for_unix_socket(const or_options_t *options, const char *path,
1341 int purpose, const port_cfg_t *port)
1342{
1343 int r = -1;
1344 char *p = NULL;
1345
1346 tor_assert(is_valid_unix_socket_purpose(purpose));
1347
1348 p = tor_strdup(path);
1349 cpd_check_t flags = CPD_CHECK_MODE_ONLY;
1350 if (get_parent_directory(p)<0 || p[0] != '/') {
1351 log_warn(LD_GENERAL, "Bad unix socket address '%s'. Tor does not support "
1352 "relative paths for unix sockets.", path);
1353 goto done;
1354 }
1355
1356 if (port->is_world_writable) {
1357 /* World-writable sockets can go anywhere. */
1358 r = 0;
1359 goto done;
1360 }
1361
1362 if (port->is_group_writable) {
1363 flags |= CPD_GROUP_OK;
1364 }
1365
1366 if (port->relax_dirmode_check) {
1367 flags |= CPD_RELAX_DIRMODE_CHECK;
1368 }
1369
1370 if (check_private_dir(p, flags, options->User) < 0) {
1371 char *escpath, *escdir;
1372 escpath = esc_for_log(path);
1373 escdir = esc_for_log(p);
1374 log_warn(LD_GENERAL, "Before Tor can create a %s in %s, the directory "
1375 "%s needs to exist, and to be accessible only by the user%s "
1376 "account that is running Tor. (On some Unix systems, anybody "
1377 "who can list a socket can connect to it, so Tor is being "
1378 "careful.)",
1379 unix_socket_purpose_to_string(purpose), escpath, escdir,
1380 port->is_group_writable ? " and group" : "");
1381 tor_free(escpath);
1382 tor_free(escdir);
1383 goto done;
1384 }
1385
1386 r = 0;
1387 done:
1388 tor_free(p);
1389 return r;
1390}
1391#endif /* defined(HAVE_SYS_UN_H) */
1392
1393/** Tell the TCP stack that it shouldn't wait for a long time after
1394 * <b>sock</b> has closed before reusing its port. Return 0 on success,
1395 * -1 on failure. */
1396static int
1398{
1399#ifdef _WIN32
1400 (void) sock;
1401 return 0;
1402#else
1403 int one=1;
1404
1405 /* REUSEADDR on normal places means you can rebind to the port
1406 * right after somebody else has let it go. But REUSEADDR on win32
1407 * means you can bind to the port _even when somebody else
1408 * already has it bound_. So, don't do that on Win32. */
1409 if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, (void*) &one,
1410 (socklen_t)sizeof(one)) == -1) {
1411 return -1;
1412 }
1413 return 0;
1414#endif /* defined(_WIN32) */
1415}
1416
1417#ifdef _WIN32
1418/** Tell the Windows TCP stack to prevent other applications from receiving
1419 * traffic from tor's open ports. Return 0 on success, -1 on failure. */
1420static int
1421make_win32_socket_exclusive(tor_socket_t sock)
1422{
1423#ifdef SO_EXCLUSIVEADDRUSE
1424 int one=1;
1425
1426 /* Any socket that sets REUSEADDR on win32 can bind to a port _even when
1427 * somebody else already has it bound_, and _even if the original socket
1428 * didn't set REUSEADDR_. Use EXCLUSIVEADDRUSE to prevent this port-stealing
1429 * on win32. */
1430 if (setsockopt(sock, SOL_SOCKET, SO_EXCLUSIVEADDRUSE, (void*) &one,
1431 (socklen_t)sizeof(one))) {
1432 return -1;
1433 }
1434 return 0;
1435#else /* !defined(SO_EXCLUSIVEADDRUSE) */
1436 (void) sock;
1437 return 0;
1438#endif /* defined(SO_EXCLUSIVEADDRUSE) */
1439}
1440#endif /* defined(_WIN32) */
1441
1442/** Max backlog to pass to listen. We start at */
1443static int listen_limit = INT_MAX;
1444
1445/* Listen on <b>fd</b> with appropriate backlog. Return as for listen. */
1446static int
1447tor_listen(tor_socket_t fd)
1448{
1449 int r;
1450
1451 if ((r = listen(fd, listen_limit)) < 0) {
1452 if (listen_limit == SOMAXCONN)
1453 return r;
1454 if ((r = listen(fd, SOMAXCONN)) == 0) {
1455 listen_limit = SOMAXCONN;
1456 log_warn(LD_NET, "Setting listen backlog to INT_MAX connections "
1457 "didn't work, but SOMAXCONN did. Lowering backlog limit.");
1458 }
1459 }
1460 return r;
1461}
1462
1463/** Bind a new non-blocking socket listening to the socket described
1464 * by <b>listensockaddr</b>.
1465 *
1466 * <b>address</b> is only used for logging purposes and to add the information
1467 * to the conn.
1468 *
1469 * Set <b>addr_in_use</b> to true in case socket binding fails with
1470 * EADDRINUSE.
1471 */
1472static connection_t *
1473connection_listener_new(const struct sockaddr *listensockaddr,
1474 socklen_t socklen,
1475 int type, const char *address,
1476 const port_cfg_t *port_cfg,
1477 int *addr_in_use)
1478{
1479 listener_connection_t *lis_conn;
1480 connection_t *conn = NULL;
1481 tor_socket_t s = TOR_INVALID_SOCKET; /* the socket we're going to make */
1482 or_options_t const *options = get_options();
1483 (void) options; /* Windows doesn't use this. */
1484#if defined(HAVE_PWD_H) && defined(HAVE_SYS_UN_H)
1485 const struct passwd *pw = NULL;
1486#endif
1487 uint16_t usePort = 0, gotPort = 0;
1488 int start_reading = 0;
1489 static int global_next_session_group = SESSION_GROUP_FIRST_AUTO;
1490 tor_addr_t addr;
1491 int exhaustion = 0;
1492
1493 if (addr_in_use)
1494 *addr_in_use = 0;
1495
1496 if (listensockaddr->sa_family == AF_INET ||
1497 listensockaddr->sa_family == AF_INET6) {
1498 int is_stream = (type != CONN_TYPE_AP_DNS_LISTENER);
1499 if (is_stream)
1500 start_reading = 1;
1501
1502 tor_addr_from_sockaddr(&addr, listensockaddr, &usePort);
1503 log_notice(LD_NET, "Opening %s on %s",
1504 conn_type_to_string(type), fmt_addrport(&addr, usePort));
1505
1507 is_stream ? SOCK_STREAM : SOCK_DGRAM,
1508 is_stream ? IPPROTO_TCP: IPPROTO_UDP);
1509 if (!SOCKET_OK(s)) {
1510 int e = tor_socket_errno(s);
1511 if (ERRNO_IS_RESOURCE_LIMIT(e)) {
1513 /*
1514 * We'll call the OOS handler at the error exit, so set the
1515 * exhaustion flag for it.
1516 */
1517 exhaustion = 1;
1518 } else {
1519 log_warn(LD_NET, "Socket creation failed: %s",
1520 tor_socket_strerror(e));
1521 }
1522 goto err;
1523 }
1524
1525 if (make_socket_reuseable(s) < 0) {
1526 log_warn(LD_NET, "Error setting SO_REUSEADDR flag on %s: %s",
1527 conn_type_to_string(type),
1528 tor_socket_strerror(errno));
1529 }
1530
1531#ifdef _WIN32
1532 if (make_win32_socket_exclusive(s) < 0) {
1533 log_warn(LD_NET, "Error setting SO_EXCLUSIVEADDRUSE flag on %s: %s",
1534 conn_type_to_string(type),
1535 tor_socket_strerror(errno));
1536 }
1537#endif /* defined(_WIN32) */
1538
1539#if defined(USE_TRANSPARENT) && defined(IP_TRANSPARENT)
1540 if (options->TransProxyType_parsed == TPT_TPROXY &&
1542 int one = 1;
1543 if (setsockopt(s, SOL_IP, IP_TRANSPARENT, (void*)&one,
1544 (socklen_t)sizeof(one)) < 0) {
1545 const char *extra = "";
1546 int e = tor_socket_errno(s);
1547 if (e == EPERM)
1548 extra = "TransTPROXY requires root privileges or similar"
1549 " capabilities.";
1550 log_warn(LD_NET, "Error setting IP_TRANSPARENT flag: %s.%s",
1551 tor_socket_strerror(e), extra);
1552 }
1553 }
1554#endif /* defined(USE_TRANSPARENT) && defined(IP_TRANSPARENT) */
1555
1556#ifdef IPV6_V6ONLY
1557 if (listensockaddr->sa_family == AF_INET6) {
1558 int one = 1;
1559 /* We need to set IPV6_V6ONLY so that this socket can't get used for
1560 * IPv4 connections. */
1561 if (setsockopt(s,IPPROTO_IPV6, IPV6_V6ONLY,
1562 (void*)&one, (socklen_t)sizeof(one)) < 0) {
1563 int e = tor_socket_errno(s);
1564 log_warn(LD_NET, "Error setting IPV6_V6ONLY flag: %s",
1565 tor_socket_strerror(e));
1566 /* Keep going; probably not harmful. */
1567 }
1568 }
1569#endif /* defined(IPV6_V6ONLY) */
1570
1571 if (bind(s,listensockaddr,socklen) < 0) {
1572 const char *helpfulhint = "";
1573 int e = tor_socket_errno(s);
1574 if (ERRNO_IS_EADDRINUSE(e)) {
1575 helpfulhint = ". Is Tor already running?";
1576 if (addr_in_use)
1577 *addr_in_use = 1;
1578 }
1579 log_warn(LD_NET, "Could not bind to %s:%u: %s%s", address, usePort,
1580 tor_socket_strerror(e), helpfulhint);
1581 goto err;
1582 }
1583
1584 if (is_stream) {
1585 if (tor_listen(s) < 0) {
1586 log_warn(LD_NET, "Could not listen on %s:%u: %s", address, usePort,
1587 tor_socket_strerror(tor_socket_errno(s)));
1588 goto err;
1589 }
1590 }
1591
1592 if (usePort != 0) {
1593 gotPort = usePort;
1594 } else {
1595 tor_addr_t addr2;
1596 struct sockaddr_storage ss;
1597 socklen_t ss_len=sizeof(ss);
1598 if (getsockname(s, (struct sockaddr*)&ss, &ss_len)<0) {
1599 log_warn(LD_NET, "getsockname() couldn't learn address for %s: %s",
1600 conn_type_to_string(type),
1601 tor_socket_strerror(tor_socket_errno(s)));
1602 gotPort = 0;
1603 }
1604 tor_addr_from_sockaddr(&addr2, (struct sockaddr*)&ss, &gotPort);
1605 }
1606#ifdef HAVE_SYS_UN_H
1607 /*
1608 * AF_UNIX generic setup stuff
1609 */
1610 } else if (listensockaddr->sa_family == AF_UNIX) {
1611 /* We want to start reading for both AF_UNIX cases */
1612 start_reading = 1;
1613
1615
1616 if (check_location_for_unix_socket(options, address,
1617 (type == CONN_TYPE_CONTROL_LISTENER) ?
1618 UNIX_SOCKET_PURPOSE_CONTROL_SOCKET :
1619 UNIX_SOCKET_PURPOSE_SOCKS_SOCKET, port_cfg) < 0) {
1620 goto err;
1621 }
1622
1623 log_notice(LD_NET, "Opening %s on %s",
1624 conn_type_to_string(type), address);
1625
1626 tor_addr_make_unspec(&addr);
1627
1628 if (unlink(address) < 0 && errno != ENOENT) {
1629 log_warn(LD_NET, "Could not unlink %s: %s", address,
1630 strerror(errno));
1631 goto err;
1632 }
1633
1634 s = tor_open_socket_nonblocking(AF_UNIX, SOCK_STREAM, 0);
1635 if (! SOCKET_OK(s)) {
1636 int e = tor_socket_errno(s);
1637 if (ERRNO_IS_RESOURCE_LIMIT(e)) {
1639 /*
1640 * We'll call the OOS handler at the error exit, so set the
1641 * exhaustion flag for it.
1642 */
1643 exhaustion = 1;
1644 } else {
1645 log_warn(LD_NET,"Socket creation failed: %s.", strerror(e));
1646 }
1647 goto err;
1648 }
1649
1650 if (bind(s, listensockaddr,
1651 (socklen_t)sizeof(struct sockaddr_un)) == -1) {
1652 log_warn(LD_NET,"Bind to %s failed: %s.", address,
1653 tor_socket_strerror(tor_socket_errno(s)));
1654 goto err;
1655 }
1656
1657#ifdef HAVE_PWD_H
1658 if (options->User) {
1659 pw = tor_getpwnam(options->User);
1660 struct stat st;
1661 if (pw == NULL) {
1662 log_warn(LD_NET,"Unable to chown() %s socket: user %s not found.",
1663 address, options->User);
1664 goto err;
1665 } else if (fstat(s, &st) == 0 &&
1666 st.st_uid == pw->pw_uid && st.st_gid == pw->pw_gid) {
1667 /* No change needed */
1668 } else if (chown(sandbox_intern_string(address),
1669 pw->pw_uid, pw->pw_gid) < 0) {
1670 log_warn(LD_NET,"Unable to chown() %s socket: %s.",
1671 address, strerror(errno));
1672 goto err;
1673 }
1674 }
1675#endif /* defined(HAVE_PWD_H) */
1676
1677 {
1678 unsigned mode;
1679 const char *status;
1680 struct stat st;
1681 if (port_cfg->is_world_writable) {
1682 mode = 0666;
1683 status = "world-writable";
1684 } else if (port_cfg->is_group_writable) {
1685 mode = 0660;
1686 status = "group-writable";
1687 } else {
1688 mode = 0600;
1689 status = "private";
1690 }
1691 /* We need to use chmod; fchmod doesn't work on sockets on all
1692 * platforms. */
1693 if (fstat(s, &st) == 0 && (st.st_mode & 0777) == mode) {
1694 /* no change needed */
1695 } else if (chmod(sandbox_intern_string(address), mode) < 0) {
1696 log_warn(LD_FS,"Unable to make %s %s.", address, status);
1697 goto err;
1698 }
1699 }
1700
1701 if (listen(s, SOMAXCONN) < 0) {
1702 log_warn(LD_NET, "Could not listen on %s: %s", address,
1703 tor_socket_strerror(tor_socket_errno(s)));
1704 goto err;
1705 }
1706
1707#ifndef __APPLE__
1708 /* This code was introduced to help debug #28229. */
1709 int value;
1710 socklen_t len = sizeof(value);
1711
1712 if (!getsockopt(s, SOL_SOCKET, SO_ACCEPTCONN, &value, &len)) {
1713 if (value == 0) {
1714 log_err(LD_NET, "Could not listen on %s - "
1715 "getsockopt(.,SO_ACCEPTCONN,.) yields 0.", address);
1716 goto err;
1717 }
1718 }
1719#endif /* !defined(__APPLE__) */
1720#endif /* defined(HAVE_SYS_UN_H) */
1721 } else {
1722 log_err(LD_BUG, "Got unexpected address family %d.",
1723 listensockaddr->sa_family);
1724 tor_assert(0);
1725 }
1726
1727 lis_conn = listener_connection_new(type, listensockaddr->sa_family);
1728 conn = TO_CONN(lis_conn);
1729 conn->socket_family = listensockaddr->sa_family;
1730 conn->s = s;
1731 s = TOR_INVALID_SOCKET; /* Prevent double-close */
1732 conn->address = tor_strdup(address);
1733 conn->port = gotPort;
1734 tor_addr_copy(&conn->addr, &addr);
1735
1736 memcpy(&lis_conn->entry_cfg, &port_cfg->entry_cfg, sizeof(entry_port_cfg_t));
1737
1738 if (port_cfg->entry_cfg.isolation_flags) {
1739 lis_conn->entry_cfg.isolation_flags = port_cfg->entry_cfg.isolation_flags;
1740 if (port_cfg->entry_cfg.session_group >= 0) {
1741 lis_conn->entry_cfg.session_group = port_cfg->entry_cfg.session_group;
1742 } else {
1743 /* This can wrap after around INT_MAX listeners are opened. But I don't
1744 * believe that matters, since you would need to open a ridiculous
1745 * number of listeners while keeping the early ones open before you ever
1746 * hit this. An OR with a dozen ports open, for example, would have to
1747 * close and re-open its listeners every second for 4 years nonstop.
1748 */
1749 lis_conn->entry_cfg.session_group = global_next_session_group--;
1750 }
1751 }
1752
1753 if (connection_add(conn) < 0) { /* no space, forget it */
1754 log_warn(LD_NET,"connection_add for listener failed. Giving up.");
1755 goto err;
1756 }
1757
1758 log_fn(usePort==gotPort ? LOG_DEBUG : LOG_NOTICE, LD_NET,
1759 "%s listening on port %u.",
1760 conn_type_to_string(type), gotPort);
1761
1763 if (start_reading) {
1765 } else {
1768 }
1769
1770 /*
1771 * Normal exit; call the OOS handler since connection count just changed;
1772 * the exhaustion flag will always be zero here though.
1773 */
1775
1776 log_notice(LD_NET, "Opened %s", connection_describe(conn));
1777
1778 return conn;
1779
1780 err:
1781 if (SOCKET_OK(s))
1783 if (conn)
1784 connection_free(conn);
1785
1786 /* Call the OOS handler, indicate if we saw an exhaustion-related error */
1788
1789 return NULL;
1790}
1791
/**
 * Create a new listener connection for a given <b>port</b>. In case we
 * decline to open the listener for a reason that is not an error condition,
 * set <b>defer</b> to true. If we cannot bind listening socket because
 * address is already in use, set <b>addr_in_use</b> to true.
 */
1798static connection_t *
1800 int *defer, int *addr_in_use)
1801{
1802 connection_t *conn;
1803 struct sockaddr *listensockaddr;
1804 socklen_t listensocklen = 0;
1805 char *address=NULL;
1806 int real_port = port->port == CFG_AUTO_PORT ? 0 : port->port;
1807 tor_assert(real_port <= UINT16_MAX);
1808
1809 if (defer)
1810 *defer = 0;
1811
1812 if (port->server_cfg.no_listen) {
1813 if (defer)
1814 *defer = 1;
1815 return NULL;
1816 }
1817
1818#ifndef _WIN32
1819 /* We don't need to be root to create a UNIX socket, so defer until after
1820 * setuid. */
1821 const or_options_t *options = get_options();
1822 if (port->is_unix_addr && !geteuid() && (options->User) &&
1823 strcmp(options->User, "root")) {
1824 if (defer)
1825 *defer = 1;
1826 return NULL;
1827 }
1828#endif /* !defined(_WIN32) */
1829
1830 if (port->is_unix_addr) {
1831 listensockaddr = (struct sockaddr *)
1832 create_unix_sockaddr(port->unix_addr,
1833 &address, &listensocklen);
1834 } else {
1835 listensockaddr = tor_malloc(sizeof(struct sockaddr_storage));
1836 listensocklen = tor_addr_to_sockaddr(&port->addr,
1837 real_port,
1838 listensockaddr,
1839 sizeof(struct sockaddr_storage));
1840 address = tor_addr_to_str_dup(&port->addr);
1841 }
1842
1843 if (listensockaddr) {
1844 conn = connection_listener_new(listensockaddr, listensocklen,
1845 port->type, address, port,
1846 addr_in_use);
1847 tor_free(listensockaddr);
1848 tor_free(address);
1849 } else {
1850 conn = NULL;
1851 }
1852
1853 return conn;
1854}
1855
1856/** Do basic sanity checking on a newly received socket. Return 0
1857 * if it looks ok, else return -1.
1858 *
1859 * Notably, some TCP stacks can erroneously have accept() return successfully
1860 * with socklen 0, when the client sends an RST before the accept call (as
1861 * nmap does). We want to detect that, and not go on with the connection.
1862 */
1863static int
1864check_sockaddr(const struct sockaddr *sa, int len, int level)
1865{
1866 int ok = 1;
1867
1868 if (sa->sa_family == AF_INET) {
1869 struct sockaddr_in *sin=(struct sockaddr_in*)sa;
1870 if (len != sizeof(struct sockaddr_in)) {
1871 log_fn(level, LD_NET, "Length of address not as expected: %d vs %d",
1872 len,(int)sizeof(struct sockaddr_in));
1873 ok = 0;
1874 }
1875 if (sin->sin_addr.s_addr == 0 || sin->sin_port == 0) {
1876 log_fn(level, LD_NET,
1877 "Address for new connection has address/port equal to zero.");
1878 ok = 0;
1879 }
1880 } else if (sa->sa_family == AF_INET6) {
1881 struct sockaddr_in6 *sin6=(struct sockaddr_in6*)sa;
1882 if (len != sizeof(struct sockaddr_in6)) {
1883 log_fn(level, LD_NET, "Length of address not as expected: %d vs %d",
1884 len,(int)sizeof(struct sockaddr_in6));
1885 ok = 0;
1886 }
1887 if (fast_mem_is_zero((void*)sin6->sin6_addr.s6_addr, 16) ||
1888 sin6->sin6_port == 0) {
1889 log_fn(level, LD_NET,
1890 "Address for new connection has address/port equal to zero.");
1891 ok = 0;
1892 }
1893 } else if (sa->sa_family == AF_UNIX) {
1894 ok = 1;
1895 } else {
1896 ok = 0;
1897 }
1898 return ok ? 0 : -1;
1899}
1900
1901/** Check whether the socket family from an accepted socket <b>got</b> is the
1902 * same as the one that <b>listener</b> is waiting for. If it isn't, log
1903 * a useful message and return -1. Else return 0.
1904 *
1905 * This is annoying, but can apparently happen on some Darwins. */
1906static int
1908{
1909 if (got != listener->socket_family) {
1910 log_info(LD_BUG, "A listener connection returned a socket with a "
1911 "mismatched family. %s for addr_family %d gave us a socket "
1912 "with address family %d. Dropping.",
1913 conn_type_to_string(listener->type),
1914 (int)listener->socket_family,
1915 (int)got);
1916 return -1;
1917 }
1918 return 0;
1919}
1920
1921/** The listener connection <b>conn</b> told poll() it wanted to read.
1922 * Call accept() on conn->s, and add the new connection if necessary.
1923 */
1924static int
1926{
1927 tor_socket_t news; /* the new socket */
1928 connection_t *newconn = 0;
1929 /* information about the remote peer when connecting to other routers */
1930 struct sockaddr_storage addrbuf;
1931 struct sockaddr *remote = (struct sockaddr*)&addrbuf;
1932 /* length of the remote address. Must be whatever accept() needs. */
1933 socklen_t remotelen = (socklen_t)sizeof(addrbuf);
1934 const or_options_t *options = get_options();
1935
1936 tor_assert((size_t)remotelen >= sizeof(struct sockaddr_in));
1937 memset(&addrbuf, 0, sizeof(addrbuf));
1938
1939 news = tor_accept_socket_nonblocking(conn->s,remote,&remotelen);
1940 if (!SOCKET_OK(news)) { /* accept() error */
1941 int e = tor_socket_errno(conn->s);
1942 if (ERRNO_IS_ACCEPT_EAGAIN(e)) {
1943 /*
1944 * they hung up before we could accept(). that's fine.
1945 *
1946 * give the OOS handler a chance to run though
1947 */
1949 return 0;
1950 } else if (ERRNO_IS_RESOURCE_LIMIT(e)) {
1952 /* Exhaustion; tell the OOS handler */
1954 return 0;
1955 }
1956 /* else there was a real error. */
1957 log_warn(LD_NET,"accept() failed: %s. Closing listener.",
1958 tor_socket_strerror(e));
1959 connection_mark_for_close(conn);
1960 /* Tell the OOS handler about this too */
1962 return -1;
1963 }
1964 log_debug(LD_NET,
1965 "Connection accepted on socket %d (child of fd %d).",
1966 (int)news,(int)conn->s);
1967
1968 /* We accepted a new conn; run OOS handler */
1970
1971 if (make_socket_reuseable(news) < 0) {
1972 if (tor_socket_errno(news) == EINVAL) {
1973 /* This can happen on OSX if we get a badly timed shutdown. */
1974 log_debug(LD_NET, "make_socket_reuseable returned EINVAL");
1975 } else {
1976 log_warn(LD_NET, "Error setting SO_REUSEADDR flag on %s: %s",
1977 conn_type_to_string(new_type),
1978 tor_socket_strerror(errno));
1979 }
1980 tor_close_socket(news);
1981 return 0;
1982 }
1983
1984 if (options->ConstrainedSockets)
1986
1987 if (check_sockaddr_family_match(remote->sa_family, conn) < 0) {
1988 tor_close_socket(news);
1989 return 0;
1990 }
1991
1992 if (conn->socket_family == AF_INET || conn->socket_family == AF_INET6 ||
1993 (conn->socket_family == AF_UNIX && new_type == CONN_TYPE_AP)) {
1994 tor_addr_t addr;
1995 uint16_t port;
1996 if (check_sockaddr(remote, remotelen, LOG_INFO)<0) {
1997 log_info(LD_NET,
1998 "accept() returned a strange address; closing connection.");
1999 tor_close_socket(news);
2000 return 0;
2001 }
2002
2003 tor_addr_from_sockaddr(&addr, remote, &port);
2004
2005 /* process entrance policies here, before we even create the connection */
2006 if (new_type == CONN_TYPE_AP) {
2007 /* check sockspolicy to see if we should accept it */
2008 if (socks_policy_permits_address(&addr) == 0) {
2009 log_notice(LD_APP,
2010 "Denying socks connection from untrusted address %s.",
2011 fmt_and_decorate_addr(&addr));
2013 tor_close_socket(news);
2014 return 0;
2015 }
2016 }
2017 if (new_type == CONN_TYPE_DIR) {
2018 /* check dirpolicy to see if we should accept it */
2019 if (dir_policy_permits_address(&addr) == 0) {
2020 log_notice(LD_DIRSERV,"Denying dir connection from address %s.",
2021 fmt_and_decorate_addr(&addr));
2023 tor_close_socket(news);
2024 return 0;
2025 }
2026 }
2027 if (new_type == CONN_TYPE_OR) {
2028 /* Assess with the connection DoS mitigation subsystem if this address
2029 * can open a new connection. */
2030 if (dos_conn_addr_get_defense_type(&addr) == DOS_CONN_DEFENSE_CLOSE) {
2032 tor_close_socket(news);
2033 return 0;
2034 }
2035 }
2036
2037 newconn = connection_new(new_type, conn->socket_family);
2038 newconn->s = news;
2039
2040 /* remember the remote address */
2041 tor_addr_copy(&newconn->addr, &addr);
2042 if (new_type == CONN_TYPE_AP && conn->socket_family == AF_UNIX) {
2043 newconn->port = 0;
2044 newconn->address = tor_strdup(conn->address);
2045 } else {
2046 newconn->port = port;
2047 newconn->address = tor_addr_to_str_dup(&addr);
2048 }
2049
2050 if (new_type == CONN_TYPE_AP && conn->socket_family != AF_UNIX) {
2051 log_info(LD_NET, "New SOCKS connection opened from %s.",
2052 fmt_and_decorate_addr(&addr));
2053 }
2054 if (new_type == CONN_TYPE_AP && conn->socket_family == AF_UNIX) {
2055 log_info(LD_NET, "New SOCKS AF_UNIX connection opened");
2056 }
2057 if (new_type == CONN_TYPE_CONTROL) {
2058 log_notice(LD_CONTROL, "New control connection opened from %s.",
2059 fmt_and_decorate_addr(&addr));
2060 }
2061 if (new_type == CONN_TYPE_METRICS) {
2062 log_info(LD_CONTROL, "New metrics connection opened from %s.",
2063 fmt_and_decorate_addr(&addr));
2064 }
2065
2066 } else if (conn->socket_family == AF_UNIX && conn->type != CONN_TYPE_AP) {
2068 tor_assert(new_type == CONN_TYPE_CONTROL);
2069 log_notice(LD_CONTROL, "New control connection opened.");
2070
2071 newconn = connection_new(new_type, conn->socket_family);
2072 newconn->s = news;
2073
2074 /* remember the remote address -- do we have anything sane to put here? */
2075 tor_addr_make_unspec(&newconn->addr);
2076 newconn->port = 1;
2077 newconn->address = tor_strdup(conn->address);
2078 } else {
2079 tor_assert(0);
2080 };
2081
2082 /* We are receiving this connection. */
2083 newconn->from_listener = 1;
2084
2085 if (connection_add(newconn) < 0) { /* no space, forget it */
2086 connection_free(newconn);
2087 return 0; /* no need to tear down the parent */
2088 }
2089
2090 if (connection_init_accepted_conn(newconn, TO_LISTENER_CONN(conn)) < 0) {
2091 if (! newconn->marked_for_close)
2092 connection_mark_for_close(newconn);
2093 return 0;
2094 }
2095
2096 note_connection(true /* inbound */, newconn);
2097
2098 return 0;
2099}
2100
2101/** Initialize states for newly accepted connection <b>conn</b>.
2102 *
2103 * If conn is an OR, start the TLS handshake.
2104 *
2105 * If conn is a transparent AP, get its original destination
2106 * and place it in circuit_wait.
2107 *
2108 * The <b>listener</b> parameter is only used for AP connections.
2109 */
/* NOTE(review): the embedded listing numbers skip here (2111, 2116, 2121,
 * 2124, 2126, ...), so the declarator and several statements are missing
 * from this extract. The comments added below cover only what is visible. */
2110int
2112 const listener_connection_t *listener)
2113{
2114 int rv;
2115
2117
/* Per-type initialization. The AP, DIR and CONTROL cases break out of the
 * switch and reach the final "return 0". */
2118 switch (conn->type) {
2119 case CONN_TYPE_EXT_OR:
2120 /* Initiate Extended ORPort authentication. */
2122 case CONN_TYPE_OR:
/* Report OR_CONN_EVENT_NEW for this connection. */
2123 connection_or_event_status(TO_OR_CONN(conn), OR_CONN_EVENT_NEW, 0);
2125 if (rv < 0) {
2127 }
/* The OR path returns rv directly, so the break below is never reached. */
2128 return rv;
2129 break;
2130 case CONN_TYPE_AP:
/* Copy the listener's entry-port configuration into the new entry conn. */
2131 memcpy(&TO_ENTRY_CONN(conn)->entry_cfg, &listener->entry_cfg,
2132 sizeof(entry_port_cfg_t));
/* Remember which kind of listener accepted this connection. */
2134 TO_ENTRY_CONN(conn)->socks_request->listener_type = listener->base_.type;
2135
2136 /* Any incoming connection on an entry port counts as user activity. */
2138
/* Further setup depends on the type of the accepting listener. */
2139 switch (TO_CONN(listener)->type) {
2143 listener->entry_cfg.socks_prefer_no_auth;
2145 listener->entry_cfg.extended_socks5_codes;
2146 break;
2149 /* XXXX028 -- is this correct still, with the addition of
2150 * pending_entry_connections ? */
2156 break;
2159 }
2160 break;
2161 case CONN_TYPE_DIR:
2164 break;
2165 case CONN_TYPE_CONTROL:
2167 break;
2168 }
2169 return 0;
2170}
2171
2172/** Take conn, make a nonblocking socket; try to connect to
2173 * sa, binding to bindaddr if sa is not localhost. If fail, return -1 and if
2174 * applicable put your best guess about errno into *<b>socket_error</b>.
2175 * If connected return 1, if EAGAIN return 0.
2176 */
/* NOTE(review): the embedded listing numbers skip in this function (2178,
 * 2193, 2204, 2220-2221, 2225, 2262-2263, 2267, 2269, 2278, 2282, 2292), so
 * the declarator and several statements are missing from this extract. */
2177MOCK_IMPL(STATIC int,
2179 const struct sockaddr *sa,
2180 socklen_t sa_len,
2181 const struct sockaddr *bindaddr,
2182 socklen_t bindaddr_len,
2183 int *socket_error))
2184{
2185 tor_socket_t s;
2186 int inprogress = 0;
2187 const or_options_t *options = get_options();
2188
2189 tor_assert(conn);
2190 tor_assert(sa);
2191 tor_assert(socket_error);
2192
2194 /* We should never even try to connect anyplace if the network is
2195 * completely shut off.
2196 *
2197 * (We don't check net_is_disabled() here, since we still sometimes
2198 * want to open connections when we're in soft hibernation.)
2199 */
/* Rate-limited so the warning is emitted at most once per 30*60 seconds. */
2200 static ratelim_t disablenet_violated = RATELIM_INIT(30*60);
2201 *socket_error = SOCK_ERRNO(ENETUNREACH);
2202 log_fn_ratelim(&disablenet_violated, LOG_WARN, LD_BUG,
2203 "Tried to open a socket with DisableNetwork set.");
2205 return -1;
2206 }
2207
/* IPPROTO_TCP for IPv4/IPv6 destinations; protocol 0 for any other family. */
2208 const int protocol_family = sa->sa_family;
2209 const int proto = (sa->sa_family == AF_INET6 ||
2210 sa->sa_family == AF_INET) ? IPPROTO_TCP : 0;
2211
2212 s = tor_open_socket_nonblocking(protocol_family, SOCK_STREAM, proto);
2213 if (! SOCKET_OK(s)) {
2214 /*
2215 * Early OOS handler calls; it matters if it's an exhaustion-related
2216 * error or not.
2217 */
2218 *socket_error = tor_socket_errno(s);
2219 if (ERRNO_IS_RESOURCE_LIMIT(*socket_error)) {
2222 } else {
2223 log_warn(LD_NET,"Error creating network socket: %s",
2224 tor_socket_strerror(*socket_error));
2226 }
2227 return -1;
2228 }
2229
/* Failure here is only logged; the connection attempt continues. */
2230 if (make_socket_reuseable(s) < 0) {
2231 log_warn(LD_NET, "Error setting SO_REUSEADDR flag on new connection: %s",
2232 tor_socket_strerror(errno));
2233 }
2234
2235 /* From ip(7): Inform the kernel to not reserve an ephemeral port when using
2236 * bind(2) with a port number of 0. The port will later be automatically
2237 * chosen at connect(2) time, in a way that allows sharing a source port as
2238 * long as the 4-tuple is unique.
2239 *
2240 * This is needed for relays using OutboundBindAddresses because the port
2241 * value in the bind address is set to 0. */
2242#ifdef IP_BIND_ADDRESS_NO_PORT
/* Static flag: once setsockopt() fails with EINVAL, it is not tried again. */
2243 static int try_ip_bind_address_no_port = 1;
2244 if (bindaddr && try_ip_bind_address_no_port &&
2245 setsockopt(s, SOL_IP, IP_BIND_ADDRESS_NO_PORT, &(int){1}, sizeof(int))) {
2246 if (errno == EINVAL) {
2247 log_notice(LD_NET, "Tor was built with support for "
2248 "IP_BIND_ADDRESS_NO_PORT, but the current kernel "
2249 "doesn't support it. This might cause Tor to run out "
2250 "of ephemeral ports more quickly.");
2251 try_ip_bind_address_no_port = 0;
2252 } else {
2253 log_warn(LD_NET, "Error setting IP_BIND_ADDRESS_NO_PORT on new "
2254 "connection: %s", tor_socket_strerror(errno));
2255 }
2256 }
2257#endif
2258
/* Bind only when the caller supplied a local address; a bind failure is
 * fatal for this attempt. */
2259 if (bindaddr && bind(s, bindaddr, bindaddr_len) < 0) {
2260 *socket_error = tor_socket_errno(s);
2261 if (ERRNO_IS_EADDRINUSE(*socket_error)) {
2264 } else {
2265 log_warn(LD_NET,"Error binding network socket: %s",
2266 tor_socket_strerror(*socket_error));
2268 }
2270 return -1;
2271 }
2272
2273 /*
2274 * We've got the socket open and bound; give the OOS handler a chance to
2275 * check against configured maximum socket number, but tell it no exhaustion
2276 * failure.
2277 */
2279
2280 tor_assert(options);
2281 if (options->ConstrainedSockets)
2283
/* The socket is nonblocking, so an "in progress" errno from connect() is
 * not a failure; any other errno is. */
2284 if (connect(s, sa, sa_len) < 0) {
2285 int e = tor_socket_errno(s);
2286 if (!ERRNO_IS_CONN_EINPROGRESS(e)) {
2287 /* yuck. kill it. */
2288 *socket_error = e;
2289 log_info(LD_NET,
2290 "connect() to socket failed: %s",
2291 tor_socket_strerror(e));
2293 return -1;
2294 } else {
2295 inprogress = 1;
2296 }
2297 }
2298
2299 note_connection(false /* outbound */, conn);
2300
2301 /* it succeeded. we're connected. */
2302 log_fn(inprogress ? LOG_DEBUG : LOG_INFO, LD_NET,
2303 "Connection to socket %s (sock "TOR_SOCKET_T_FORMAT").",
2304 inprogress ? "in progress" : "established", s);
2305 conn->s = s;
/* NOTE(review): conn->s already holds s on this failure path and no close
 * of s is visible here; presumably the caller tears down conn -- confirm. */
2306 if (connection_add_connecting(conn) < 0) {
2307 /* no space, forget it */
2308 *socket_error = SOCK_ERRNO(ENOBUFS);
2309 return -1;
2310 }
2311
/* 0 means the connect is still in progress, 1 means already established. */
2312 return inprogress ? 0 : 1;
2313}
2314
2315/* Log a message if connection attempt is made when IPv4 or IPv6 is disabled.
2316 * Log a less severe message if we couldn't conform to ClientPreferIPv6ORPort
2317 * or ClientPreferIPv6DirPort. */
2318static void
2319connection_connect_log_client_use_ip_version(const connection_t *conn)
2320{
2321 const or_options_t *options = get_options();
2322
2323 /* Only clients care about ClientUseIPv4/6, bail out early on servers, and
2324 * on connections we don't care about */
2325 if (server_mode(options) || !conn || conn->type == CONN_TYPE_EXIT) {
2326 return;
2327 }
2328
2329 /* We're only prepared to log OR and DIR connections here */
2330 if (conn->type != CONN_TYPE_OR && conn->type != CONN_TYPE_DIR) {
2331 return;
2332 }
2333
/* must_ipv4: IPv6 is not usable per reachable_addr_use_ipv6();
 * must_ipv6: ClientUseIPv4 is 0. */
2334 const int must_ipv4 = !reachable_addr_use_ipv6(options);
2335 const int must_ipv6 = (options->ClientUseIPv4 == 0);
/* NOTE(review): listing lines 2337-2338 (the rest of this initializer) are
 * absent from this extract. */
2336 const int pref_ipv6 = (conn->type == CONN_TYPE_OR
2339 tor_addr_t real_addr;
2340 tor_addr_copy(&real_addr, &conn->addr);
2341
2342 /* Check if we broke a mandatory address family restriction */
2343 if ((must_ipv4 && tor_addr_family(&real_addr) == AF_INET6)
2344 || (must_ipv6 && tor_addr_family(&real_addr) == AF_INET)) {
/* Static flag: the backtrace is logged only the first time this fires. */
2345 static int logged_backtrace = 0;
2346 log_info(LD_BUG, "Outgoing %s connection to %s violated ClientUseIPv%s 0.",
2347 conn->type == CONN_TYPE_OR ? "OR" : "Dir",
2348 fmt_addr(&real_addr),
2349 options->ClientUseIPv4 == 0 ? "4" : "6");
2350 if (!logged_backtrace) {
2351 log_backtrace(LOG_INFO, LD_BUG, "Address came from");
2352 logged_backtrace = 1;
2353 }
2354 }
2355
2356 /* Bridges are allowed to break IPv4/IPv6 ORPort preferences to connect to
2357 * the node's configured address when ClientPreferIPv6ORPort is auto */
/* The value -1 is the "auto" setting checked for here. */
2358 if (options->UseBridges && conn->type == CONN_TYPE_OR
2359 && options->ClientPreferIPv6ORPort == -1) {
2360 return;
2361 }
2362
2363 if (reachable_addr_use_ipv6(options)) {
2364 log_info(LD_NET, "Our outgoing connection is using IPv%d.",
2365 tor_addr_family(&real_addr) == AF_INET6 ? 6 : 4);
2366 }
2367
2368 /* Check if we couldn't satisfy an address family preference */
2369 if ((!pref_ipv6 && tor_addr_family(&real_addr) == AF_INET6)
2370 || (pref_ipv6 && tor_addr_family(&real_addr) == AF_INET)) {
2371 log_info(LD_NET, "Outgoing connection to %s doesn't satisfy "
2372 "ClientPreferIPv6%sPort %d, with ClientUseIPv4 %d, and "
2373 "reachable_addr_use_ipv6 %d (ClientUseIPv6 %d and UseBridges "
2374 "%d).",
2375 fmt_addr(&real_addr),
2376 conn->type == CONN_TYPE_OR ? "OR" : "Dir",
2377 conn->type == CONN_TYPE_OR ? options->ClientPreferIPv6ORPort
2378 : options->ClientPreferIPv6DirPort,
2379 options->ClientUseIPv4, reachable_addr_use_ipv6(options),
2380 options->ClientUseIPv6, options->UseBridges);
2381 }
2382}
2383
2384/** Retrieve the outbound address depending on the protocol (IPv4 or IPv6)
2385 * and the connection type (relay, exit, ...)
2386 * Return a socket address or NULL in case nothing is configured.
2387 **/
/* NOTE(review): listing lines 2389 (declarator), 2413 and 2424 are absent
 * from this extract; the parameter "family" used below is declared on the
 * missing declarator line. */
2388const tor_addr_t *
2390 const or_options_t *options, unsigned int conn_type)
2391{
2392 const tor_addr_t *ext_addr = NULL;
2393
/* Second index into OutboundBindAddresses: 0 for AF_INET, 1 for AF_INET6.
 * Any other family yields NULL. */
2394 int fam_index;
2395 switch (family) {
2396 case AF_INET:
2397 fam_index = 0;
2398 break;
2399 case AF_INET6:
2400 fam_index = 1;
2401 break;
2402 default:
2403 return NULL;
2404 }
2405
2406 // If an exit connection, use the exit address (if present)
2407 if (conn_type == CONN_TYPE_EXIT) {
2408 if (!tor_addr_is_null(
2409 &options->OutboundBindAddresses[OUTBOUND_ADDR_EXIT][fam_index])) {
2410 ext_addr = &options->OutboundBindAddresses[OUTBOUND_ADDR_EXIT]
2411 [fam_index];
/* Fallback branch: assigns the OUTBOUND_ADDR_ANY entry. */
2412 } else if (!tor_addr_is_null(
2414 [fam_index])) {
2415 ext_addr = &options->OutboundBindAddresses[OUTBOUND_ADDR_ANY]
2416 [fam_index];
2417 }
2418 } else { // All non-exit connections
2419 if (!tor_addr_is_null(
2420 &options->OutboundBindAddresses[OUTBOUND_ADDR_OR][fam_index])) {
2421 ext_addr = &options->OutboundBindAddresses[OUTBOUND_ADDR_OR]
2422 [fam_index];
/* Fallback branch: assigns the OUTBOUND_ADDR_ANY entry. */
2423 } else if (!tor_addr_is_null(
2425 [fam_index])) {
2426 ext_addr = &options->OutboundBindAddresses[OUTBOUND_ADDR_ANY]
2427 [fam_index];
2428 }
2429 }
/* Still NULL when neither branch assigned an address. */
2430 return ext_addr;
2431}
2432
2433/** Take conn, make a nonblocking socket; try to connect to
2434 * addr:port (port arrives in *host order*). If fail, return -1 and if
2435 * applicable put your best guess about errno into *<b>socket_error</b>.
2436 * Else assign s to conn->s: if connected return 1, if EAGAIN return 0.
2437 *
2438 * addr:port can be different to conn->addr:conn->port if connecting through
2439 * a proxy.
2440 *
2441 * address is used to make the logs useful.
2442 *
2443 * On success, add conn to the list of polled connections.
2444 */
2445int
2446connection_connect(connection_t *conn, const char *address,
2447 const tor_addr_t *addr, uint16_t port, int *socket_error)
2448{
2449 struct sockaddr_storage addrbuf;
2450 struct sockaddr_storage bind_addr_ss;
/* bind_addr stays NULL (no explicit bind) unless an outbound address is
 * found and converted successfully below. */
2451 struct sockaddr *bind_addr = NULL;
2452 struct sockaddr *dest_addr;
2453 int dest_addr_len, bind_addr_len = 0;
2454
2455 /* Log if we didn't stick to ClientUseIPv4/6 or ClientPreferIPv6OR/DirPort
2456 */
2457 connection_connect_log_client_use_ip_version(conn);
2458
/* Loopback destinations never get an outbound bind address. */
2459 if (!tor_addr_is_loopback(addr)) {
2460 const tor_addr_t *ext_addr = NULL;
/* NOTE(review): listing line 2461 (the start of the statement that assigns
 * ext_addr) is absent from this extract. */
2462 conn->type);
2463 if (ext_addr) {
2464 memset(&bind_addr_ss, 0, sizeof(bind_addr_ss));
/* The bind address is converted with port 0. */
2465 bind_addr_len = tor_addr_to_sockaddr(ext_addr, 0,
2466 (struct sockaddr *) &bind_addr_ss,
2467 sizeof(bind_addr_ss));
/* A zero length is treated as a conversion failure: warn and connect
 * without binding. */
2468 if (bind_addr_len == 0) {
2469 log_warn(LD_NET,
2470 "Error converting OutboundBindAddress %s into sockaddr. "
2471 "Ignoring.", fmt_and_decorate_addr(ext_addr));
2472 } else {
2473 bind_addr = (struct sockaddr *)&bind_addr_ss;
2474 }
2475 }
2476 }
2477
2478 memset(&addrbuf,0,sizeof(addrbuf));
2479 dest_addr = (struct sockaddr*) &addrbuf;
2480 dest_addr_len = tor_addr_to_sockaddr(addr, port, dest_addr, sizeof(addrbuf));
/* Unlike the bind address, a destination that cannot be converted is an
 * assertion failure. */
2481 tor_assert(dest_addr_len > 0);
2482
2483 log_debug(LD_NET, "Connecting to %s:%u.",
2484 escaped_safe_str_client(address), port);
2485
2486 return connection_connect_sockaddr(conn, dest_addr, dest_addr_len,
2487 bind_addr, bind_addr_len, socket_error);
2488}
2489
2490#ifdef HAVE_SYS_UN_H
2491
2492/** Take conn, make a nonblocking socket; try to connect to
2493 * an AF_UNIX socket at socket_path. If fail, return -1 and if applicable
2494 * put your best guess about errno into *<b>socket_error</b>. Else assign s
2495 * to conn->s: if connected return 1, if EAGAIN return 0.
2496 *
2497 * On success, add conn to the list of polled connections.
2498 */
2499int
2500connection_connect_unix(connection_t *conn, const char *socket_path,
2501 int *socket_error)
2502{
2503 struct sockaddr_un dest_addr;
2504
2505 tor_assert(socket_path);
2506
2507 /* Check that we'll be able to fit it into dest_addr later */
2508 if (strlen(socket_path) + 1 > sizeof(dest_addr.sun_path)) {
2509 log_warn(LD_NET,
2510 "Path %s is too long for an AF_UNIX socket\n",
2511 escaped_safe_str_client(socket_path));
2512 *socket_error = SOCK_ERRNO(ENAMETOOLONG);
2513 return -1;
2514 }
2515
2516 memset(&dest_addr, 0, sizeof(dest_addr));
2517 dest_addr.sun_family = AF_UNIX;
2518 strlcpy(dest_addr.sun_path, socket_path, sizeof(dest_addr.sun_path));
2519
2520 log_debug(LD_NET,
2521 "Connecting to AF_UNIX socket at %s.",
2522 escaped_safe_str_client(socket_path));
2523
2524 return connection_connect_sockaddr(conn,
2525 (struct sockaddr *)&dest_addr, sizeof(dest_addr),
2526 NULL, 0, socket_error);
2527}
2528
2529#endif /* defined(HAVE_SYS_UN_H) */
2530
2531/** Convert state number to string representation for logging purposes.
2532 */
/* NOTE(review): listing line 2534 (the declarator, which declares "state")
 * is absent from this extract. */
2533static const char *
2535{
2536 static const char *unknown = "???";
/* Indexed by proxy state value; the CTASSERT below pins the table length
 * to PROXY_CONNECTED+1. */
2537 static const char *states[] = {
2538 "PROXY_NONE",
2539 "PROXY_INFANT",
2540 "PROXY_HTTPS_WANT_CONNECT_OK",
2541 "PROXY_SOCKS4_WANT_CONNECT_OK",
2542 "PROXY_SOCKS5_WANT_AUTH_METHOD_NONE",
2543 "PROXY_SOCKS5_WANT_AUTH_METHOD_RFC1929",
2544 "PROXY_SOCKS5_WANT_AUTH_RFC1929_OK",
2545 "PROXY_SOCKS5_WANT_CONNECT_OK",
2546 "PROXY_HAPROXY_WAIT_FOR_FLUSH",
2547 "PROXY_CONNECTED",
2548 };
2549
2550 CTASSERT(ARRAY_LENGTH(states) == PROXY_CONNECTED+1);
2551
/* Out-of-range values map to "???" instead of indexing past the table. */
2552 if (state < PROXY_NONE || state > PROXY_CONNECTED)
2553 return unknown;
2554
2555 return states[state];
2556}
2557
2558/** Returns the proxy type used by tor for a single connection, for
2559 * logging or high-level purposes. Don't use it to fill the
2560 * <b>proxy_type</b> field of or_connection_t; use the actual proxy
2561 * protocol instead.*/
/* NOTE(review): listing lines 2563 (declarator) and 2586 are absent from
 * this extract. */
2562static int
2564{
2565 const or_options_t *options = get_options();
2566
/* A pluggable transport match takes precedence over the global proxies. */
2567 if (options->ClientTransportPlugin) {
2568 /* If we have plugins configured *and* this addr/port is a known bridge
2569 * with a transport, then we should be PROXY_PLUGGABLE. */
2570 const transport_t *transport = NULL;
2571 int r;
2572 r = get_transport_by_bridge_addrport(&conn->addr, conn->port, &transport);
2573 if (r == 0 && transport)
2574 return PROXY_PLUGGABLE;
2575 }
2576
2577 /* In all other cases, we're using a global proxy. */
/* Checked in this order: HTTPSProxy, Socks4Proxy, Socks5Proxy, TCPProxy. */
2578 if (options->HTTPSProxy)
2579 return PROXY_CONNECT;
2580 else if (options->Socks4Proxy)
2581 return PROXY_SOCKS4;
2582 else if (options->Socks5Proxy)
2583 return PROXY_SOCKS5;
2584 else if (options->TCPProxy) {
2585 /* The only supported protocol in TCPProxy is haproxy. */
2587 return PROXY_HAPROXY;
2588 } else
2589 return PROXY_NONE;
2590}
2591
2592/* Size in bytes of a SOCKS4 request that carries an empty userid:
 one byte for the version, one for the command, two for the
 port, and four for the addr... and, one more for the
 username NUL: */
2595#define SOCKS4_STANDARD_BUFFER_SIZE (1 + 1 + 2 + 4 + 1)
2596
2597/** Write a proxy request of https to conn for conn->addr:conn->port,
2598 * authenticating with the auth details given in the configuration
2599 * (if available).
2600 *
2601 * Returns -1 if conn->addr is incompatible with the proxy protocol, and
2602 * 0 otherwise.
2603 */
/* NOTE(review): listing line 2605 (the declarator) is absent from this
 * extract. Every visible path returns 0; no -1 return is present. */
2604static int
2606{
2607 tor_assert(conn);
2608
2609 const or_options_t *options = get_options();
2610 char buf[1024];
2611 char *base64_authenticator = NULL;
2612 const char *authenticator = options->HTTPSProxyAuthenticator;
2613
2614 /* Send HTTP CONNECT and authentication (if available) in
2615 * one request */
2616
/* If encoding fails we only warn; base64_authenticator stays NULL and the
 * unauthenticated request below is sent instead. */
2617 if (authenticator) {
2618 base64_authenticator = alloc_http_authenticator(authenticator);
2619 if (!base64_authenticator)
2620 log_warn(LD_OR, "Encoding https authenticator failed");
2621 }
2622
/* With credentials: HTTP/1.1 with Host and Proxy-Authorization headers.
 * Without: a bare HTTP/1.0 CONNECT. */
2623 if (base64_authenticator) {
2624 const char *addrport = fmt_addrport(&conn->addr, conn->port);
2625 tor_snprintf(buf, sizeof(buf), "CONNECT %s HTTP/1.1\r\n"
2626 "Host: %s\r\n"
2627 "Proxy-Authorization: Basic %s\r\n\r\n",
2628 addrport,
2629 addrport,
2630 base64_authenticator);
2631 tor_free(base64_authenticator);
2632 } else {
2633 tor_snprintf(buf, sizeof(buf), "CONNECT %s HTTP/1.0\r\n\r\n",
2634 fmt_addrport(&conn->addr, conn->port));
2635 }
2636
/* Queue the request and record that we now wait for the proxy's reply. */
2637 connection_buf_add(buf, strlen(buf), conn);
2638 conn->proxy_state = PROXY_HTTPS_WANT_CONNECT_OK;
2639
2640 return 0;
2641}
2642
2643/** Write a proxy request of socks4 to conn for conn->addr:conn->port.
2644 *
2645 * Returns -1 if conn->addr is incompatible with the proxy protocol, and
2646 * 0 otherwise.
2647 */
/* NOTE(review): listing lines 2649 (declarator) and 2673 (the right-hand
 * side of the socks_args_string assignment) are absent from this extract. */
2648static int
2650{
2651 tor_assert(conn);
2652
2653 unsigned char *buf;
2654 uint16_t portn;
2655 uint32_t ip4addr;
2656 size_t buf_size = 0;
2657 char *socks_args_string = NULL;
2658
2659 /* Send a SOCKS4 connect request */
2660
/* Any non-AF_INET address is rejected before anything is allocated. */
2661 if (tor_addr_family(&conn->addr) != AF_INET) {
2662 log_warn(LD_NET, "SOCKS4 client is incompatible with IPv6");
2663 return -1;
2664 }
2665
2666 { /* If we are here because we are trying to connect to a
2667 pluggable transport proxy, check if we have any SOCKS
2668 arguments to transmit. If we do, compress all arguments to
2669 a single string in 'socks_args_string': */
2670
2671 if (conn_get_proxy_type(conn) == PROXY_PLUGGABLE) {
2672 socks_args_string =
2674 if (socks_args_string)
2675 log_debug(LD_NET, "Sending out '%s' as our SOCKS argument string.",
2676 socks_args_string);
2677 }
2678 }
2679
2680 { /* Figure out the buffer size we need for the SOCKS message: */
2681
2682 buf_size = SOCKS4_STANDARD_BUFFER_SIZE;
2683
2684 /* If we have a SOCKS argument string, consider its size when
2685 calculating the buffer size: */
2686 if (socks_args_string)
2687 buf_size += strlen(socks_args_string);
2688 }
2689
/* Zero-filled, so any byte not written below is already 0. */
2690 buf = tor_malloc_zero(buf_size);
2691
2692 ip4addr = tor_addr_to_ipv4n(&conn->addr);
2693 portn = htons(conn->port);
2694
/* Layout: [0] version, [1] command, [2..3] port, [4..7] address,
 * [8..] userid. */
2695 buf[0] = 4; /* version */
2696 buf[1] = SOCKS_COMMAND_CONNECT; /* command */
2697 memcpy(buf + 2, &portn, 2); /* port */
2698 memcpy(buf + 4, &ip4addr, 4); /* addr */
2699
2700 /* Next packet field is the userid. If we have pluggable
2701 transport SOCKS arguments, we have to embed them
2702 there. Otherwise, we use an empty userid. */
2703 if (socks_args_string) { /* place the SOCKS args string: */
2704 tor_assert(strlen(socks_args_string) > 0);
2705 tor_assert(buf_size >=
2706 SOCKS4_STANDARD_BUFFER_SIZE + strlen(socks_args_string));
/* buf_size - 8 equals strlen(socks_args_string) + 1, so the string and its
 * NUL fit exactly. */
2707 strlcpy((char *)buf + 8, socks_args_string, buf_size - 8);
2708 tor_free(socks_args_string);
2709 } else {
2710 buf[8] = 0; /* no userid */
2711 }
2712
/* The request bytes are queued on conn; the local buffer is then freed. */
2713 connection_buf_add((char *)buf, buf_size, conn);
2714 tor_free(buf);
2715
2716 conn->proxy_state = PROXY_SOCKS4_WANT_CONNECT_OK;
2717 return 0;
2718}
2719
2720/** Write a proxy request of socks5 to conn for conn->addr:conn->port,
2721 * authenticating with the auth details given in the configuration
2722 * (if available).
2723 *
2724 * Returns -1 if conn->addr is incompatible with the proxy protocol, and
2725 * 0 otherwise.
2726 */
/* NOTE(review): listing line 2728 (the declarator) is absent from this
 * extract. Every visible path returns 0. */
2727static int
2729{
2730 tor_assert(conn);
2731
2732 const or_options_t *options = get_options();
2733 unsigned char buf[4]; /* fields: vers, num methods, method list */
2734
2735 /* Send a SOCKS5 greeting (connect request must wait) */
2736
2737 buf[0] = 5; /* version */
2738
2739 /* We have to use SOCKS5 authentication, if we have a
2740 Socks5ProxyUsername or if we want to pass arguments to our
2741 pluggable transport proxy: */
2742 if ((options->Socks5ProxyUsername) ||
2743 (conn_get_proxy_type(conn) == PROXY_PLUGGABLE &&
2744 (get_socks_args_by_bridge_addrport(&conn->addr, conn->port)))) {
2745 /* number of auth methods */
2746 buf[1] = 2;
2747 buf[2] = 0x00; /* no authentication */
2748 buf[3] = 0x02; /* rfc1929 Username/Passwd auth */
2749 conn->proxy_state = PROXY_SOCKS5_WANT_AUTH_METHOD_RFC1929;
2750 } else {
2751 buf[1] = 1;
2752 buf[2] = 0x00; /* no authentication */
2753 conn->proxy_state = PROXY_SOCKS5_WANT_AUTH_METHOD_NONE;
2754 }
2755
/* Greeting length is the 2 header bytes plus one byte per offered method
 * (buf[1] is 1 or 2), so buf[3] is sent only when it was set. */
2756 connection_buf_add((char *)buf, 2 + buf[1], conn);
2757 return 0;
2758}
2759
2760/** Write a proxy request of haproxy to conn for conn->addr:conn->port.
2761 *
2762 * Returns -1 if conn->addr is incompatible with the proxy protocol, and
2763 * 0 otherwise.
2764 */
/* NOTE(review): listing line 2766 (the declarator) is absent from this
 * extract. Unlike the other *_proxy_connect helpers visible in this file,
 * this one dereferences conn without a tor_assert(conn) first. */
2765static int
2767{
2768 int ret = 0;
2769 tor_addr_port_t *addr_port = tor_addr_port_new(&conn->addr, conn->port);
2770 char *buf = haproxy_format_proxy_header_line(addr_port);
2771
/* A NULL header line is the only failure path; addr_port is still freed at
 * "done". */
2772 if (buf == NULL) {
2773 ret = -1;
2774 goto done;
2775 }
2776
2777 connection_buf_add(buf, strlen(buf), conn);
2778 /* In haproxy, we don't have to wait for the response, but we wait for ack.
2779 * So we can set the state to be PROXY_HAPROXY_WAIT_FOR_FLUSH. */
2780 conn->proxy_state = PROXY_HAPROXY_WAIT_FOR_FLUSH;
2781
2782 ret = 0;
/* Shared cleanup for both the success and the failure path. */
2783 done:
2784 tor_free(buf);
2785 tor_free(addr_port);
2786 return ret;
2787}
2788
2789/** Write a proxy request of <b>type</b> (socks4, socks5, https, haproxy)
2790 * to conn for conn->addr:conn->port, authenticating with the auth details
2791 * given in the configuration (if available). SOCKS 5 and HTTP CONNECT
2792 * proxies support authentication.
2793 *
2794 * Returns -1 if conn->addr is incompatible with the proxy protocol, and
2795 * 0 otherwise.
2796 *
2797 * Use connection_read_proxy_handshake() to complete the handshake.
2798 */
/* NOTE(review): listing lines 2800 (declarator), 2808, 2812, 2816, 2820,
 * 2825 and 2832 are absent from this extract, so the statement inside each
 * case and the argument of the final log_debug() are not visible. */
2799int
2801{
2802 int ret = 0;
2803
2804 tor_assert(conn);
2805
/* One case per supported proxy protocol; an unknown type is logged as a
 * bug and reported as failure. */
2806 switch (type) {
2807 case PROXY_CONNECT:
2809 break;
2810
2811 case PROXY_SOCKS4:
2813 break;
2814
2815 case PROXY_SOCKS5:
2817 break;
2818
2819 case PROXY_HAPROXY:
2821 break;
2822
2823 default:
2824 log_err(LD_BUG, "Invalid proxy protocol, %d", type);
2826 ret = -1;
2827 break;
2828 }
2829
/* The state is logged only when ret is 0. */
2830 if (ret == 0) {
2831 log_debug(LD_NET, "set state %s",
2833 }
2834
2835 return ret;
2836}
2837
2838/** Read conn's inbuf. If the http response from the proxy is all
2839 * here, make sure it's good news, then return 1. If it's bad news,
2840 * return -1. Else return 0 and hope for better luck next time.
2841 */
/* NOTE(review): listing line 2843 (the declarator) is absent from this
 * extract. */
2842static int
2844{
2845 char *headers;
2846 char *reason=NULL;
2847 int status_code;
2848 time_t date_header;
2849
/* Only the headers are requested (both body arguments are NULL). Results
 * -1 and 0 return immediately; any other result continues below. */
2850 switch (fetch_from_buf_http(conn->inbuf,
2851 &headers, MAX_HEADERS_SIZE,
2852 NULL, NULL, 10000, 0)) {
2853 case -1: /* overflow */
2854 log_warn(LD_PROTOCOL,
2855 "Your https proxy sent back an oversized response. Closing.");
2856 return -1;
2857 case 0:
2858 log_info(LD_NET,"https proxy response not all here yet. Waiting.");
2859 return 0;
2860 /* case 1, fall through */
2861 }
2862
2863 if (parse_http_response(headers, &status_code, &date_header,
2864 NULL, &reason) < 0) {
2865 log_warn(LD_NET,
2866 "Unparseable headers from proxy (%s). Closing.",
2867 connection_describe(conn));
2868 tor_free(headers);
2869 return -1;
2870 }
2871 tor_free(headers);
/* Guarantee a non-NULL reason for the log messages below; it is freed on
 * every remaining return path. */
2872 if (!reason) reason = tor_strdup("[no reason given]");
2873
/* Only status 200 counts as success. */
2874 if (status_code == 200) {
2875 log_info(LD_NET,
2876 "HTTPS connect for %s successful! (200 %s) Starting TLS.",
2877 connection_describe(conn), escaped(reason));
2878 tor_free(reason);
2879 return 1;
2880 }
2881 /* else, bad news on the status code */
/* 403 gets a dedicated message; every other status uses the generic one. */
2882 switch (status_code) {
2883 case 403:
2884 log_warn(LD_NET,
2885 "The https proxy refused to allow connection to %s "
2886 "(status code %d, %s). Closing.",
2887 conn->address, status_code, escaped(reason));
2888 break;
2889 default:
2890 log_warn(LD_NET,
2891 "The https proxy sent back an unexpected status code %d (%s). "
2892 "Closing.",
2893 status_code, escaped(reason));
2894 break;
2895 }
2896 tor_free(reason);
2897 return -1;
2898}
2899
2900/** Send SOCKS5 CONNECT command to <b>conn</b>, copying <b>conn->addr</b>
2901 * and <b>conn->port</b> into the request.
2902 */
/* NOTE(review): listing line 2904 (the declarator) is absent from this
 * extract. */
2903static void
2905{
2906 unsigned char buf[1024];
/* 6 = version + command + reserved + address type + 2 port bytes; the
 * address length is added per family below. */
2907 size_t reqsize = 6;
2908 uint16_t port = htons(conn->port);
2909
2910 buf[0] = 5; /* version */
2911 buf[1] = SOCKS_COMMAND_CONNECT; /* command */
2912 buf[2] = 0; /* reserved */
2913
2914 if (tor_addr_family(&conn->addr) == AF_INET) {
2915 uint32_t addr = tor_addr_to_ipv4n(&conn->addr);
2916
/* Address type 1: 4 address bytes at offset 4, port at offset 8. */
2917 buf[3] = 1;
2918 reqsize += 4;
2919 memcpy(buf + 4, &addr, 4);
2920 memcpy(buf + 8, &port, 2);
/* NOTE(review): every non-AF_INET family takes this branch and is encoded
 * as IPv6 -- confirm callers never pass another family. */
2921 } else { /* AF_INET6 */
/* Address type 4: 16 address bytes at offset 4, port at offset 20. */
2922 buf[3] = 4;
2923 reqsize += 16;
2924 memcpy(buf + 4, tor_addr_to_in6_addr8(&conn->addr), 16);
2925 memcpy(buf + 20, &port, 2);
2926 }
2927
2928 connection_buf_add((char *)buf, reqsize, conn);
2929
2930 conn->proxy_state = PROXY_SOCKS5_WANT_CONNECT_OK;
2931}
2932
2933/** Wrapper around fetch_from_buf_socks_client: see that functions
2934 * for documentation of its behavior. */
/* NOTE(review): listing line 2936 (the declarator, which declares "conn")
 * is absent from this extract. */
2935static int
2937 int state, char **reason)
2938{
/* Forwards to fetch_from_buf_socks_client() on conn's input buffer and
 * returns its result unchanged. */
2939 return fetch_from_buf_socks_client(conn->inbuf, state, reason);
2940}
2941
2942/** Call this from connection_*_process_inbuf() to advance the proxy
2943 * handshake.
2944 *
2945 * No matter what proxy protocol is used, if this function returns 1, the
2946 * handshake is complete, and the data remaining on inbuf may contain the
2947 * start of the communication with the requested server.
2948 *
2949 * Returns 0 if the current buffer contains an incomplete response, and -1
2950 * on error.
2951 */
/* NOTE(review): many listing lines are absent from this extract (2953, 2959,
 * 2963, 2969, 2977, 2982, 2988, 2994, 3004, 3017, 3027-3028, 3034,
 * 3041-3042, 3062, 3067, 3073, 3083, 3089): the declarator, the call that
 * assigns ret in each case, and a few other statements. Comments added
 * below cover only what is visible. */
2952int
2954{
2955 int ret = 0;
2956 char *reason = NULL;
2957
2958 log_debug(LD_NET, "enter state %s",
2960
/* One case per state in which a proxy reply is expected. */
2961 switch (conn->proxy_state) {
2962 case PROXY_HTTPS_WANT_CONNECT_OK:
2964 if (ret == 1)
2965 conn->proxy_state = PROXY_CONNECTED;
2966 break;
2967
2968 case PROXY_SOCKS4_WANT_CONNECT_OK:
2970 conn->proxy_state,
2971 &reason);
2972 if (ret == 1)
2973 conn->proxy_state = PROXY_CONNECTED;
2974 break;
2975
2976 case PROXY_SOCKS5_WANT_AUTH_METHOD_NONE:
2978 conn->proxy_state,
2979 &reason);
2980 /* no auth needed, do connect */
/* ret is reset to 0 here: the handshake is not finished yet. */
2981 if (ret == 1) {
2983 ret = 0;
2984 }
2985 break;
2986
2987 case PROXY_SOCKS5_WANT_AUTH_METHOD_RFC1929:
2989 conn->proxy_state,
2990 &reason);
2991
2992 /* send auth if needed, otherwise do connect */
2993 if (ret == 1) {
2995 ret = 0;
/* ret == 2 is the branch that builds and sends the username/password
 * message. */
2996 } else if (ret == 2) {
2997 unsigned char buf[1024];
2998 size_t reqsize, usize, psize;
2999 const char *user, *pass;
3000 char *socks_args_string = NULL;
3001
3002 if (conn_get_proxy_type(conn) == PROXY_PLUGGABLE) {
3003 socks_args_string =
3005 if (!socks_args_string) {
3006 log_warn(LD_NET, "Could not create SOCKS args string for PT.");
3007 ret = -1;
/* This break leaves the enclosing switch, not just the if. */
3008 break;
3009 }
3010
3011 log_debug(LD_NET, "PT SOCKS5 arguments: %s", socks_args_string);
3012 tor_assert(strlen(socks_args_string) > 0);
3013 tor_assert(strlen(socks_args_string) <= MAX_SOCKS5_AUTH_SIZE_TOTAL);
3014
/* Arguments longer than one field are split: the part past
 * MAX_SOCKS5_AUTH_FIELD_SIZE goes into the password field. */
3015 if (strlen(socks_args_string) > MAX_SOCKS5_AUTH_FIELD_SIZE) {
3016 user = socks_args_string;
3018 pass = socks_args_string + MAX_SOCKS5_AUTH_FIELD_SIZE;
3019 psize = strlen(socks_args_string) - MAX_SOCKS5_AUTH_FIELD_SIZE;
3020 } else {
3021 user = socks_args_string;
3022 usize = strlen(socks_args_string);
/* When everything fits in the username, the password sent is a single
 * NUL byte (psize 1), not an empty field. */
3023 pass = "\0";
3024 psize = 1;
3025 }
3026 } else if (get_options()->Socks5ProxyUsername) {
3029 tor_assert(user && pass);
3030 usize = strlen(user);
3031 psize = strlen(pass);
3032 } else {
3033 log_err(LD_BUG, "We entered %s for no reason!", __func__);
3035 ret = -1;
3036 break;
3037 }
3038
3039 /* Username and password lengths should have been checked
3040 above and during torrc parsing. */
/* Message layout: version, ulen, username, plen, password, i.e. 3 fixed
 * bytes plus the two fields. */
3043 reqsize = 3 + usize + psize;
3044
3045 buf[0] = 1; /* negotiation version */
3046 buf[1] = usize;
3047 memcpy(buf + 2, user, usize);
3048 buf[2 + usize] = psize;
3049 memcpy(buf + 3 + usize, pass, psize);
3050
/* user/pass may point into socks_args_string, so it is freed only after
 * the copies above. */
3051 if (socks_args_string)
3052 tor_free(socks_args_string);
3053
3054 connection_buf_add((char *)buf, reqsize, conn);
3055
3056 conn->proxy_state = PROXY_SOCKS5_WANT_AUTH_RFC1929_OK;
3057 ret = 0;
3058 }
3059 break;
3060
3061 case PROXY_SOCKS5_WANT_AUTH_RFC1929_OK:
3063 conn->proxy_state,
3064 &reason);
3065 /* send the connect request */
3066 if (ret == 1) {
3068 ret = 0;
3069 }
3070 break;
3071
3072 case PROXY_SOCKS5_WANT_CONNECT_OK:
3074 conn->proxy_state,
3075 &reason);
3076 if (ret == 1)
3077 conn->proxy_state = PROXY_CONNECTED;
3078 break;
3079
3080 default:
3081 log_err(LD_BUG, "Invalid proxy_state for reading, %d",
3082 conn->proxy_state);
3084 ret = -1;
3085 break;
3086 }
3087
3088 log_debug(LD_NET, "leaving state %s",
3090
/* NOTE(review): reason is freed only inside the ret < 0 branch; whether it
 * can be non-NULL on other paths depends on the callee -- confirm. */
3091 if (ret < 0) {
3092 if (reason) {
3093 log_warn(LD_NET, "Proxy Client: unable to connect %s (%s)",
3094 connection_describe(conn), escaped(reason));
3095 tor_free(reason);
3096 } else {
3097 log_warn(LD_NET, "Proxy Client: unable to connect %s",
3098 connection_describe(conn));
3099 }
3100 } else if (ret == 1) {
3101 log_info(LD_NET, "Proxy Client: %s successful",
3102 connection_describe(conn));
3103 }
3104
3105 return ret;
3106}
3107
3108/** Given a list of listener connections in <b>old_conns</b>, and list of
3109 * port_cfg_t entries in <b>ports</b>, open a new listener for every port in
3110 * <b>ports</b> that does not already have a listener in <b>old_conns</b>.
3111 *
3112 * Remove from <b>old_conns</b> every connection that has a corresponding
3113 * entry in <b>ports</b>. Add to <b>new_conns</b> new every connection we
3114 * launch. If we may need to perform socket rebind when creating new
3115 * listener that replaces old one, create a <b>listener_replacement_t</b>
3116 * struct for affected pair and add it to <b>replacements</b>.
3117 *
3118 * If <b>control_listeners_only</b> is true, then we only open control
3119 * listeners, and we do not remove any noncontrol listeners from
3120 * old_conns.
3121 *
3122 * Return 0 on success, -1 on failure.
3123 **/
/* NOTE(review): listing line 3125 (the declarator, which declares
 * "old_conns") is absent from this extract. */
3124static int
3126 const smartlist_t *ports,
3127 smartlist_t *new_conns,
3128 smartlist_t *replacements,
3129 int control_listeners_only)
3130{
/* Without rebind support "replacements" is unused; the cast silences the
 * unused-parameter warning. */
3131#ifndef ENABLE_LISTENER_REBIND
3132 (void)replacements;
3133#endif
3134
/* "launch" is a working list of ports still needing a listener. It holds
 * borrowed pointers into "ports"; only the list itself is freed at the end. */
3135 smartlist_t *launch = smartlist_new();
3136 int r = 0;
3137
3138 if (control_listeners_only) {
3139 SMARTLIST_FOREACH(ports, port_cfg_t *, p, {
3140 if (p->type == CONN_TYPE_CONTROL_LISTENER)
3141 smartlist_add(launch, p);
3142 });
3143 } else {
3144 smartlist_add_all(launch, ports);
3145 }
3146
3147 /* Iterate through old_conns, comparing it to launch: remove from both lists
3148 * each pair of elements that corresponds to the same port. */
3149 SMARTLIST_FOREACH_BEGIN(old_conns, connection_t *, conn) {
3150 const port_cfg_t *found_port = NULL;
3151
3152 /* Okay, so this is a listener. Is it configured? */
3153 /* That is, is it either: 1) exact match - address and port
3154 * pair match exactly between old listener and new port; or 2)
3155 * wildcard match - port matches exactly, but *one* of the
3156 * addresses is wildcard (0.0.0.0 or ::)?
3157 */
3158 SMARTLIST_FOREACH_BEGIN(launch, const port_cfg_t *, wanted) {
3159 if (conn->type != wanted->type)
3160 continue;
/* An AF_UNIX listener can only match a unix-address port, and vice versa. */
3161 if ((conn->socket_family != AF_UNIX && wanted->is_unix_addr) ||
3162 (conn->socket_family == AF_UNIX && ! wanted->is_unix_addr))
3163 continue;
3164
3165 if (wanted->server_cfg.no_listen)
3166 continue; /* We don't want to open a listener for this one */
3167
3168 if (wanted->is_unix_addr) {
/* Unix listeners match on an identical path string. */
3169 if (conn->socket_family == AF_UNIX &&
3170 !strcmp(wanted->unix_addr, conn->address)) {
3171 found_port = wanted;
3172 break;
3173 }
3174 } else {
3175 /* Numeric values of old and new port match exactly. */
3176 const int port_matches_exact = (wanted->port == conn->port);
3177 /* Ports match semantically - either their specific values
3178 match exactly, or new port is 'auto'.
3179 */
3180 const int port_matches = (wanted->port == CFG_AUTO_PORT ||
3181 port_matches_exact);
3182
3183 if (port_matches && tor_addr_eq(&wanted->addr, &conn->addr)) {
3184 found_port = wanted;
3185 break;
3186 }
3187#ifdef ENABLE_LISTENER_REBIND
3188 /* Rebinding may be needed if all of the following are true:
3189 * 1) Address family is the same in old and new listeners.
3190 * 2) Port number matches exactly (numeric value is the same).
3191 * 3) *One* of listeners (either old one or new one) has a
3192 * wildcard IP address (0.0.0.0 or [::]).
3193 *
3194 * These are the exact conditions for a first bind() syscall
3195 * to fail with EADDRINUSE.
3196 */
3197 const int may_need_rebind =
3198 tor_addr_family(&wanted->addr) == tor_addr_family(&conn->addr) &&
3199 port_matches_exact && bool_neq(tor_addr_is_null(&wanted->addr),
3200 tor_addr_is_null(&conn->addr));
3201 if (replacements && may_need_rebind) {
/* The new replacement record is added to "replacements"; this function
 * does not free it. */
3202 listener_replacement_t *replacement =
3203 tor_malloc(sizeof(listener_replacement_t));
3204
3205 replacement->old_conn = conn;
3206 replacement->new_port = wanted;
3207 smartlist_add(replacements, replacement);
3208
/* Both entries are removed here and found_port stays NULL, so the
 * found_port block below is skipped for this pair. */
3209 SMARTLIST_DEL_CURRENT(launch, wanted);
3210 SMARTLIST_DEL_CURRENT(old_conns, conn);
3211 break;
3212 }
3213#endif /* defined(ENABLE_LISTENER_REBIND) */
3214 }
3215 } SMARTLIST_FOREACH_END(wanted);
3216
3217 if (found_port) {
3218 /* This listener is already running; we don't need to launch it. */
3219 //log_debug(LD_NET, "Already have %s on %s:%d",
3220 // conn_type_to_string(found_port->type), conn->address, conn->port);
3221 smartlist_remove(launch, found_port);
3222 /* And we can remove the connection from old_conns too. */
3223 SMARTLIST_DEL_CURRENT(old_conns, conn);
3224 }
3225 } SMARTLIST_FOREACH_END(conn);
3226
3227 /* Now open all the listeners that are configured but not opened. */
3228 SMARTLIST_FOREACH_BEGIN(launch, const port_cfg_t *, port) {
3229 int skip = 0;
3230 connection_t *conn = connection_listener_new_for_port(port, &skip, NULL);
3231
/* NOTE(review): if conn is non-NULL but new_conns is NULL, control reaches
 * the else-if and, with skip == 0, sets r = -1 even though a listener was
 * opened -- confirm callers always pass a non-NULL new_conns. */
3232 if (conn && new_conns)
3233 smartlist_add(new_conns, conn);
3234 else if (!skip)
3235 r = -1;
3236 } SMARTLIST_FOREACH_END(port);
3237
3238 smartlist_free(launch);
3239
/* r is -1 if any launch attempt was counted as a failure above, else 0. */
3240 return r;
3241}
3242
3243/** Launch listeners for each port you should have open. Only launch
3244 * listeners who are not already open, and only close listeners we no longer
3245 * want.
3246 *
3247 * Add all new connections to <b>new_conns</b>.
3248 *
3249 * If <b>close_all_noncontrol</b> is true, then we only open control
3250 * listeners, and we close all other listeners.
3251 */
3252int
3253retry_all_listeners(smartlist_t *new_conns, int close_all_noncontrol)
3254{
3255 smartlist_t *listeners = smartlist_new();
3256 smartlist_t *replacements = smartlist_new();
3257 const or_options_t *options = get_options();
3258 int retval = 0;
3259 const uint16_t old_or_port = routerconf_find_or_port(options, AF_INET);
3260 const uint16_t old_or_port_ipv6 =
3261 routerconf_find_or_port(options,AF_INET6);
3262 const uint16_t old_dir_port = routerconf_find_dir_port(options, 0);
3263
3265 if (connection_is_listener(conn) && !conn->marked_for_close)
3266 smartlist_add(listeners, conn);
3267 } SMARTLIST_FOREACH_END(conn);
3268
3269 if (retry_listener_ports(listeners,
3271 new_conns,
3272 replacements,
3273 close_all_noncontrol) < 0)
3274 retval = -1;
3275
3276#ifdef ENABLE_LISTENER_REBIND
3277 if (smartlist_len(replacements))
3278 log_debug(LD_NET, "%d replacements - starting rebinding loop.",
3279 smartlist_len(replacements));
3280
3282 int addr_in_use = 0;
3283 int skip = 0;
3284
3285 tor_assert(r->new_port);
3286 tor_assert(r->old_conn);
3287
3288 connection_t *new_conn =
3289 connection_listener_new_for_port(r->new_port, &skip, &addr_in_use);
3290 connection_t *old_conn = r->old_conn;
3291
3292 if (skip) {
3293 log_debug(LD_NET, "Skipping creating new listener for %s",
3294 connection_describe(old_conn));
3295 continue;
3296 }
3297
3299 connection_mark_for_close(old_conn);
3300
3301 if (addr_in_use) {
3302 new_conn = connection_listener_new_for_port(r->new_port,
3303 &skip, &addr_in_use);
3304 }
3305
3306 /* There are many reasons why we can't open a new listener port so in case
3307 * we hit those, bail early so tor can stop. */
3308 if (!new_conn) {
3309 log_warn(LD_NET, "Unable to create listener port: %s:%d",
3310 fmt_and_decorate_addr(&r->new_port->addr), r->new_port->port);
3311 retval = -1;
3312 break;
3313 }
3314
3315 smartlist_add(new_conns, new_conn);
3316
3317 char *old_desc = tor_strdup(connection_describe(old_conn));
3318 log_notice(LD_NET, "Closed no-longer-configured %s "
3319 "(replaced by %s)",
3320 old_desc, connection_describe(new_conn));
3321 tor_free(old_desc);
3322 } SMARTLIST_FOREACH_END(r);
3323#endif /* defined(ENABLE_LISTENER_REBIND) */
3324
3325 /* Any members that were still in 'listeners' don't correspond to
3326 * any configured port. Kill 'em. */
3327 SMARTLIST_FOREACH_BEGIN(listeners, connection_t *, conn) {
3328 log_notice(LD_NET, "Closing no-longer-configured %s on %s:%d",
3330 fmt_and_decorate_addr(&conn->addr), conn->port);
3332 connection_mark_for_close(conn);
3333 } SMARTLIST_FOREACH_END(conn);
3334
3335 smartlist_free(listeners);
3336 /* Cleanup any remaining listener replacement. */
3337 SMARTLIST_FOREACH(replacements, listener_replacement_t *, r, tor_free(r));
3338 smartlist_free(replacements);
3339
3340 if (old_or_port != routerconf_find_or_port(options, AF_INET) ||
3341 old_or_port_ipv6 != routerconf_find_or_port(options, AF_INET6) ||
3342 old_dir_port != routerconf_find_dir_port(options, 0)) {
3343 /* Our chosen ORPort or DirPort is not what it used to be: the
3344 * descriptor we had (if any) should be regenerated. (We won't
3345 * automatically notice this because of changes in the option,
3346 * since the value could be "auto".) */
3347 mark_my_descriptor_dirty("Chosen Or/DirPort changed");
3348 }
3349
3350 return retval;
3351}
3352
3353/** Mark every listener of type other than CONTROL_LISTENER to be closed. */
3354void
3356{
3358 if (conn->marked_for_close)
3359 continue;
3360 if (conn->type == CONN_TYPE_CONTROL_LISTENER)
3361 continue;
3362 if (connection_is_listener(conn))
3363 connection_mark_for_close(conn);
3364 } SMARTLIST_FOREACH_END(conn);
3365}
3366
3367/** Mark every external connection not used for controllers for close. */
3368void
3370{
3372 if (conn->marked_for_close)
3373 continue;
3374 switch (conn->type) {
3376 case CONN_TYPE_CONTROL:
3377 break;
3378 case CONN_TYPE_AP:
3379 connection_mark_unattached_ap(TO_ENTRY_CONN(conn),
3380 END_STREAM_REASON_HIBERNATING);
3381 break;
3382 case CONN_TYPE_OR:
3383 {
3384 or_connection_t *orconn = TO_OR_CONN(conn);
3385 if (orconn->chan) {
3387 } else {
3388 /*
3389 * There should have been one, but mark for close and hope
3390 * for the best..
3391 */
3392 connection_mark_for_close(conn);
3393 }
3394 }
3395 break;
3396 default:
3397 connection_mark_for_close(conn);
3398 break;
3399 }
3400 } SMARTLIST_FOREACH_END(conn);
3401}
3402
3403/** Return 1 if we should apply rate limiting to <b>conn</b>, and 0
3404 * otherwise.
3405 * Right now this just checks if it's an internal IP address or an
3406 * internal connection. We also should, but don't, check if the connection
3407 * uses pluggable transports, since we should then limit it even if it
3408 * comes from an internal IP address. */
3409static int
3411{
3412 const or_options_t *options = get_options();
3413 if (conn->linked)
3414 return 0; /* Internal connection */
3415 else if (! options->CountPrivateBandwidth &&
3417 (tor_addr_family(&conn->addr) == AF_UNSPEC || /* no address */
3418 tor_addr_family(&conn->addr) == AF_UNIX || /* no address */
3419 tor_addr_is_internal(&conn->addr, 0)))
3420 return 0; /* Internal address */
3421 else
3422 return 1;
3423}
3424
3425/** When was either global write bucket last empty? If this was recent, then
3426 * we're probably low on bandwidth, and we should be stingy with our bandwidth
3427 * usage. */
3428static time_t write_buckets_last_empty_at = -100;
3429
3430/** How many seconds of no active local circuits will make the
3431 * connection revert to the "relayed" bandwidth class? */
3432#define CLIENT_IDLE_TIME_FOR_PRIORITY 30
3433
3434/** Return 1 if <b>conn</b> should use tokens from the "relayed"
3435 * bandwidth rates, else 0. Currently, only OR conns with bandwidth
3436 * class 1, and directory conns that are serving data out, count.
3437 */
3438static int
3440{
3441 if (conn->type == CONN_TYPE_OR &&
3444 return 1;
3445 if (conn->type == CONN_TYPE_DIR && DIR_CONN_IS_SERVER(conn))
3446 return 1;
3447 return 0;
3448}
3449
/** Decide how many bytes of a global token bucket a single transaction
 * may consume.
 *
 * <b>base</b> is the size of a cell on the network; it is used as a
 * modulus, so it must be nonzero. <b>priority</b>, when nonzero, selects
 * the larger limits (at most 32 cells, at least 4); when zero the limits
 * are 16 and 2 cells. <b>global_bucket_val</b> is the amount available in
 * the global bucket, and <b>conn_bucket</b> (if non-negative) is an
 * additional upper bound on the answer.
 *
 * The result is never negative. */
static ssize_t
connection_bucket_get_share(int base, int priority,
                            ssize_t global_bucket_val, ssize_t conn_bucket)
{
  /* Upper and lower bounds, expressed as a whole number of cells. */
  const ssize_t ceiling_bytes = (priority ? 32 : 16) * base;
  const ssize_t floor_bytes = (priority ? 4 : 2) * base;

  /* Rudimentary fairness so that one circuit cannot hog a connection:
   * start from one eighth of what is available, rounded down to a
   * multiple of the cell size... */
  ssize_t share = global_bucket_val / 8;
  share = share - (share % base);

  /* ...then keep it between the floor and the ceiling. */
  if (share > ceiling_bytes) {
    share = ceiling_bytes;
  } else if (share < floor_bytes) {
    share = floor_bytes;
  }

  /* The floor applies only "if possible": never hand out more than the
   * global bucket actually holds. */
  share = (share > global_bucket_val) ? global_bucket_val : share;

  /* A non-negative per-connection bucket caps the answer as well. */
  if (conn_bucket >= 0 && conn_bucket < share) {
    share = conn_bucket;
  }

  return (share < 0) ? 0 : share;
}
3483
3484/** How many bytes at most can we read onto this connection? */
3485static ssize_t
3487{
3488 int base = RELAY_PAYLOAD_SIZE;
3489 int priority = conn->type != CONN_TYPE_DIR;
3490 ssize_t conn_bucket = -1;
3491 size_t global_bucket_val = token_bucket_rw_get_read(&global_bucket);
3492 if (global_bucket_val == 0) {
3493 /* We reached our global read limit: count this as an overload.
3494 *
3495 * The token bucket is always initialized (see connection_bucket_init() and
3496 * options_validate_relay_bandwidth()) and hence we can assume that if the
3497 * token ever hits zero, it's a limit that got popped and not the bucket
3498 * being uninitialized.
3499 */
3500 rep_hist_note_overload(OVERLOAD_READ);
3501 }
3502
3503 if (connection_speaks_cells(conn)) {
3504 or_connection_t *or_conn = TO_OR_CONN(conn);
3505 if (conn->state == OR_CONN_STATE_OPEN)
3506 conn_bucket = token_bucket_rw_get_read(&or_conn->bucket);
3507 base = get_cell_network_size(or_conn->wide_circ_ids);
3508 }
3509
3510 /* Edge connections have their own read bucket due to flow control being able
3511 * to set a rate limit for them. However, for exit connections, we still need
3512 * to honor the global bucket as well. */
3513 if (CONN_IS_EDGE(conn)) {
3514 const edge_connection_t *edge_conn = CONST_TO_EDGE_CONN(conn);
3515 conn_bucket = token_bucket_rw_get_read(&edge_conn->bucket);
3516 if (conn->type == CONN_TYPE_EXIT) {
3517 /* Decide between our limit and the global one. */
3518 goto end;
3519 }
3520 return conn_bucket;
3521 }
3522
3523 if (!connection_is_rate_limited(conn)) {
3524 /* be willing to read on local conns even if our buckets are empty */
3525 return conn_bucket>=0 ? conn_bucket : 1<<14;
3526 }
3527
3528 if (connection_counts_as_relayed_traffic(conn, now)) {
3529 size_t relayed = token_bucket_rw_get_read(&global_relayed_bucket);
3530 global_bucket_val = MIN(global_bucket_val, relayed);
3531 }
3532
3533 end:
3534 return connection_bucket_get_share(base, priority,
3535 global_bucket_val, conn_bucket);
3536}
3537
3538/** How many bytes at most can we write onto this connection? */
3539ssize_t
3541{
3542 int base = RELAY_PAYLOAD_SIZE;
3543 int priority = conn->type != CONN_TYPE_DIR;
3544 size_t conn_bucket = buf_datalen(conn->outbuf);
3545 size_t global_bucket_val = token_bucket_rw_get_write(&global_bucket);
3546 if (global_bucket_val == 0) {
3547 /* We reached our global write limit: We should count this as an overload.
3548 * See above function for more information */
3549 rep_hist_note_overload(OVERLOAD_WRITE);
3550 }
3551
3552 if (!connection_is_rate_limited(conn)) {
3553 /* be willing to write to local conns even if our buckets are empty */
3554 return conn_bucket;
3555 }
3556
3557 if (connection_speaks_cells(conn)) {
3558 /* use the per-conn write limit if it's lower */
3559 or_connection_t *or_conn = TO_OR_CONN(conn);
3560 if (conn->state == OR_CONN_STATE_OPEN)
3561 conn_bucket = MIN(conn_bucket,
3562 token_bucket_rw_get_write(&or_conn->bucket));
3563 base = get_cell_network_size(or_conn->wide_circ_ids);
3564 }
3565
3566 if (connection_counts_as_relayed_traffic(conn, now)) {
3567 size_t relayed = token_bucket_rw_get_write(&global_relayed_bucket);
3568 global_bucket_val = MIN(global_bucket_val, relayed);
3569 }
3570
3571 return connection_bucket_get_share(base, priority,
3572 global_bucket_val, conn_bucket);
3573}
3574
3575/** Return true iff the global write buckets are low enough that we
3576 * shouldn't send <b>attempt</b> bytes of low-priority directory stuff
3577 * out to <b>conn</b>.
3578 *
3579 * If we are a directory authority, we generally answer dir requests anyway,
3580 * thus false is returned in that case.
3581 *
3582 * Note: There are a lot of parameters we could use here:
3583 * - global_relayed_write_bucket. Low is bad.
3584 * - global_write_bucket. Low is bad.
3585 * - bandwidthrate. Low is bad.
3586 * - bandwidthburst. Not a big factor?
3587 * - attempt. High is bad.
3588 * - total bytes queued on outbufs. High is bad. But I'm wary of
3589 * using this, since a few slow-flushing queues will pump up the
3590 * number without meaning what we meant to mean. What we really
3591 * mean is "total directory bytes added to outbufs recently", but
3592 * that's harder to quantify and harder to keep track of.
3593 */
3594bool
3596{
3597 size_t smaller_bucket =
3598 MIN(token_bucket_rw_get_write(&global_bucket),
3599 token_bucket_rw_get_write(&global_relayed_bucket));
3600
3601 /* Special case for authorities (directory only). */
3602 if (authdir_mode_v3(get_options())) {
3603 /* Are we configured to possibly reject requests under load? */
3605 /* Answer request no matter what. */
3606 return false;
3607 }
3608 /* Always answer requests from a known relay which includes the other
3609 * authorities. The following looks up the addresses for relays that we
3610 * have their descriptor _and_ any configured trusted directories. */
3612 return false;
3613 }
3614 }
3615
3616 if (!connection_is_rate_limited(conn))
3617 return false; /* local conns don't get limited */
3618
3619 if (smaller_bucket < attempt)
3620 return true; /* not enough space. */
3621
3622 {
3623 const time_t diff = approx_time() - write_buckets_last_empty_at;
3624 if (diff <= 1)
3625 return true; /* we're already hitting our limits, no more please */
3626 }
3627 return false;
3628}
3629
3630/** When did we last tell the accounting subsystem about transmitted
3631 * bandwidth? */
3633
3634/** Helper: adjusts our bandwidth history and informs the controller as
3635 * appropriate, given that we have just read <b>num_read</b> bytes and written
3636 * <b>num_written</b> bytes on <b>conn</b>. */
3637static void
3639 time_t now, size_t num_read, size_t num_written)
3640{
3641 /* Count bytes of answering direct and tunneled directory requests */
3642 if (conn->type == CONN_TYPE_DIR && conn->purpose == DIR_PURPOSE_SERVER) {
3643 if (num_read > 0)
3644 bwhist_note_dir_bytes_read(num_read, now);
3645 if (num_written > 0)
3646 bwhist_note_dir_bytes_written(num_written, now);
3647 }
3648
3649 /* Linked connections and internal IPs aren't counted for statistics or
3650 * accounting:
3651 * - counting linked connections would double-count BEGINDIR bytes, because
3652 * they are sent as Dir bytes on the linked connection, and OR bytes on
3653 * the OR connection;
3654 * - relays and clients don't connect to internal IPs, unless specifically
3655 * configured to do so. If they are configured that way, we don't count
3656 * internal bytes.
3657 */
3658 if (!connection_is_rate_limited(conn))
3659 return;
3660
3661 const bool is_ipv6 = (conn->socket_family == AF_INET6);
3662 if (conn->type == CONN_TYPE_OR)
3664 num_written, now, is_ipv6);
3665
3666 if (num_read > 0) {
3667 bwhist_note_bytes_read(num_read, now, is_ipv6);
3668 }
3669 if (num_written > 0) {
3670 bwhist_note_bytes_written(num_written, now, is_ipv6);
3671 }
3672 if (conn->type == CONN_TYPE_EXIT)
3673 rep_hist_note_exit_bytes(conn->port, num_written, num_read);
3674
3675 /* Remember these bytes towards statistics. */
3676 stats_increment_bytes_read_and_written(num_read, num_written);
3677
3678 /* Remember these bytes towards accounting. */
3681 accounting_add_bytes(num_read, num_written,
3682 (int)(now - last_recorded_accounting_at));
3683 } else {
3684 accounting_add_bytes(num_read, num_written, 0);
3685 }
3687 }
3688}
3689
3690/** We just read <b>num_read</b> and wrote <b>num_written</b> bytes
3691 * onto <b>conn</b>. Decrement buckets appropriately. */
3692static void
3694 size_t num_read, size_t num_written)
3695{
3696 if (num_written >= INT_MAX || num_read >= INT_MAX) {
3697 log_err(LD_BUG, "Value out of range. num_read=%lu, num_written=%lu, "
3698 "connection type=%s, state=%s",
3699 (unsigned long)num_read, (unsigned long)num_written,
3701 conn_state_to_string(conn->type, conn->state));
3703 if (num_written >= INT_MAX)
3704 num_written = 1;
3705 if (num_read >= INT_MAX)
3706 num_read = 1;
3707 }
3708
3709 record_num_bytes_transferred_impl(conn, now, num_read, num_written);
3710
3711 /* Edge connections need to decrement the read side of the bucket used by our
3712 * congestion control. */
3713 if (CONN_IS_EDGE(conn) && num_read > 0) {
3714 edge_connection_t *edge_conn = TO_EDGE_CONN(conn);
3715 token_bucket_rw_dec(&edge_conn->bucket, num_read, 0);
3716 }
3717
3718 if (!connection_is_rate_limited(conn))
3719 return; /* local IPs are free */
3720
3721 unsigned flags = 0;
3722 if (connection_counts_as_relayed_traffic(conn, now)) {
3723 flags = token_bucket_rw_dec(&global_relayed_bucket, num_read, num_written);
3724 }
3725 flags |= token_bucket_rw_dec(&global_bucket, num_read, num_written);
3726
3727 if (flags & TB_WRITE) {
3729 }
3730 if (connection_speaks_cells(conn) && conn->state == OR_CONN_STATE_OPEN) {
3731 or_connection_t *or_conn = TO_OR_CONN(conn);
3732 token_bucket_rw_dec(&or_conn->bucket, num_read, num_written);
3733 }
3734}
3735
3736/**
3737 * Mark <b>conn</b> as needing to stop reading because bandwidth has been
3738 * exhausted. If <b>is_global_bw</b>, it is closing because global bandwidth
3739 * limit has been exhausted. Otherwise, it is closing because its own
3740 * bandwidth limit has been exhausted.
3741 */
3742void
3744{
3745 (void)is_global_bw;
3746 // Double-calls to stop-reading are correlated with stalling for
3747 // ssh uploads. Might as well prevent this from happening,
3748 // especially the read_blocked_on_bw flag. That was clearly getting
3749 // set when it should not be, during an already-blocked XOFF
3750 // condition.
3751 if (!CONN_IS_EDGE(conn) || !TO_EDGE_CONN(conn)->xoff_received) {
3752 conn->read_blocked_on_bw = 1;
3755 }
3756}
3757
3758/**
3759 * Mark <b>conn</b> as needing to stop writing because write bandwidth has
3760 * been exhausted. If <b>is_global_bw</b>, it is closing because global
3761 * bandwidth limit has been exhausted. Otherwise, it is closing because its
3762 * own bandwidth limit has been exhausted.
3763*/
3764void
3766{
3767 (void)is_global_bw;
3768 conn->write_blocked_on_bw = 1;
3771}
3772
3773/** If we have exhausted our global buckets, or the buckets for conn,
3774 * stop reading. */
3775void
3777{
3778 int is_global = 1;
3779 const char *reason;
3780
3781 if (CONN_IS_EDGE(conn) &&
3782 token_bucket_rw_get_read(&TO_EDGE_CONN(conn)->bucket) <= 0) {
3783 reason = "edge connection read bucket exhausted. Pausing.";
3784 is_global = false;
3785 } else if (!connection_is_rate_limited(conn)) {
3786 return; /* Always okay. */
3787 } else if (token_bucket_rw_get_read(&global_bucket) <= 0) {
3788 reason = "global read bucket exhausted. Pausing.";
3790 token_bucket_rw_get_read(&global_relayed_bucket) <= 0) {
3791 reason = "global relayed read bucket exhausted. Pausing.";
3792 } else if (connection_speaks_cells(conn) &&
3793 conn->state == OR_CONN_STATE_OPEN &&
3794 token_bucket_rw_get_read(&TO_OR_CONN(conn)->bucket) <= 0) {
3795 reason = "connection read bucket exhausted. Pausing.";
3796 is_global = false;
3797 } else {
3798 return; /* all good, no need to stop it */
3799 }
3800
3801 LOG_FN_CONN(conn, (LOG_DEBUG, LD_NET, "%s", reason));
3802 connection_read_bw_exhausted(conn, is_global);
3803}
3804
3805/** If we have exhausted our global buckets, or the buckets for conn,
3806 * stop writing. */
3807void
3809{
3810 const char *reason;
3811
3812 if (!connection_is_rate_limited(conn))
3813 return; /* Always okay. */
3814
3815 bool is_global = true;
3816 if (token_bucket_rw_get_write(&global_bucket) <= 0) {
3817 reason = "global write bucket exhausted. Pausing.";
3819 token_bucket_rw_get_write(&global_relayed_bucket) <= 0) {
3820 reason = "global relayed write bucket exhausted. Pausing.";
3821 } else if (connection_speaks_cells(conn) &&
3822 conn->state == OR_CONN_STATE_OPEN &&
3823 token_bucket_rw_get_write(&TO_OR_CONN(conn)->bucket) <= 0) {
3824 reason = "connection write bucket exhausted. Pausing.";
3825 is_global = false;
3826 } else
3827 return; /* all good, no need to stop it */
3828
3829 LOG_FN_CONN(conn, (LOG_DEBUG, LD_NET, "%s", reason));
3830 connection_write_bw_exhausted(conn, is_global);
3831}
3832
3833/** Initialize the global buckets to the values configured in the
3834 * options */
3835void
3837{
3838 const or_options_t *options = get_options();
3839 const uint32_t now_ts = monotime_coarse_get_stamp();
3840 token_bucket_rw_init(&global_bucket,
3841 (int32_t)options->BandwidthRate,
3842 (int32_t)options->BandwidthBurst,
3843 now_ts);
3844 if (options->RelayBandwidthRate) {
3845 token_bucket_rw_init(&global_relayed_bucket,
3846 (int32_t)options->RelayBandwidthRate,
3847 (int32_t)options->RelayBandwidthBurst,
3848 now_ts);
3849 } else {
3850 token_bucket_rw_init(&global_relayed_bucket,
3851 (int32_t)options->BandwidthRate,
3852 (int32_t)options->BandwidthBurst,
3853 now_ts);
3854 }
3855
3857}
3858
3859/** Update the global connection bucket settings to a new value. */
3860void
3862{
3863 token_bucket_rw_adjust(&global_bucket,
3864 (int32_t)options->BandwidthRate,
3865 (int32_t)options->BandwidthBurst);
3866 if (options->RelayBandwidthRate) {
3867 token_bucket_rw_adjust(&global_relayed_bucket,
3868 (int32_t)options->RelayBandwidthRate,
3869 (int32_t)options->RelayBandwidthBurst);
3870 } else {
3871 token_bucket_rw_adjust(&global_relayed_bucket,
3872 (int32_t)options->BandwidthRate,
3873 (int32_t)options->BandwidthBurst);
3874 }
3875}
3876
3877/**
3878 * Cached value of the last coarse-timestamp when we refilled the
3879 * global buckets.
3880 */
3882/**
3883 * Refill the token buckets for a single connection <b>conn</b>, and the
3884 * global token buckets as appropriate. Requires that <b>now_ts</b> is
3885 * the time in coarse timestamp units.
3886 */
3887static void
3889{
3890 /* Note that we only check for equality here: the underlying
3891 * token bucket functions can handle moving backwards in time if they
3892 * need to. */
3893 if (now_ts != last_refilled_global_buckets_ts) {
3894 token_bucket_rw_refill(&global_bucket, now_ts);
3895 token_bucket_rw_refill(&global_relayed_bucket, now_ts);
3897 }
3898
3899 if (connection_speaks_cells(conn) && conn->state == OR_CONN_STATE_OPEN) {
3900 or_connection_t *or_conn = TO_OR_CONN(conn);
3901 token_bucket_rw_refill(&or_conn->bucket, now_ts);
3902 }
3903
3904 if (CONN_IS_EDGE(conn)) {
3905 token_bucket_rw_refill(&TO_EDGE_CONN(conn)->bucket, now_ts);
3906 }
3907}
3908
3909/**
3910 * Event to re-enable all connections that were previously blocked on read or
3911 * write.
3912 */
3914
3915/** True iff reenable_blocked_connections_ev is currently scheduled. */
3917
3918/** Delay after which to run reenable_blocked_connections_ev. */
3920
3921/**
3922 * Re-enable all connections that were previously blocked on read or write.
3923 * This event is scheduled after enough time has elapsed to be sure
3924 * that the buckets will refill when the connections have something to do.
3925 */
3926static void
3928{
3929 (void)ev;
3930 (void)arg;
3932 /* For conflux, we noticed logs of connection_start_reading() called
3933 * multiple times while we were blocked from a previous XOFF, and this
3934 * log was correlated with stalls during ssh uploads. So we added
3935 * this additional check, to avoid connection_start_reading() without
3936 * getting an XON. The most important piece is always allowing
3937 * the read_blocked_on_bw to get cleared, either way. */
3938 if (conn->read_blocked_on_bw == 1 &&
3939 (!CONN_IS_EDGE(conn) || !TO_EDGE_CONN(conn)->xoff_received)) {
3941 }
3942 conn->read_blocked_on_bw = 0;
3943 if (conn->write_blocked_on_bw == 1) {
3945 conn->write_blocked_on_bw = 0;
3946 }
3947 } SMARTLIST_FOREACH_END(conn);
3948
3950}
3951
3952/**
3953 * Initialize the mainloop event that we use to wake up connections that
3954 * find themselves blocked on bandwidth.
3955 */
3956static void
3958{
3963 }
3964 time_t sec = options->TokenBucketRefillInterval / 1000;
3965 int msec = (options->TokenBucketRefillInterval % 1000);
3967 reenable_blocked_connections_delay.tv_usec = msec * 1000;
3968}
3969
3970/**
3971 * Called when we have blocked a connection for being low on bandwidth:
3972 * schedule an event to reenable such connections, if it is not already
3973 * scheduled.
3974 */
3975static void
3977{
3979 return;
3980 if (BUG(reenable_blocked_connections_ev == NULL)) {
3982 }
3986}
3987
3988/** Read bytes from conn->s and process them.
3989 *
3990 * It calls connection_buf_read_from_socket() to bring in any new bytes,
3991 * and then calls connection_process_inbuf() to process them.
3992 *
3993 * Mark the connection and return -1 if you want to close it, else
3994 * return 0.
3995 */
3996static int
3998{
3999 ssize_t max_to_read=-1, try_to_read;
4000 size_t before, n_read = 0;
4001 int socket_error = 0;
4002
4003 if (conn->marked_for_close)
4004 return 0; /* do nothing */
4005
4007
4009
4010 switch (conn->type) {
4027 /* This should never happen; eventdns.c handles the reads here. */
4029 return 0;
4030 }
4031
4032 loop_again:
4033 try_to_read = max_to_read;
4035
4036 before = buf_datalen(conn->inbuf);
4037 if (connection_buf_read_from_socket(conn, &max_to_read, &socket_error) < 0) {
4038 /* There's a read error; kill the connection.*/
4039 if (conn->type == CONN_TYPE_OR) {
4041 socket_error != 0 ?
4042 errno_to_orconn_end_reason(socket_error) :
4043 END_OR_CONN_REASON_CONNRESET,
4044 socket_error != 0 ?
4045 tor_socket_strerror(socket_error) :
4046 "(unknown, errno was 0)");
4047 }
4048 if (CONN_IS_EDGE(conn)) {
4049 edge_connection_t *edge_conn = TO_EDGE_CONN(conn);
4050 connection_edge_end_errno(edge_conn);
4051 if (conn->type == CONN_TYPE_AP && TO_ENTRY_CONN(conn)->socks_request) {
4052 /* broken, don't send a socks reply back */
4054 }
4055 }
4056 connection_close_immediate(conn); /* Don't flush; connection is dead. */
4057 /*
4058 * This can bypass normal channel checking since we did
4059 * connection_or_notify_error() above.
4060 */
4061 connection_mark_for_close_internal(conn);
4062 return -1;
4063 }
4064 n_read += buf_datalen(conn->inbuf) - before;
4065 if (CONN_IS_EDGE(conn) && try_to_read != max_to_read) {
4066 /* instruct it not to try to package partial cells. */
4067 if (connection_process_inbuf(conn, 0) < 0) {
4068 return -1;
4069 }
4070 if (!conn->marked_for_close &&
4071 connection_is_reading(conn) &&
4072 !conn->inbuf_reached_eof &&
4073 max_to_read > 0)
4074 goto loop_again; /* try reading again, in case more is here now */
4075 }
4076 /* one last try, packaging partial cells and all. */
4077 if (!conn->marked_for_close &&
4078 connection_process_inbuf(conn, 1) < 0) {
4079 return -1;
4080 }
4081 if (conn->linked_conn) {
4082 /* The other side's handle_write() will never actually get called, so
4083 * we need to invoke the appropriate callbacks ourself. */
4084 connection_t *linked = conn->linked_conn;
4085
4086 if (n_read) {
4087 /* Probably a no-op, since linked conns typically don't count for
4088 * bandwidth rate limiting. But do it anyway so we can keep stats
4089 * accurately. Note that since we read the bytes from conn, and
4090 * we're writing the bytes onto the linked connection, we count
4091 * these as <i>written</i> bytes. */
4092 connection_buckets_decrement(linked, approx_time(), 0, n_read);
4093
4094 if (connection_flushed_some(linked) < 0)
4095 connection_mark_for_close(linked);
4096 if (!connection_wants_to_flush(linked))
4098 }
4099
4100 if (!buf_datalen(linked->outbuf) && conn->active_on_link)
4102 }
4103 /* If we hit the EOF, call connection_reached_eof(). */
4104 if (!conn->marked_for_close &&
4105 conn->inbuf_reached_eof &&
4106 connection_reached_eof(conn) < 0) {
4107 return -1;
4108 }
4109 return 0;
4110}
4111
4112/* DOCDOC connection_handle_read */
4113int
4114connection_handle_read(connection_t *conn)
4115{
4116 int res;
4117 update_current_time(time(NULL));
4118 res = connection_handle_read_impl(conn);
4119 return res;
4120}
4121
4122/** Pull in new bytes from conn->s or conn->linked_conn onto conn->inbuf,
4123 * either directly or via TLS. Reduce the token buckets by the number of bytes
4124 * read.
4125 *
4126 * If *max_to_read is -1, then decide it ourselves, else go with the
4127 * value passed to us. When returning, if it's changed, subtract the
4128 * number of bytes we read from *max_to_read.
4129 *
4130 * Return -1 if we want to break conn, else return 0.
4131 */
4132static int
4134 int *socket_error)
4135{
4136 int result;
4137 ssize_t at_most = *max_to_read;
4138 size_t slack_in_buf, more_to_read;
4139 size_t n_read = 0, n_written = 0;
4140
4141 if (at_most == -1) { /* we need to initialize it */
4142 /* how many bytes are we allowed to read? */
4143 at_most = connection_bucket_read_limit(conn, approx_time());
4144 }
4145
4146 /* Do not allow inbuf to grow past BUF_MAX_LEN. */
4147 const ssize_t maximum = BUF_MAX_LEN - buf_datalen(conn->inbuf);
4148 if (at_most > maximum) {
4149 at_most = maximum;
4150 }
4151
4152 slack_in_buf = buf_slack(conn->inbuf);
4153 again:
4154 if ((size_t)at_most > slack_in_buf && slack_in_buf >= 1024) {
4155 more_to_read = at_most - slack_in_buf;
4156 at_most = slack_in_buf;
4157 } else {
4158 more_to_read = 0;
4159 }
4160
4161 if (connection_speaks_cells(conn) &&
4163 int pending;
4164 or_connection_t *or_conn = TO_OR_CONN(conn);
4165 size_t initial_size;
4166 if (conn->state == OR_CONN_STATE_TLS_HANDSHAKING ||
4168 /* continue handshaking even if global token bucket is empty */
4169 return connection_tls_continue_handshake(or_conn);
4170 }
4171
4172 log_debug(LD_NET,
4173 "%d: starting, inbuf_datalen %ld (%d pending in tls object)."
4174 " at_most %ld.",
4175 (int)conn->s,(long)buf_datalen(conn->inbuf),
4176 tor_tls_get_pending_bytes(or_conn->tls), (long)at_most);
4177
4178 initial_size = buf_datalen(conn->inbuf);
4179 /* else open, or closing */
4180 result = buf_read_from_tls(conn->inbuf, or_conn->tls, at_most);
4181 if (TOR_TLS_IS_ERROR(result) || result == TOR_TLS_CLOSE)
4182 or_conn->tls_error = result;
4183 else
4184 or_conn->tls_error = 0;
4185
4186 switch (result) {
4187 case TOR_TLS_CLOSE:
4188 case TOR_TLS_ERROR_IO:
4189 log_debug(LD_NET,"TLS %s closed %son read. Closing.",
4190 connection_describe(conn),
4191 result == TOR_TLS_CLOSE ? "cleanly " : "");
4192 return result;
4194 log_debug(LD_NET,"tls error [%s] from %s. Breaking.",
4195 tor_tls_err_to_string(result),
4196 connection_describe(conn));
4197 return result;
4198 case TOR_TLS_WANTWRITE:
4200 return 0;
4201 case TOR_TLS_WANTREAD:
4202 if (conn->in_connection_handle_write) {
4203 /* We've been invoked from connection_handle_write, because we're
4204 * waiting for a TLS renegotiation, the renegotiation started, and
4205 * SSL_read returned WANTWRITE. But now SSL_read is saying WANTREAD
4206 * again. Stop waiting for write events now, or else we'll
4207 * busy-loop until data arrives for us to read.
4208 * XXX: remove this when v2 handshakes support is dropped. */
4210 if (!connection_is_reading(conn))
4212 }
4213 /* we're already reading, one hopes */
4214 break;
4215 case TOR_TLS_DONE: /* no data read, so nothing to process */
4216 break; /* so we call bucket_decrement below */
4217 default:
4218 break;
4219 }
4220 pending = tor_tls_get_pending_bytes(or_conn->tls);
4221 if (pending) {
4222 /* If we have any pending bytes, we read them now. This *can*
4223 * take us over our read allotment, but really we shouldn't be
4224 * believing that SSL bytes are the same as TCP bytes anyway. */
4225 int r2 = buf_read_from_tls(conn->inbuf, or_conn->tls, pending);
4226 if (BUG(r2<0)) {
4227 log_warn(LD_BUG, "apparently, reading pending bytes can fail.");
4228 return -1;
4229 }
4230 }
4231 result = (int)(buf_datalen(conn->inbuf)-initial_size);
4232 tor_tls_get_n_raw_bytes(or_conn->tls, &n_read, &n_written);
4233 log_debug(LD_GENERAL, "After TLS read of %d: %ld read, %ld written",
4234 result, (long)n_read, (long)n_written);
4235 } else if (conn->linked) {
4236 if (conn->linked_conn) {
4237 result = (int) buf_move_all(conn->inbuf, conn->linked_conn->outbuf);
4238 } else {
4239 result = 0;
4240 }
4241 //log_notice(LD_GENERAL, "Moved %d bytes on an internal link!", result);
4242 /* If the other side has disappeared, or if it's been marked for close and
4243 * we flushed its outbuf, then we should set our inbuf_reached_eof. */
4244 if (!conn->linked_conn ||
4245 (conn->linked_conn->marked_for_close &&
4246 buf_datalen(conn->linked_conn->outbuf) == 0))
4247 conn->inbuf_reached_eof = 1;
4248
4249 n_read = (size_t) result;
4250 } else {
4251 /* !connection_speaks_cells, !conn->linked_conn. */
4252 int reached_eof = 0;
4253 CONN_LOG_PROTECT(conn,
4254 result = buf_read_from_socket(conn->inbuf, conn->s,
4255 at_most,
4256 &reached_eof,
4257 socket_error));
4258 if (reached_eof)
4259 conn->inbuf_reached_eof = 1;
4260
4261// log_fn(LOG_DEBUG,"read_to_buf returned %d.",read_result);
4262
4263 if (result < 0)
4264 return -1;
4265 n_read = (size_t) result;
4266 }
4267
4268 if (n_read > 0) {
4269 /* change *max_to_read */
4270 *max_to_read = at_most - n_read;
4271
4272 /* Onion service application connection. Note read bytes for metrics. */
4273 if (CONN_IS_EDGE(conn) && TO_EDGE_CONN(conn)->hs_ident) {
4274 edge_connection_t *edge_conn = TO_EDGE_CONN(conn);
4275 hs_metrics_app_read_bytes(&edge_conn->hs_ident->identity_pk,
4276 edge_conn->hs_ident->orig_virtual_port,
4277 n_read);
4278 }
4279
4280 /* Update edge_conn->n_read */
4281 if (conn->type == CONN_TYPE_AP) {
4282 edge_connection_t *edge_conn = TO_EDGE_CONN(conn);
4283
4284 /* Check for overflow: */
4285 if (PREDICT_LIKELY(UINT32_MAX - edge_conn->n_read > n_read))
4286 edge_conn->n_read += (int)n_read;
4287 else
4288 edge_conn->n_read = UINT32_MAX;
4289 }
4290
4291 /* If CONN_BW events are enabled, update conn->n_read_conn_bw for
4292 * OR/DIR/EXIT connections, checking for overflow. */
4294 (conn->type == CONN_TYPE_OR ||
4295 conn->type == CONN_TYPE_DIR ||
4296 conn->type == CONN_TYPE_EXIT)) {
4297 if (PREDICT_LIKELY(UINT32_MAX - conn->n_read_conn_bw > n_read))
4298 conn->n_read_conn_bw += (int)n_read;
4299 else
4300 conn->n_read_conn_bw = UINT32_MAX;
4301 }
4302 }
4303
4304 connection_buckets_decrement(conn, approx_time(), n_read, n_written);
4305
4306 if (more_to_read && result == at_most) {
4307 slack_in_buf = buf_slack(conn->inbuf);
4308 at_most = more_to_read;
4309 goto again;
4310 }
4311
4312 /* Call even if result is 0, since the global read bucket may
4313 * have reached 0 on a different conn, and this connection needs to
4314 * know to stop reading. */
4316 if (n_written > 0 && connection_is_writing(conn))
4318
4319 return 0;
4320}
4321
4322/** A pass-through to fetch_from_buf. */
4323int
4324connection_buf_get_bytes(char *string, size_t len, connection_t *conn)
4325{
4326 return buf_get_bytes(conn->inbuf, string, len);
4327}
4328
4329/** As buf_get_line(), but read from a connection's input buffer. */
4330int
4332 size_t *data_len)
4333{
4334 return buf_get_line(conn->inbuf, data, data_len);
4335}
4336
4337/** As fetch_from_buf_http, but fetches from a connection's input buffer_t as
4338 * appropriate. */
4339int
4341 char **headers_out, size_t max_headerlen,
4342 char **body_out, size_t *body_used,
4343 size_t max_bodylen, int force_complete)
4344{
4345 return fetch_from_buf_http(conn->inbuf, headers_out, max_headerlen,
4346 body_out, body_used, max_bodylen, force_complete);
4347}
4348
4349/** Return true if this connection has data to flush. */
4350int
4352{
4353 return connection_get_outbuf_len(conn) > 0;
4354}
4355
4356/** Are there too many bytes on edge connection <b>conn</b>'s outbuf to
4357 * send back a relay-level sendme yet? Return 1 if so, 0 if not. Used by
4358 * connection_edge_consider_sending_sendme().
4359 */
4360int
4362{
4363 return connection_get_outbuf_len(conn) > 10*CELL_PAYLOAD_SIZE;
4364}
4365
4366/**
4367 * On Windows Vista and Windows 7, tune the send buffer size according to a
4368 * hint from the OS.
4369 *
4370 * This should help fix slow upload rates.
4371 */
4372static void
4374{
4375#ifdef _WIN32
4376 /* We only do this on Vista and 7, because earlier versions of Windows
4377 * don't have the SIO_IDEAL_SEND_BACKLOG_QUERY functionality, and on
4378 * later versions it isn't necessary. */
4379 static int isVistaOr7 = -1;
4380 if (isVistaOr7 == -1) {
4381 isVistaOr7 = 0;
4382 OSVERSIONINFO osvi = { 0 };
4383 osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
4384 GetVersionEx(&osvi);
4385 if (osvi.dwMajorVersion == 6 && osvi.dwMinorVersion < 2)
4386 isVistaOr7 = 1;
4387 }
4388 if (!isVistaOr7)
4389 return;
4390 if (get_options()->ConstrainedSockets)
4391 return;
4392 ULONG isb = 0;
4393 DWORD bytesReturned = 0;
4394 if (!WSAIoctl(sock, SIO_IDEAL_SEND_BACKLOG_QUERY, NULL, 0,
4395 &isb, sizeof(isb), &bytesReturned, NULL, NULL)) {
4396 setsockopt(sock, SOL_SOCKET, SO_SNDBUF, (const char*)&isb, sizeof(isb));
4397 }
4398#else /* !defined(_WIN32) */
4399 (void) sock;
4400#endif /* defined(_WIN32) */
4401}
4402
4403/** Try to flush more bytes onto <b>conn</b>->s.
4404 *
4405 * This function is called in connection_handle_write(), which gets
4406 * called from conn_write_callback() in main.c when libevent tells us
4407 * that <b>conn</b> wants to write.
4408 *
4409 * Update <b>conn</b>->timestamp_last_write_allowed to now, and call flush_buf
4410 * or flush_buf_tls appropriately. If it succeeds and there are no more
4411 * bytes on <b>conn</b>->outbuf, then call connection_finished_flushing
4412 * on it too.
4413 *
4414 * If <b>force</b>, then write as many bytes as possible, ignoring bandwidth
4415 * limits. (Used for flushing messages to controller connections on fatal
4416 * errors.)
4417 *
4418 * Mark the connection and return -1 if you want to close it, else
4419 * return 0.
4420 */
4421static int
4423{
4424 int e;
4425 socklen_t len=(socklen_t)sizeof(e);
4426 int result;
4427 ssize_t max_to_write;
4428 time_t now = approx_time();
4429 size_t n_read = 0, n_written = 0;
4430 int dont_stop_writing = 0;
4431
4433
4434 if (conn->marked_for_close || !SOCKET_OK(conn->s))
4435 return 0; /* do nothing */
4436
4437 if (conn->in_flushed_some) {
4438 log_warn(LD_BUG, "called recursively from inside conn->in_flushed_some");
4439 return 0;
4440 }
4441
4442 conn->timestamp_last_write_allowed = now;
4443
4445
4446 /* Sometimes, "writable" means "connected". */
4448 if (getsockopt(conn->s, SOL_SOCKET, SO_ERROR, (void*)&e, &len) < 0) {
4449 log_warn(LD_BUG, "getsockopt() syscall failed");
4450 if (conn->type == CONN_TYPE_OR) {
4451 or_connection_t *orconn = TO_OR_CONN(conn);
4453 } else {
4454 if (CONN_IS_EDGE(conn)) {
4456 }
4457 connection_mark_for_close(conn);
4458 }
4459 return -1;
4460 }
4461 if (e) {
4462 /* some sort of error, but maybe just inprogress still */
4463 if (!ERRNO_IS_CONN_EINPROGRESS(e)) {
4464 log_info(LD_NET,"in-progress connect failed. Removing. (%s)",
4465 tor_socket_strerror(e));
4466 if (CONN_IS_EDGE(conn))
4468 if (conn->type == CONN_TYPE_OR)
4471 tor_socket_strerror(e));
4472
4474 /*
4475 * This can bypass normal channel checking since we did
4476 * connection_or_notify_error() above.
4477 */
4478 connection_mark_for_close_internal(conn);
4479 return -1;
4480 } else {
4481 return 0; /* no change, see if next time is better */
4482 }
4483 }
4484 /* The connection is successful. */
4486 return -1;
4487 }
4488
4489 max_to_write = force ? (ssize_t)buf_datalen(conn->outbuf)
4490 : connection_bucket_write_limit(conn, now);
4491
4492 if (connection_speaks_cells(conn) &&
4494 or_connection_t *or_conn = TO_OR_CONN(conn);
4495 size_t initial_size;
4496 if (conn->state == OR_CONN_STATE_TLS_HANDSHAKING ||
4499 if (connection_tls_continue_handshake(or_conn) < 0) {
4500 /* Don't flush; connection is dead. */
4502 END_OR_CONN_REASON_MISC,
4503 "TLS error in connection_tls_"
4504 "continue_handshake()");
4506 /*
4507 * This can bypass normal channel checking since we did
4508 * connection_or_notify_error() above.
4509 */
4510 connection_mark_for_close_internal(conn);
4511 return -1;
4512 }
4513 return 0;
4514 } else if (conn->state == OR_CONN_STATE_TLS_SERVER_RENEGOTIATING) {
4515 return connection_handle_read(conn);
4516 }
4517
4518 /* else open, or closing */
4519 initial_size = buf_datalen(conn->outbuf);
4520 result = buf_flush_to_tls(conn->outbuf, or_conn->tls,
4521 max_to_write);
4522
4523 if (result >= 0)
4525
4526 /* If we just flushed the last bytes, tell the channel on the
4527 * or_conn to check if it needs to geoip_change_dirreq_state() */
4528 /* XXXX move this to flushed_some or finished_flushing -NM */
4529 if (buf_datalen(conn->outbuf) == 0 && or_conn->chan)
4530 channel_notify_flushed(TLS_CHAN_TO_BASE(or_conn->chan));
4531
4532 switch (result) {
4534 case TOR_TLS_CLOSE:
4535 or_conn->tls_error = result;
4536 log_info(LD_NET, result != TOR_TLS_CLOSE ?
4537 "tls error. breaking.":"TLS connection closed on flush");
4538 /* Don't flush; connection is dead. */
4540 END_OR_CONN_REASON_MISC,
4541 result != TOR_TLS_CLOSE ?
4542 "TLS error in during flush" :
4543 "TLS closed during flush");
4545 /*
4546 * This can bypass normal channel checking since we did
4547 * connection_or_notify_error() above.
4548 */
4549 connection_mark_for_close_internal(conn);
4550 return -1;
4551 case TOR_TLS_WANTWRITE:
4552 log_debug(LD_NET,"wanted write.");
4553 /* we're already writing */
4554 dont_stop_writing = 1;
4555 break;
4556 case TOR_TLS_WANTREAD:
4557 /* Make sure to avoid a loop if the receive buckets are empty. */
4558 log_debug(LD_NET,"wanted read.");
4559 if (!connection_is_reading(conn)) {
4561 /* we'll start reading again when we get more tokens in our
4562 * read bucket; then we'll start writing again too.
4563 */
4564 }
4565 /* else no problem, we're already reading */
4566 return 0;
4567 /* case TOR_TLS_DONE:
4568 * for TOR_TLS_DONE, fall through to check if the flushlen
4569 * is empty, so we can stop writing.
4570 */
4571 }
4572
4573 tor_tls_get_n_raw_bytes(or_conn->tls, &n_read, &n_written);
4574 log_debug(LD_GENERAL, "After TLS write of %d: %ld read, %ld written",
4575 result, (long)n_read, (long)n_written);
4576 or_conn->bytes_xmitted += result;
4577 or_conn->bytes_xmitted_by_tls += n_written;
4578 /* So we notice bytes were written even on error */
4579 /* XXXX This cast is safe since we can never write INT_MAX bytes in a
4580 * single set of TLS operations. But it looks kinda ugly. If we refactor
4581 * the *_buf_tls functions, we should make them return ssize_t or size_t
4582 * or something. */
4583 result = (int)(initial_size-buf_datalen(conn->outbuf));
4584 } else {
4585 CONN_LOG_PROTECT(conn,
4586 result = buf_flush_to_socket(conn->outbuf, conn->s,
4587 max_to_write));
4588 if (result < 0) {
4589 if (CONN_IS_EDGE(conn))
4591 if (conn->type == CONN_TYPE_AP) {
4592 /* writing failed; we couldn't send a SOCKS reply if we wanted to */
4594 }
4595
4596 connection_close_immediate(conn); /* Don't flush; connection is dead. */
4597 connection_mark_for_close(conn);
4598 return -1;
4599 }
4601 n_written = (size_t) result;
4602 }
4603
4604 if (n_written && conn->type == CONN_TYPE_AP) {
4605 edge_connection_t *edge_conn = TO_EDGE_CONN(conn);
4606
4607 /* Check for overflow: */
4608 if (PREDICT_LIKELY(UINT32_MAX - edge_conn->n_written > n_written))
4609 edge_conn->n_written += (int)n_written;
4610 else
4611 edge_conn->n_written = UINT32_MAX;
4612 }
4613
4614 /* If CONN_BW events are enabled, update conn->n_written_conn_bw for
4615 * OR/DIR/EXIT connections, checking for overflow. */
4616 if (n_written && get_options()->TestingEnableConnBwEvent &&
4617 (conn->type == CONN_TYPE_OR ||
4618 conn->type == CONN_TYPE_DIR ||
4619 conn->type == CONN_TYPE_EXIT)) {
4620 if (PREDICT_LIKELY(UINT32_MAX - conn->n_written_conn_bw > n_written))
4621 conn->n_written_conn_bw += (int)n_written;
4622 else
4623 conn->n_written_conn_bw = UINT32_MAX;
4624 }
4625
4626 connection_buckets_decrement(conn, approx_time(), n_read, n_written);
4627
4628 if (result > 0) {
4629 /* If we wrote any bytes from our buffer, then call the appropriate
4630 * functions. */
4631 if (connection_flushed_some(conn) < 0) {
4632 if (connection_speaks_cells(conn)) {
4634 END_OR_CONN_REASON_MISC,
4635 "Got error back from "
4636 "connection_flushed_some()");
4637 }
4638
4639 /*
4640 * This can bypass normal channel checking since we did
4641 * connection_or_notify_error() above.
4642 */
4643 connection_mark_for_close_internal(conn);
4644 }
4645 }
4646
4647 if (!connection_wants_to_flush(conn) &&
4648 !dont_stop_writing) { /* it's done flushing */
4649 if (connection_finished_flushing(conn) < 0) {
4650 /* already marked */
4651 goto err;
4652 }
4653 goto done;
4654 }
4655
4656 /* Call even if result is 0, since the global write bucket may
4657 * have reached 0 on a different conn, and this connection needs to
4658 * know to stop writing. */
4660 if (n_read > 0 && connection_is_reading(conn))
4662
4663 done:
4664 /* If this is an edge connection with congestion control, check to see
4665 * if it is time to send an xon */
4666 if (conn_uses_flow_control(conn)) {
4667 flow_control_decide_xon(TO_EDGE_CONN(conn), n_written);
4668 }
4669
4670 return 0;
4671
4672 err:
4673 return -1;
4674}
4675
4676/* DOCDOC connection_handle_write */
4677int
4678connection_handle_write(connection_t *conn, int force)
4679{
4680 int res;
4681 update_current_time(time(NULL));
4682 /* connection_handle_write_impl() might call connection_handle_read()
4683 * if we're in the middle of a v2 handshake, in which case it needs this
4684 * flag set. */
4686 res = connection_handle_write_impl(conn, force);
4688 return res;
4689}
4690
4691/**
4692 * Try to flush data that's waiting for a write on <b>conn</b>. Return
4693 * -1 on failure, 0 on success.
4694 *
4695 * Don't use this function for regular writing; the buffers
4696 * system should be good enough at scheduling writes there. Instead, this
4697 * function is for cases when we're about to exit or something and we want
4698 * to report it right away.
4699 */
4700int
4702{
4703 return connection_handle_write(conn, 1);
4704}
4705
4706/** Helper for connection_write_to_buf_impl and connection_write_buf_to_buf:
4707 *
4708 * Return true iff it is okay to queue bytes on <b>conn</b>'s outbuf for
4709 * writing.
4710 */
4711static int
4713{
4714 /* if it's marked for close, only allow write if we mean to flush it */
4715 if (conn->marked_for_close && !conn->hold_open_until_flushed)
4716 return 0;
4717
4718 return 1;
4719}
4720
4721/** Helper for connection_write_to_buf_impl and connection_write_buf_to_buf:
4722 *
4723 * Called when an attempt to add bytes on <b>conn</b>'s outbuf has failed;
4724 * mark the connection and warn as appropriate.
4725 */
4726static void
4728{
4729 if (CONN_IS_EDGE(conn)) {
4730 /* if it failed, it means we have our package/delivery windows set
4731 wrong compared to our max outbuf size. close the whole circuit. */
4732 log_warn(LD_NET,
4733 "write_to_buf failed. Closing circuit (fd %d).", (int)conn->s);
4734 circuit_mark_for_close(circuit_get_by_edge_conn(TO_EDGE_CONN(conn)),
4735 END_CIRC_REASON_INTERNAL);
4736 } else if (conn->type == CONN_TYPE_OR) {
4737 or_connection_t *orconn = TO_OR_CONN(conn);
4738 log_warn(LD_NET,
4739 "write_to_buf failed on an orconn; notifying of error "
4740 "(fd %d)", (int)(conn->s));
4742 } else {
4743 log_warn(LD_NET,
4744 "write_to_buf failed. Closing connection (fd %d).",
4745 (int)conn->s);
4746 connection_mark_for_close(conn);
4747 }
4748}
4749
4750/** Helper for connection_write_to_buf_impl and connection_write_buf_to_buf:
4751 *
4752 * Called when an attempt to add bytes on <b>conn</b>'s outbuf has succeeded:
4753 * start writing if appropriate.
4754 */
4755static void
4757{
4758 /* If we receive optimistic data in the EXIT_CONN_STATE_RESOLVING
4759 * state, we don't want to try to write it right away, since
4760 * conn->write_event won't be set yet. Otherwise, write data from
4761 * this conn as the socket is available. */
4762 if (conn->write_event) {
4764 }
4765}
4766
4767/** Append <b>len</b> bytes of <b>string</b> onto <b>conn</b>'s
4768 * outbuf, and ask it to start writing.
4769 *
4770 * If <b>zlib</b> is nonzero, this is a directory connection that should get
4771 * its contents compressed or decompressed as they're written. If zlib is
4772 * negative, this is the last data to be compressed, and the connection's zlib
4773 * state should be flushed.
4774 */
4775MOCK_IMPL(void,
4776connection_write_to_buf_impl_,(const char *string, size_t len,
4777 connection_t *conn, int zlib))
4778{
4779 /* XXXX This function really needs to return -1 on failure. */
4780 int r;
4781 if (!len && !(zlib<0))
4782 return;
4783
4784 if (!connection_may_write_to_buf(conn))
4785 return;
4786
4787 if (zlib) {
4788 dir_connection_t *dir_conn = TO_DIR_CONN(conn);
4789 int done = zlib < 0;
4791 dir_conn->compress_state,
4792 string, len, done));
4793 } else {
4794 CONN_LOG_PROTECT(conn, r = buf_add(conn->outbuf, string, len));
4795 }
4796 if (r < 0) {
4798 return;
4799 }
4801}
4802
4803/**
4804 * Write a <b>string</b> (of size <b>len</b> to directory connection
4805 * <b>dir_conn</b>. Apply compression if connection is configured to use
4806 * it and finalize it if <b>done</b> is true.
4807 */
4808void
4809connection_dir_buf_add(const char *string, size_t len,
4810 dir_connection_t *dir_conn, int done)
4811{
4812 if (dir_conn->compress_state != NULL) {
4813 connection_buf_add_compress(string, len, dir_conn, done);
4814 return;
4815 }
4816
4817 connection_buf_add(string, len, TO_CONN(dir_conn));
4818}
4819
4820void
4821connection_buf_add_compress(const char *string, size_t len,
4822 dir_connection_t *conn, int done)
4823{
4824 connection_write_to_buf_impl_(string, len, TO_CONN(conn), done ? -1 : 1);
4825}
4826
4827/**
4828 * Add all bytes from <b>buf</b> to <b>conn</b>'s outbuf, draining them
4829 * from <b>buf</b>. (If the connection is marked and will soon be closed,
4830 * nothing is drained.)
4831 */
4832void
4834{
4835 tor_assert(conn);
4836 tor_assert(buf);
4837 size_t len = buf_datalen(buf);
4838 if (len == 0)
4839 return;
4840
4841 if (!connection_may_write_to_buf(conn))
4842 return;
4843
4844 buf_move_all(conn->outbuf, buf);
4846}
4847
/** Expand to a function body that returns a newly allocated smartlist
 * holding every connection_t * <b>var</b> from get_connection_array() for
 * which <b>test</b> holds and which is not marked for close. The list may
 * be empty. */
#define CONN_GET_ALL_TEMPLATE(var, test)                          \
  STMT_BEGIN                                                      \
    smartlist_t *conns = get_connection_array();                  \
    smartlist_t *ret_conns = smartlist_new();                     \
    SMARTLIST_FOREACH_BEGIN(conns, connection_t *, var) {         \
      if (var && (test)) {                                        \
        if (!var->marked_for_close) {                             \
          smartlist_add(ret_conns, var);                          \
        }                                                         \
      }                                                           \
    } SMARTLIST_FOREACH_END(var);                                 \
    return ret_conns;                                             \
  STMT_END
4858
4859/* Return a list of connections that aren't closed and match the given type
4860 * and state. The returned list can be empty and must be freed using
4861 * smartlist_free(). The caller does NOT have ownership of the objects in the
4862 * list so it must not free them nor reference them as they can disappear. */
4864connection_list_by_type_state(int type, int state)
4865{
4866 CONN_GET_ALL_TEMPLATE(conn, (conn->type == type && conn->state == state));
4867}
4868
4869/* Return a list of connections that aren't closed and match the given type
4870 * and purpose. The returned list can be empty and must be freed using
4871 * smartlist_free(). The caller does NOT have ownership of the objects in the
4872 * list so it must not free them nor reference them as they can disappear. */
4874connection_list_by_type_purpose(int type, int purpose)
4875{
4876 CONN_GET_ALL_TEMPLATE(conn,
4877 (conn->type == type && conn->purpose == purpose));
4878}
4879
/** Expand to a function body that walks get_connection_array() and returns
 * the first connection_t * <b>var</b> for which <b>test</b> holds and which
 * is not marked for close; the body returns NULL when no entry qualifies. */
#define CONN_GET_TEMPLATE(var, test)                              \
  STMT_BEGIN                                                      \
    smartlist_t *conns = get_connection_array();                  \
    SMARTLIST_FOREACH_BEGIN(conns, connection_t *, var) {         \
      if (var && (test) && !var->marked_for_close)                \
        return var;                                               \
    } SMARTLIST_FOREACH_END(var);                                 \
    return NULL;                                                  \
  STMT_END
4892
4893/** Return a connection with given type, address, port, and purpose;
4894 * or NULL if no such connection exists (or if all such connections are marked
4895 * for close). */
4898 const tor_addr_t *addr, uint16_t port,
4899 int purpose))
4900{
4901 CONN_GET_TEMPLATE(conn,
4902 (conn->type == type &&
4903 tor_addr_eq(&conn->addr, addr) &&
4904 conn->port == port &&
4905 conn->purpose == purpose));
4906}
4907
4908/** Return the stream with id <b>id</b> if it is not already marked for
4909 * close.
4910 */
4913{
4914 CONN_GET_TEMPLATE(conn, conn->global_identifier == id);
4915}
4916
4917/** Return a connection of type <b>type</b> that is not marked for close.
4918 */
4921{
4922 CONN_GET_TEMPLATE(conn, conn->type == type);
4923}
4924
4925/** Return a connection of type <b>type</b> that is in state <b>state</b>,
4926 * and that is not marked for close.
4927 */
4930{
4931 CONN_GET_TEMPLATE(conn, conn->type == type && conn->state == state);
4932}
4933
4934/**
4935 * Return a connection of type <b>type</b> that is not an internally linked
4936 * connection, and is not marked for close.
4937 **/
4940{
4941 CONN_GET_TEMPLATE(conn, conn->type == type && !conn->linked);
4942}
4943
/** Expand to a function body that returns a newly allocated smartlist of
 * dir_connection_t * taken from get_connection_array().
 *
 * Each entry is visible as connection_t *<b>conn_var</b> while
 * <b>conn_test</b> is evaluated. An entry that passes it, has type
 * CONN_TYPE_DIR, and is not marked for close is converted with
 * TO_DIR_CONN() into dir_connection_t *<b>dirconn_var</b>, and is added to
 * the list when <b>dirconn_test</b> holds as well. */
#define DIR_CONN_LIST_TEMPLATE(conn_var, conn_test,                   \
                               dirconn_var, dirconn_test)             \
  STMT_BEGIN                                                          \
    smartlist_t *conns = get_connection_array();                      \
    smartlist_t *dir_conns = smartlist_new();                         \
    SMARTLIST_FOREACH_BEGIN(conns, connection_t *, conn_var) {        \
      if (conn_var && (conn_test)) {                                  \
        if (conn_var->type == CONN_TYPE_DIR &&                        \
            !conn_var->marked_for_close) {                            \
          dir_connection_t *dirconn_var = TO_DIR_CONN(conn_var);      \
          if (dirconn_var && (dirconn_test)) {                        \
            smartlist_add(dir_conns, dirconn_var);                    \
          }                                                           \
        }                                                             \
      }                                                               \
    } SMARTLIST_FOREACH_END(conn_var);                                \
    return dir_conns;                                                 \
  STMT_END
4965
4966/** Return a list of directory connections that are fetching the item
4967 * described by <b>purpose</b>/<b>resource</b>. If there are none,
4968 * return an empty list. This list must be freed using smartlist_free,
4969 * but the pointers in it must not be freed.
4970 * Note that this list should not be cached, as the pointers in it can be
4971 * freed if their connections close. */
4974 int purpose,
4975 const char *resource)
4976{
4978 conn->purpose == purpose,
4979 dirconn,
4980 0 == strcmp_opt(resource,
4981 dirconn->requested_resource));
4982}
4983
4984/** Return a list of directory connections that are fetching the item
4985 * described by <b>purpose</b>/<b>resource</b>/<b>state</b>. If there are
4986 * none, return an empty list. This list must be freed using smartlist_free,
4987 * but the pointers in it must not be freed.
4988 * Note that this list should not be cached, as the pointers in it can be
4989 * freed if their connections close. */
4992 int purpose,
4993 const char *resource,
4994 int state)
4995{
4997 conn->purpose == purpose && conn->state == state,
4998 dirconn,
4999 0 == strcmp_opt(resource,
5000 dirconn->requested_resource));
5001}
5002
5003#undef DIR_CONN_LIST_TEMPLATE
5004
5005/** Return an arbitrary active OR connection that isn't <b>this_conn</b>.
5006 *
5007 * We use this to guess if we should tell the controller that we
5008 * didn't manage to connect to any of our bridges. */
5009static connection_t *
5011{
5012 CONN_GET_TEMPLATE(conn,
5013 conn != TO_CONN(this_conn) && conn->type == CONN_TYPE_OR);
5014}
5015
5016/** Return 1 if there are any active OR connections apart from
5017 * <b>this_conn</b>.
5018 *
5019 * We use this to guess if we should tell the controller that we
5020 * didn't manage to connect to any of our bridges. */
5021int
5023{
5025 if (conn != NULL) {
5026 log_debug(LD_DIR, "%s: Found an OR connection: %s",
5027 __func__, connection_describe(conn));
5028 return 1;
5029 }
5030
5031 return 0;
5032}
5033
5034#undef CONN_GET_TEMPLATE
5035
5036/** Return 1 if <b>conn</b> is a listener conn, else return 0. */
5037int
5039{
5040 if (conn->type == CONN_TYPE_OR_LISTENER ||
5042 conn->type == CONN_TYPE_AP_LISTENER ||
5047 conn->type == CONN_TYPE_DIR_LISTENER ||
5050 return 1;
5051 return 0;
5052}
5053
5054/** Return 1 if <b>conn</b> is in state "open" and is not marked
5055 * for close, else return 0.
5056 */
5057int
5059{
5060 tor_assert(conn);
5061
5062 if (conn->marked_for_close)
5063 return 0;
5064
5065 if ((conn->type == CONN_TYPE_OR && conn->state == OR_CONN_STATE_OPEN) ||
5066 (conn->type == CONN_TYPE_EXT_OR) ||
5067 (conn->type == CONN_TYPE_AP && conn->state == AP_CONN_STATE_OPEN) ||
5068 (conn->type == CONN_TYPE_EXIT && conn->state == EXIT_CONN_STATE_OPEN) ||
5069 (conn->type == CONN_TYPE_CONTROL &&
5071 return 1;
5072
5073 return 0;
5074}
5075
5076/** Return 1 if conn is in 'connecting' state, else return 0. */
5077int
5079{
5080 tor_assert(conn);
5081
5082 if (conn->marked_for_close)
5083 return 0;
5084 switch (conn->type)
5085 {
5086 case CONN_TYPE_OR:
5087 return conn->state == OR_CONN_STATE_CONNECTING;
5088 case CONN_TYPE_EXIT:
5089 return conn->state == EXIT_CONN_STATE_CONNECTING;
5090 case CONN_TYPE_DIR:
5091 return conn->state == DIR_CONN_STATE_CONNECTING;
5092 }
5093
5094 return 0;
5095}
5096
/** Produce the base64 form of <b>authenticator</b> for use in http or
 * https auth. In Basic authentication the authenticator is just the string
 * "username:password".
 *
 * Return the freshly allocated encoded string on success, or NULL if
 * base64_encode() reports failure. */
char *
alloc_http_authenticator(const char *authenticator)
{
  const size_t plain_len = strlen(authenticator);
  /* One byte beyond what base64_encode_size() reports (presumably room for
   * the terminating NUL -- confirm against base64_encode()). */
  const size_t encoded_cap = base64_encode_size(plain_len, 0) + 1;
  char *encoded = tor_malloc(encoded_cap);

  if (base64_encode(encoded, encoded_cap,
                    authenticator, plain_len, 0) >= 0) {
    return encoded;
  }

  /* Encoding failed: tor_free() releases the buffer and sets the pointer
   * to NULL, so the value returned below is NULL. */
  tor_free(encoded);
  return encoded;
}
5115
5116/** Given a socket handle, check whether the local address (sockname) of the
5117 * socket is one that we've connected from before. If so, double-check
5118 * whether our address has changed and we need to generate keys. If we do,
5119 * call init_keys().
5120 */
5121static void
5123{
5124 tor_addr_t out_addr, iface_addr;
5125 tor_addr_t **last_interface_ip_ptr;
5126 sa_family_t family;
5127
5128 if (!outgoing_addrs)
5130
5131 if (tor_addr_from_getsockname(&out_addr, sock) < 0) {
5132 int e = tor_socket_errno(sock);
5133 log_warn(LD_NET, "getsockname() to check for address change failed: %s",
5134 tor_socket_strerror(e));
5135 return;
5136 }
5137 family = tor_addr_family(&out_addr);
5138
5139 if (family == AF_INET)
5140 last_interface_ip_ptr = &last_interface_ipv4;
5141 else if (family == AF_INET6)
5142 last_interface_ip_ptr = &last_interface_ipv6;
5143 else
5144 return;
5145
5146 if (! *last_interface_ip_ptr) {
5147 tor_addr_t *a = tor_malloc_zero(sizeof(tor_addr_t));
5148 if (get_interface_address6(LOG_INFO, family, a)==0) {
5149 *last_interface_ip_ptr = a;
5150 } else {
5151 tor_free(a);
5152 }
5153 }
5154
5155 /* If we've used this address previously, we're okay. */
5157 if (tor_addr_eq(a_ptr, &out_addr))
5158 return;
5159 );
5160
5161 /* Uh-oh. We haven't connected from this address before. Has the interface
5162 * address changed? */
5163 if (get_interface_address6(LOG_INFO, family, &iface_addr)<0)
5164 return;
5165
5166 if (tor_addr_eq(&iface_addr, *last_interface_ip_ptr)) {
5167 /* Nope, it hasn't changed. Add this address to the list. */
5168 smartlist_add(outgoing_addrs, tor_memdup(&out_addr, sizeof(tor_addr_t)));
5169 } else {
5170 /* The interface changed. We're a client, so we need to regenerate our
5171 * keys. First, reset the state. */
5172 log_notice(LD_NET, "Our IP address has changed. Rotating keys...");
5173 tor_addr_copy(*last_interface_ip_ptr, &iface_addr);
5176 smartlist_add(outgoing_addrs, tor_memdup(&out_addr, sizeof(tor_addr_t)));
5177 /* We'll need to resolve ourselves again. */
5178 resolved_addr_reset_last(AF_INET);
5179 /* Okay, now change our keys. */
5181 }
5182}
5183
5184/** Some systems have limited system buffers for recv and xmit on
5185 * sockets allocated in a virtual server or similar environment. For a Tor
5186 * server this can produce the "Error creating network socket: No buffer
5187 * space available" error once all available TCP buffer space is consumed.
5188 * This method will attempt to constrain the buffers allocated for the socket
5189 * to the desired size to stay below system TCP buffer limits.
5190 */
5191static void
5193{
5194 void *sz = (void*)&size;
5195 socklen_t sz_sz = (socklen_t) sizeof(size);
5196 if (setsockopt(sock, SOL_SOCKET, SO_SNDBUF, sz, sz_sz) < 0) {
5197 int e = tor_socket_errno(sock);
5198 log_warn(LD_NET, "setsockopt() to constrain send "
5199 "buffer to %d bytes failed: %s", size, tor_socket_strerror(e));
5200 }
5201 if (setsockopt(sock, SOL_SOCKET, SO_RCVBUF, sz, sz_sz) < 0) {
5202 int e = tor_socket_errno(sock);
5203 log_warn(LD_NET, "setsockopt() to constrain recv "
5204 "buffer to %d bytes failed: %s", size, tor_socket_strerror(e));
5205 }
5206}
5207
5208/** Process new bytes that have arrived on conn->inbuf.
5209 *
5210 * This function just passes conn to the connection-specific
5211 * connection_*_process_inbuf() function. It also passes in
5212 * package_partial if wanted.
5213 */
5214int
5215connection_process_inbuf(connection_t *conn, int package_partial)
5216{
5217 tor_assert(conn);
5218
5219 switch (conn->type) {
5220 case CONN_TYPE_OR:
5222 case CONN_TYPE_EXT_OR:
5224 case CONN_TYPE_EXIT:
5225 case CONN_TYPE_AP:
5227 package_partial);
5228 case CONN_TYPE_DIR:
5230 case CONN_TYPE_CONTROL:
5232 case CONN_TYPE_METRICS:
5234 default:
5235 log_err(LD_BUG,"got unexpected conn type %d.", conn->type);
5237 return -1;
5238 }
5239}
5240
5241/** Called whenever we've written data on a connection. */
5242static int
5244{
5245 int r = 0;
5247 conn->in_flushed_some = 1;
5248 if (conn->type == CONN_TYPE_DIR &&
5250 r = connection_dirserv_flushed_some(TO_DIR_CONN(conn));
5251 } else if (conn->type == CONN_TYPE_OR) {
5253 } else if (CONN_IS_EDGE(conn)) {
5255 }
5256 conn->in_flushed_some = 0;
5257 return r;
5258}
5259
5260/** We just finished flushing bytes to the appropriately low network layer,
5261 * and there are no more bytes remaining in conn->outbuf or
5262 * conn->tls to be flushed.
5263 *
5264 * This function just passes conn to the connection-specific
5265 * connection_*_finished_flushing() function.
5266 */
5267static int
5269{
 /* NOTE(review): the signature line (5268) is absent from this listing,
  * as are 5278 and the line following each group of case labels (5282,
  * 5284, 5287, 5289, 5291, 5293, 5296).  What runs between the
  * closed-connection check and the switch, and what each case returns,
  * is not visible here. */
5270 tor_assert(conn);
5271
5272 /* If the connection is closed, don't try to do anything more here. */
5273 if (CONN_IS_CLOSED(conn))
5274 return 0;
5275
5276// log_fn(LOG_DEBUG,"entered. Socket %u.", conn->s);
5277
5279
 /* Dispatch on the connection type. */
5280 switch (conn->type) {
5281 case CONN_TYPE_OR:
5283 case CONN_TYPE_EXT_OR:
5285 case CONN_TYPE_AP:
5286 case CONN_TYPE_EXIT:
5288 case CONN_TYPE_DIR:
5290 case CONN_TYPE_CONTROL:
5292 case CONN_TYPE_METRICS:
5294 default:
 /* A type not listed above is logged at error severity under LD_BUG and
  * -1 is returned. */
5295 log_err(LD_BUG,"got unexpected conn type %d.", conn->type);
5297 return -1;
5298 }
5299}
5300
5301/** Called when our attempt to connect() to a server has just succeeded.
5302 *
5303 * This function checks if the interface address has changed (clients only),
5304 * and then passes conn to the connection-specific
5305 * connection_*_finished_connecting() function.
5306 */
5307static int
5309{
 /* NOTE(review): the signature line (5308) is absent from this listing,
  * as are 5316, 5322, 5324, 5326 and 5329.  The statement guarded by the
  * server_mode() check and the per-type returns are not visible here. */
5310 tor_assert(conn);
5311
 /* This branch is taken only when server_mode(get_options()) is false. */
5312 if (!server_mode(get_options())) {
5313 /* See whether getsockname() says our address changed. We need to do this
5314 * now that the connection has finished, because getsockname() on Windows
5315 * won't work until then. */
5317 }
5318
 /* Only OR, EXIT and DIR have case labels here; every other type reaches
  * the default branch. */
5319 switch (conn->type)
5320 {
5321 case CONN_TYPE_OR:
5323 case CONN_TYPE_EXIT:
5325 case CONN_TYPE_DIR:
5327 default:
5328 log_err(LD_BUG,"got unexpected conn type %d.", conn->type);
5330 return -1;
5331 }
5332}
5333
5334/** Callback: invoked when a connection reaches an EOF event. */
5335static int
5337{
 /* NOTE(review): the signature line (5336) is absent from this listing,
  * as are 5341, 5344, 5346, 5348 and 5353.  Only the METRICS case and the
  * default branch show their statements. */
5338 switch (conn->type) {
 /* OR and EXT_OR share a label group, as do AP and EXIT. */
5339 case CONN_TYPE_OR:
5340 case CONN_TYPE_EXT_OR:
5342 case CONN_TYPE_AP:
5343 case CONN_TYPE_EXIT:
5345 case CONN_TYPE_DIR:
5347 case CONN_TYPE_CONTROL:
5349 case CONN_TYPE_METRICS:
5350 return metrics_connection_reached_eof(conn);
5351 default:
 /* A type not listed above is logged at error severity under LD_BUG and
  * -1 is returned. */
5352 log_err(LD_BUG,"got unexpected conn type %d.", conn->type);
5354 return -1;
5355 }
5356}
5357
5358/** Comparator for the two-orconn case in OOS victim sort */
5359static int
5361{
 /* NOTE(review): the signature line (5360) is absent from this listing.
  * a and b are the two arguments handed to
  * connection_or_get_num_circuits() below. */
5362 int a_circs, b_circs;
5363 /* Fewer circuits == higher priority for OOS kill, sort earlier */
5364
5365 a_circs = connection_or_get_num_circuits(a);
5366 b_circs = connection_or_get_num_circuits(b);
5367
 /* NOTE(review): this returns 1 when a has FEWER circuits than b and -1
  * when it has more.  Under the usual comparator convention (a negative
  * result places a first) that puts the connection with more circuits
  * earlier, which appears to contradict the comment above.  The sort call
  * is not visible in this listing, so confirm the intended direction
  * before relying on the ordering. */
5368 if (a_circs < b_circs) return 1;
5369 else if (a_circs > b_circs) return -1;
5370 else return 0;
5371}
5372
5373/** Sort comparator for OOS victims; better targets sort before worse
5374 * ones. */
5375static int
5376oos_victim_comparator(const void **a_v, const void **b_v)
5377{
5378 connection_t *a = NULL, *b = NULL;
5379
5380 /* Get connection pointers out */
5381
 /* a_v and b_v each point at a stored element; the element is read and
  * cast to connection_t *. */
5382 a = (connection_t *)(*a_v);
5383 b = (connection_t *)(*b_v);
5384
5385 tor_assert(a != NULL);
5386 tor_assert(b != NULL);
5387
5388 /*
5389 * We always prefer orconns as victims currently; we won't even see
5390 * these non-orconn cases, but if we do, sort them after orconns.
5391 */
5392 if (a->type == CONN_TYPE_OR && b->type == CONN_TYPE_OR) {
 /* NOTE(review): the body of this branch (line 5393) is absent from this
  * listing, so how two orconns are ordered is not visible here. */
5394 } else {
5395 /*
5396 * One isn't an orconn; if one is, it goes first. We currently have no
5397 * opinions about cases where neither is an orconn.
5398 */
 /* -1 when only a is an orconn, 1 when only b is, 0 when neither is. */
5399 if (a->type == CONN_TYPE_OR) return -1;
5400 else if (b->type == CONN_TYPE_OR) return 1;
5401 else return 0;
5402 }
5403}
5404
5405/** Pick n victim connections for the OOS handler and return them in a
5406 * smartlist.
5407 */
5410{
 /* NOTE(review): the lines that declare this function (5408-5409) are
  * absent from this listing, as are 5440 (the line that opens the loop
  * closed by SMARTLIST_FOREACH_END(c) below), 5474 (the first argument
  * after the format string in the per-type log_info call) and 5484 (the
  * statement under "Sort the list in order of target preference"). */
5411 smartlist_t *eligible = NULL, *victims = NULL;
5412 smartlist_t *conns;
 /* One counter per connection type, indexed directly by c->type. */
5413 int conn_counts_by_type[CONN_TYPE_MAX_ + 1], i;
5414
5415 /*
5416 * Big damn assumption (someone improve this someday!):
5417 *
5418 * Socket exhaustion normally happens on high-volume relays, and so
5419 * most of the connections involved are orconns. We should pick victims
5420 * by assembling a list of all orconns, and sorting them in order of
5421 * how much 'damage' by some metric we'd be doing by dropping them.
5422 *
5423 * If we move on from orconns, we should probably think about incoming
5424 * directory connections next, or exit connections. Things we should
5425 * probably never kill are controller connections and listeners.
5426 *
5427 * This function will count how many connections of different types
5428 * exist and log it for purposes of gathering data on typical OOS
5429 * situations to guide future improvements.
5430 */
5431
5432 /* First, get the connection array */
5433 conns = get_connection_array();
5434 /*
5435 * Iterate it and pick out eligible connection types, and log some stats
5436 * along the way.
5437 */
5438 eligible = smartlist_new();
5439 memset(conn_counts_by_type, 0, sizeof(conn_counts_by_type));
5441 /* Bump the counter */
 /* The assert keeps the index within the CONN_TYPE_MAX_ + 1 slots.  The
  * count is taken before the skips below, so skipped connections are
  * still included in the stats. */
5442 tor_assert(c->type <= CONN_TYPE_MAX_);
5443 ++(conn_counts_by_type[c->type]);
5444
5445 /* Skip anything without a socket we can free */
5446 if (!(SOCKET_OK(c->s))) {
5447 continue;
5448 }
5449
5450 /* Skip anything we would count as moribund */
5451 if (connection_is_moribund(c)) {
5452 continue;
5453 }
5454
5455 switch (c->type) {
5456 case CONN_TYPE_OR:
5457 /* We've got an orconn, it's eligible to be OOSed */
5458 smartlist_add(eligible, c);
5459 break;
5460 default:
5461 /* We don't know what to do with it, ignore it */
5462 break;
5463 }
5464 } SMARTLIST_FOREACH_END(c);
5465
5466 /* Log some stats */
5467 if (smartlist_len(conns) > 0) {
5468 /* At least one counter must be non-zero */
5469 log_info(LD_NET, "Some stats on conn types seen during OOS follow");
5470 for (i = CONN_TYPE_MIN_; i <= CONN_TYPE_MAX_; ++i) {
5471 /* Did we see any? */
5472 if (conn_counts_by_type[i] > 0) {
5473 log_info(LD_NET, "%s: %d conns",
5475 conn_counts_by_type[i]);
5476 }
5477 }
5478 log_info(LD_NET, "Done with OOS conn type stats");
5479 }
5480
 /* If eligible holds more than n entries, its first n entries are copied
  * into a new list and eligible itself is freed; otherwise eligible is
  * returned as the victim list directly.  Either way the caller receives
  * a list it did not allocate. */
5481 /* Did we find more eligible targets than we want to kill? */
5482 if (smartlist_len(eligible) > n) {
5483 /* Sort the list in order of target preference */
5485 /* Pick first n as victims */
5486 victims = smartlist_new();
5487 for (i = 0; i < n; ++i) {
5488 smartlist_add(victims, smartlist_get(eligible, i));
5489 }
5490 /* Free the original list */
5491 smartlist_free(eligible);
5492 } else {
5493 /* No, we can just call them all victims */
5494 victims = eligible;
5495 }
5496
5497 return victims;
5498}
5499
5500/** Kill a list of connections for the OOS handler. */
5501MOCK_IMPL(STATIC void,
5503{
 /* NOTE(review): the rest of the MOCK_IMPL(...) header (5502) is absent
  * from this listing, as are 5506 (the line that opens the loop closed
  * by SMARTLIST_FOREACH_END(c) below) and 5509 (the statement run for
  * orconns). */
 /* A NULL list is a no-op. */
5504 if (!conns) return;
5505
5507 /* Make sure the channel layer gets told about orconns */
5508 if (c->type == CONN_TYPE_OR) {
5510 } else {
5511 connection_mark_for_close(c);
5512 }
5513 } SMARTLIST_FOREACH_END(c);
5514
 /* The count logged is the length of the list passed in. */
5515 log_notice(LD_NET,
5516 "OOS handler marked %d connections",
5517 smartlist_len(conns));
5518}
5519
5520/** Check if a connection is on the way out so the OOS handler doesn't try
5521 * to kill more than it needs. */
5522int
5524{
5525 if (conn != NULL &&
5526 (conn->conn_array_index < 0 ||
5527 conn->marked_for_close)) {
5528 return 1;
5529 } else {
5530 return 0;
5531 }
5532}
5533
5534/** Out-of-Sockets handler; n_socks is the current number of open
5535 * sockets, and failed is non-zero if a socket exhaustion related
5536 * error immediately preceded this call. This is where to do
5537 * circuit-killing heuristics as needed.
5538 */
5539void
5540connection_check_oos(int n_socks, int failed)
5541{
5542 int target_n_socks = 0, moribund_socks, socks_to_kill;
5543 smartlist_t *conns;
5544
5545 /* Early exit: is OOS checking disabled? */
5546 if (get_options()->DisableOOSCheck) {
5547 return;
5548 }
5549