9 #define TOR_CONGESTION_CONTROL_COMMON_PRIVATE
16 #include "core/or/or_circuit_st.h"
26 #include "core/or/trace_probes_cc.h"
31 #include "trunnel/congestion_control.h"
32 #include "trunnel/extension.h"
38 #define SENDME_INC_DFLT (TLS_RECORD_MAX_CELLS)
39 #define CIRCWINDOW_INIT (4*SENDME_INC_DFLT)
41 #define CC_ALG_DFLT (CC_ALG_SENDME)
42 #define CC_ALG_DFLT_ALWAYS (CC_ALG_VEGAS)
44 #define CWND_INC_DFLT (TLS_RECORD_MAX_CELLS)
45 #define CWND_INC_PCT_SS_DFLT (100)
46 #define CWND_INC_RATE_DFLT (1)
48 #define CWND_MIN_DFLT (2*SENDME_INC_DFLT)
49 #define CWND_MAX_DFLT (INT32_MAX)
51 #define BWE_SENDME_MIN_DFLT (5)
53 #define N_EWMA_CWND_PCT_DFLT (50)
54 #define N_EWMA_MAX_DFLT (10)
55 #define N_EWMA_SS_DFLT (2)
57 #define RTT_RESET_PCT_DFLT (100)
61 #define WESTWOOD_BDP_ALG BDP_ALG_PIECEWISE
62 #define VEGAS_BDP_MIX_ALG BDP_ALG_PIECEWISE
63 #define NOLA_BDP_ALG BDP_ALG_PIECEWISE
74 #define OR_CONN_HIGHWATER_DFLT (32*1024)
75 #define OR_CONN_LOWWATER_DFLT (16*1024)
82 #define CELL_QUEUE_LOW_DFLT (10)
83 #define CELL_QUEUE_HIGH_DFLT (256)
95 static uint64_t num_rtt_reset;
98 static uint64_t num_clock_stalls;
101 static uint32_t cwnd_max = CWND_MAX_DFLT;
102 int32_t cell_queue_high = CELL_QUEUE_HIGH_DFLT;
103 int32_t cell_queue_low = CELL_QUEUE_LOW_DFLT;
104 uint32_t or_conn_highwater = OR_CONN_HIGHWATER_DFLT;
105 uint32_t or_conn_lowwater = OR_CONN_LOWWATER_DFLT;
106 uint8_t cc_sendme_inc = SENDME_INC_DFLT;
107 static cc_alg_t cc_alg = CC_ALG_DFLT;
142 return num_rtt_reset;
149 return num_clock_stalls;
159 #define CELL_QUEUE_HIGH_MIN (1)
160 #define CELL_QUEUE_HIGH_MAX (1000)
162 CELL_QUEUE_HIGH_DFLT,
164 CELL_QUEUE_HIGH_MAX);
166 #define CELL_QUEUE_LOW_MIN (1)
167 #define CELL_QUEUE_LOW_MAX (1000)
173 #define OR_CONN_HIGHWATER_MIN (CELL_PAYLOAD_SIZE)
174 #define OR_CONN_HIGHWATER_MAX (INT32_MAX)
177 OR_CONN_HIGHWATER_DFLT,
178 OR_CONN_HIGHWATER_MIN,
179 OR_CONN_HIGHWATER_MAX);
181 #define OR_CONN_LOWWATER_MIN (CELL_PAYLOAD_SIZE)
182 #define OR_CONN_LOWWATER_MAX (INT32_MAX)
185 OR_CONN_LOWWATER_DFLT,
186 OR_CONN_LOWWATER_MIN,
187 OR_CONN_LOWWATER_MAX);
189 #define CWND_MAX_MIN 500
190 #define CWND_MAX_MAX (INT32_MAX)
197 #define RTT_RESET_PCT_MIN (0)
198 #define RTT_RESET_PCT_MAX (100)
205 #define SENDME_INC_MIN 1
206 #define SENDME_INC_MAX (255)
214 #define CC_ALG_MAX (NUM_CC_ALGS-1)
221 #define BWE_SENDME_MIN_MIN 2
222 #define BWE_SENDME_MIN_MAX (20)
229 #define N_EWMA_CWND_PCT_MIN 1
230 #define N_EWMA_CWND_PCT_MAX (255)
233 N_EWMA_CWND_PCT_DFLT,
235 N_EWMA_CWND_PCT_MAX);
237 #define N_EWMA_MAX_MIN 2
238 #define N_EWMA_MAX_MAX (INT32_MAX)
245 #define N_EWMA_SS_MIN 2
246 #define N_EWMA_SS_MAX (INT32_MAX)
271 #define CWND_INIT_MIN SENDME_INC_DFLT
272 #define CWND_INIT_MAX (10000)
279 #define CWND_INC_PCT_SS_MIN 1
280 #define CWND_INC_PCT_SS_MAX (500)
283 CWND_INC_PCT_SS_DFLT,
285 CWND_INC_PCT_SS_MAX);
287 #define CWND_INC_MIN 1
288 #define CWND_INC_MAX (1000)
295 #define CWND_INC_RATE_MIN 1
296 #define CWND_INC_RATE_MAX (250)
303 #define CWND_MIN_MIN SENDME_INC_DFLT
304 #define CWND_MIN_MAX (1000)
315 cc->
cc_alg = CC_ALG_DFLT_ALWAYS;
324 default_bdp_alg = WESTWOOD_BDP_ALG;
327 default_bdp_alg = VEGAS_BDP_MIX_ALG;
330 default_bdp_alg = NOLA_BDP_ALG;
456 uint64_t *timestamp_ptr = tor_malloc(
sizeof(uint64_t));
457 *timestamp_ptr = timestamp_usec;
465 static inline uint64_t
468 uint64_t *timestamp_ptr = smartlist_get(timestamps_u64_usecs, 0);
470 if (BUG(!timestamp_ptr)) {
471 log_err(
LD_CIRC,
"Congestion control timestamp list became empty!");
475 return *timestamp_ptr;
482 static inline uint64_t
485 uint64_t *timestamp_ptr = smartlist_get(timestamps_u64_usecs, 0);
486 uint64_t timestamp_u64;
488 if (BUG(!timestamp_ptr)) {
489 log_err(
LD_CIRC,
"Congestion control timestamp list became empty!");
493 timestamp_u64 = *timestamp_ptr;
497 return timestamp_u64;
507 static inline uint64_t
510 uint64_t ewma_cnt = 0;
522 ewma_cnt =
MAX(ewma_cnt, 2);
549 return package_window;
694 streams = CONST_TO_ORIGIN_CIRCUIT(circ)->
p_streams;
696 streams = CONST_TO_OR_CIRCUIT(circ)->
n_streams;
702 if (conn->base_.marked_for_close)
705 if (!layer_hint || conn->cpath_layer == layer_hint) {
706 if (connection_get_inbuf_len(
TO_CONN(conn)) > 0) {
707 log_info(
LD_CIRC,
"CC: More in edge inbuf...");
712 if (!
TO_CONN(conn)->inbuf_reached_eof) {
713 log_info(
LD_CIRC,
"CC: More on edge conn...");
717 if (
TO_CONN(conn)->linked_conn) {
718 if (connection_get_inbuf_len(
TO_CONN(conn)->linked_conn) > 0) {
719 log_info(
LD_CIRC,
"CC: More in linked inbuf...");
725 if (!
TO_CONN(conn)->linked_conn->inbuf_reached_eof) {
726 log_info(
LD_CIRC,
"CC: More on linked conn...");
772 if (cc->bdp[BDP_ALG_SENDME_RATE]) {
780 static bool is_monotime_clock_broken =
false;
793 uint64_t old_delta, uint64_t new_delta)
795 #define DELTA_DISCREPENCY_RATIO_MAX 5000
797 if (new_delta == 0) {
798 static ratelim_t stall_info_limit = RATELIM_INIT(60);
800 "Congestion control cannot measure RTT due to monotime stall.");
802 is_monotime_clock_broken =
true;
823 if (old_delta > new_delta * DELTA_DISCREPENCY_RATIO_MAX) {
824 static ratelim_t dec_notice_limit = RATELIM_INIT(300);
826 "Sudden decrease in circuit RTT (%"PRIu64
" vs %"PRIu64
827 "), likely due to clock jump.",
828 new_delta/1000, old_delta/1000);
830 return is_monotime_clock_broken;
838 if (new_delta > old_delta * DELTA_DISCREPENCY_RATIO_MAX) {
839 static ratelim_t dec_notice_limit = RATELIM_INIT(300);
841 "Sudden increase in circuit RTT (%"PRIu64
" vs %"PRIu64
842 "), likely due to clock jump or suspended remote endpoint.",
843 new_delta/1000, old_delta/1000);
849 is_monotime_clock_broken =
false;
860 return !is_monotime_clock_broken;
877 uint64_t rtt, ewma_cnt;
878 uint64_t sent_at_timestamp;
886 rtt = now_usec - sent_at_timestamp;
898 if (rtt > cc->max_rtt_usec) {
899 cc->max_rtt_usec = rtt;
902 if (cc->min_rtt_usec == 0) {
911 static ratelim_t rtt_notice_limit = RATELIM_INIT(300);
913 "Resetting circ RTT from %"PRIu64
" to %"PRIu64
" due to low cwnd",
914 cc->min_rtt_usec/1000, new_rtt/1000);
916 cc->min_rtt_usec = new_rtt;
944 uint64_t curr_rtt_usec)
947 unsigned int blocked_on_chan = 0;
948 uint64_t timestamp_usec;
949 uint64_t sendme_rate_bdp = 0;
967 uint64_t cwnd = cc->
cwnd;
969 tor_assert_nonfatal(cc->
cwnd <= cwnd_max);
973 if (blocked_on_chan) {
975 if (chan_q >= (int64_t)cwnd) {
977 "Clock stall with large chanq: %d %"PRIu64, chan_q, cwnd);
987 cc->bdp[BDP_ALG_CWND_RTT] = cwnd;
988 cc->bdp[BDP_ALG_INFLIGHT_RTT] = cwnd;
989 cc->bdp[BDP_ALG_SENDME_RATE] = cwnd;
990 cc->bdp[BDP_ALG_PIECEWISE] = cwnd;
992 static ratelim_t dec_notice_limit = RATELIM_INIT(300);
994 "Our clock has been stalled for the entire lifetime of a circuit. "
995 "Performance may be sub-optimal.");
997 return blocked_on_chan;
1017 "CC: Streams drained. Spare package window: %"PRIu64
1051 uint64_t delta = now_usec - timestamp_usec;
1061 uint64_t cells = (sendme_cnt-1)*cc->
sendme_inc;
1066 sendme_rate_bdp = cells*cc->min_rtt_usec/delta;
1069 cc->bdp[BDP_ALG_SENDME_RATE] =
1070 n_count_ewma(sendme_rate_bdp, cc->bdp[BDP_ALG_SENDME_RATE],
1078 cc->bdp[BDP_ALG_INFLIGHT_RTT] =
1079 (cc->
inflight - chan_q)*cc->min_rtt_usec/
1084 cc->bdp[BDP_ALG_INFLIGHT_RTT] =
1089 if (blocked_on_chan) {
1090 log_info(
LD_CIRC,
"CC: Streams blocked on circ channel. Chanq: %d",
1100 if (cc->bdp[BDP_ALG_SENDME_RATE]) {
1101 cc->bdp[BDP_ALG_PIECEWISE] = MIN(cc->bdp[BDP_ALG_INFLIGHT_RTT],
1102 cc->bdp[BDP_ALG_SENDME_RATE]);
1104 cc->bdp[BDP_ALG_PIECEWISE] = cc->bdp[BDP_ALG_INFLIGHT_RTT];
1112 log_info(
LD_CIRC,
"CC: Streams un-blocked on circ channel. Chanq: %d",
1116 cc->bdp[BDP_ALG_PIECEWISE] =
MAX(cc->bdp[BDP_ALG_SENDME_RATE],
1117 cc->bdp[BDP_ALG_CWND_RTT]);
1124 if (!cc->bdp[BDP_ALG_PIECEWISE]) {
1125 cc->bdp[BDP_ALG_PIECEWISE] = cc->bdp[BDP_ALG_CWND_RTT];
1126 log_info(
LD_CIRC,
"CC: No piecewise BDP. Using %"PRIu64,
1127 cc->bdp[BDP_ALG_PIECEWISE]);
1134 "SENDME RTT: %"PRIu64
", %"PRIu64
", %"PRIu64
", %"PRIu64
", "
1141 CONST_TO_ORIGIN_CIRCUIT(circ)->global_identifier,
1142 cc->min_rtt_usec/1000,
1145 cc->max_rtt_usec/1000,
1146 cc->bdp[BDP_ALG_INFLIGHT_RTT],
1147 cc->bdp[BDP_ALG_CWND_RTT],
1149 cc->bdp[BDP_ALG_SENDME_RATE],
1150 cc->bdp[BDP_ALG_PIECEWISE]
1154 "CC: Circuit %"PRIu64
":%d "
1155 "SENDME RTT: %"PRIu64
", %"PRIu64
", %"PRIu64
", %"PRIu64
", "
1161 CONST_TO_OR_CIRCUIT(circ)->p_chan->global_identifier,
1162 CONST_TO_OR_CIRCUIT(circ)->p_circ_id,
1163 cc->min_rtt_usec/1000,
1166 cc->max_rtt_usec/1000,
1167 cc->bdp[BDP_ALG_INFLIGHT_RTT],
1168 cc->bdp[BDP_ALG_CWND_RTT],
1170 cc->bdp[BDP_ALG_SENDME_RATE],
1171 cc->bdp[BDP_ALG_PIECEWISE]
1178 bool ret = (blocked_on_chan || curr_rtt_usec != 0);
1180 tor_trace(
TR_SUBSYS(cc), TR_EV(bdp_update), circ, cc, curr_rtt_usec,
1194 int ret = -END_CIRC_REASON_INTERNAL;
1213 if (cc->
cwnd > cwnd_max) {
1214 static ratelim_t cwnd_limit = RATELIM_INIT(60);
1216 "Congestion control cwnd %"PRIu64
" exceeds max %d, clamping.",
1217 cc->
cwnd, cwnd_max);
1218 cc->
cwnd = cwnd_max;
1242 uint8_t *request = NULL;
1243 trn_extension_t *ext = NULL;
1244 trn_extension_field_t *field = NULL;
1246 ext = trn_extension_new();
1253 field = trn_extension_field_new();
1254 trn_extension_field_set_field_type(field,
1255 TRUNNEL_EXT_TYPE_CC_FIELD_REQUEST);
1258 trn_extension_field_set_field_len(field, 0);
1261 trn_extension_add_fields(ext, field);
1262 trn_extension_set_num(ext, 1);
1266 ssize_t ret = trn_extension_encoded_len(ext);
1270 size_t request_len = ret;
1271 request = tor_malloc_zero(request_len);
1272 ret = trn_extension_encode(request, request_len, ext);
1278 *msg_len_out = request_len;
1284 trn_extension_free(ext);
1302 trn_extension_t *ext = NULL;
1303 size_t num_fields = 0;
1306 ret = trn_extension_parse(&ext, msg, msg_len);
1313 if ((num_fields = trn_extension_get_num(ext)) == 0) {
1320 for (
size_t f = 0; f < num_fields; f++) {
1321 const trn_extension_field_t *field = trn_extension_get_fields(ext, f);
1322 if (field == NULL) {
1328 if (trn_extension_field_get_field_type(field) ==
1329 TRUNNEL_EXT_TYPE_CC_FIELD_REQUEST) {
1336 trn_extension_free(ext);
1362 uint8_t **msg_out,
size_t *msg_len_out)
1365 uint8_t *request = NULL;
1366 trn_extension_t *ext = NULL;
1367 trn_extension_field_t *field = NULL;
1368 trn_extension_field_cc_t *cc_field = NULL;
1375 ext = trn_extension_new();
1379 field = trn_extension_field_new();
1380 trn_extension_field_set_field_type(field,
1381 TRUNNEL_EXT_TYPE_CC_FIELD_RESPONSE);
1384 cc_field = trn_extension_field_cc_new();
1385 trn_extension_field_cc_set_sendme_inc(cc_field,
1388 ret = trn_extension_field_cc_encoded_len(cc_field);
1389 if (BUG(ret <= 0)) {
1390 trn_extension_field_free(field);
1393 size_t field_len = ret;
1394 trn_extension_field_set_field_len(field, field_len);
1395 trn_extension_field_setlen_field(field, field_len);
1397 uint8_t *field_array = trn_extension_field_getarray_field(field);
1398 ret = trn_extension_field_cc_encode(field_array,
1399 trn_extension_field_getlen_field(field), cc_field);
1400 if (BUG(ret <= 0)) {
1401 trn_extension_field_free(field);
1406 trn_extension_add_fields(ext, field);
1407 trn_extension_set_num(ext, 1);
1411 ret = trn_extension_encoded_len(ext);
1415 size_t request_len = ret;
1416 request = tor_malloc_zero(request_len);
1417 ret = trn_extension_encode(request, request_len, ext);
1423 *msg_len_out = request_len;
1429 trn_extension_free(ext);
1430 trn_extension_field_cc_free(cc_field);
1444 #define MAX_SENDME_INC_NEGOTIATE_FACTOR 2
1446 if (sendme_inc == 0)
1462 const size_t msg_len,
1466 size_t num_fields = 0;
1467 trn_extension_t *ext = NULL;
1468 trn_extension_field_cc_t *cc_field = NULL;
1475 #define MAX_SENDME_INC_NEGOTIATE_FACTOR 2
1478 ret = trn_extension_parse(&ext, msg, msg_len);
1483 if ((num_fields = trn_extension_get_num(ext)) == 0) {
1490 for (
size_t f = 0; f < num_fields; f++) {
1491 const trn_extension_field_t *field = trn_extension_get_fields(ext, f);
1492 if (field == NULL) {
1498 if (trn_extension_field_get_field_type(field) ==
1499 TRUNNEL_EXT_TYPE_CC_FIELD_RESPONSE) {
1502 ret = trn_extension_field_cc_parse(&cc_field,
1503 trn_extension_field_getconstarray_field(field),
1504 trn_extension_field_getlen_field(field));
1509 uint8_t sendme_inc_cells =
1510 trn_extension_field_cc_get_sendme_inc(cc_field);
1524 trn_extension_free(ext);
1525 trn_extension_field_cc_free(cc_field);
1560 " SS=%d CWND=%"PRIu64
" RTT=%"PRIu64
" MIN_RTT=%"PRIu64,
1563 ccontrol->min_rtt_usec/1000);
1565 log_warn(
LD_BUG,
"Unable to format event for controller.");
Header file for channel.c.
Header file for circuitlist.c.
#define CIRCUIT_IS_ORIGIN(c)
uint64_t monotime_absolute_usec(void)
Functions and types for monotonic times.
const or_options_t * get_options(void)
Header file for config.c.
bool congestion_control_validate_sendme_increment(uint8_t sendme_inc)
static uint64_t n_ewma_count(const congestion_control_t *cc)
char * congestion_control_get_control_port_fields(const origin_circuit_t *circ)
int sendme_get_inc_count(const circuit_t *circ, const crypt_path_t *layer_hint)
static uint64_t dequeue_timestamp(smartlist_t *timestamps_u64_usecs)
static uint8_t rtt_reset_pct
static uint64_t peek_timestamp(const smartlist_t *timestamps_u64_usecs)
static uint8_t n_ewma_max
static uint64_t congestion_control_update_circuit_rtt(congestion_control_t *, uint64_t)
void congestion_control_free_(congestion_control_t *cc)
static void congestion_control_init(congestion_control_t *cc, const circuit_params_t *params, cc_path_t path)
int congestion_control_dispatch_cc_alg(congestion_control_t *cc, const circuit_t *circ, const crypt_path_t *layer_hint)
bool circuit_sent_cell_for_sendme(const circuit_t *circ, const crypt_path_t *layer_hint)
uint64_t congestion_control_get_num_clock_stalls(void)
static bool time_delta_should_use_heuristics(const congestion_control_t *cc)
static bool time_delta_stalled_or_jumped(const congestion_control_t *cc, uint64_t old_delta, uint64_t new_delta)
uint64_t congestion_control_get_num_rtt_reset(void)
void congestion_control_note_cell_sent(congestion_control_t *cc, const circuit_t *circ, const crypt_path_t *cpath)
int congestion_control_build_ext_response(const circuit_params_t *our_params, const circuit_params_t *circ_params, uint8_t **msg_out, size_t *msg_len_out)
static int circuit_has_active_streams(const circuit_t *circ, const crypt_path_t *layer_hint)
static bool congestion_control_update_circuit_bdp(congestion_control_t *, const circuit_t *, const crypt_path_t *, uint64_t, uint64_t)
int congestion_control_parse_ext_request(const uint8_t *msg, const size_t msg_len)
void congestion_control_set_cc_enabled(void)
int congestion_control_build_ext_request(uint8_t **msg_out, size_t *msg_len_out)
static void congestion_control_init_params(congestion_control_t *cc, const circuit_params_t *params, cc_path_t path)
int congestion_control_parse_ext_response(const uint8_t *msg, const size_t msg_len, circuit_params_t *params_out)
static uint8_t bwe_sendme_min
congestion_control_t * congestion_control_new(const circuit_params_t *params, cc_path_t path)
bool congestion_control_enabled(void)
bool congestion_control_update_circuit_estimates(congestion_control_t *cc, const circuit_t *circ, const crypt_path_t *layer_hint)
uint64_t cc_stats_circs_created
bool is_monotime_clock_reliable(void)
int congestion_control_get_package_window(const circuit_t *circ, const crypt_path_t *cpath)
static void enqueue_timestamp(smartlist_t *timestamps_u64, uint64_t timestamp_usec)
static uint8_t n_ewma_cwnd_pct
void congestion_control_new_consensus_params(const networkstatus_t *ns)
Public APIs for congestion control.
static uint64_t n_count_ewma(uint64_t curr, uint64_t prev, uint64_t N)
static uint64_t percent_max_mix(uint64_t a, uint64_t b, uint8_t pct_max)
static uint8_t congestion_control_sendme_inc(void)
int congestion_control_nola_process_sendme(congestion_control_t *cc, const circuit_t *circ, const crypt_path_t *layer_hint)
void congestion_control_nola_set_params(congestion_control_t *cc)
Private-ish APIs for the TOR_NOLA congestion control algorithm.
Structure definitions for congestion control.
static uint64_t CWND_UPDATE_RATE(const struct congestion_control_t *cc)
void congestion_control_vegas_set_params(congestion_control_t *cc, cc_path_t path)
int congestion_control_vegas_process_sendme(congestion_control_t *cc, const circuit_t *circ, const crypt_path_t *layer_hint)
Private-ish APIs for the TOR_VEGAS congestion control algorithm.
int congestion_control_westwood_process_sendme(congestion_control_t *cc, const circuit_t *circ, const crypt_path_t *layer_hint)
void congestion_control_westwood_set_params(congestion_control_t *cc)
Private-ish APIs for the TOR_WESTWOOD congestion control algorithm.
Header file for connection.c.
Header file for crypt_path.c.
#define log_fn_ratelim(ratelim, severity, domain, args,...)
int32_t networkstatus_get_param(const networkstatus_t *ns, const char *param_name, int32_t default_val, int32_t min_val, int32_t max_val)
Header file for networkstatus.c.
Header file for onion_crypto.c.
Master header file for Tor-specific functionality.
#define CIRCWINDOW_INCREMENT
Origin circuit structure.
int tor_asprintf(char **strp, const char *fmt,...)
Header file for sendme.c.
smartlist_t * smartlist_new(void)
void smartlist_add(smartlist_t *sl, void *element)
void smartlist_clear(smartlist_t *sl)
void smartlist_del_keeporder(smartlist_t *sl, int idx)
#define SMARTLIST_FOREACH(sl, type, var, cmd)
unsigned int streams_blocked_on_n_chan
cell_queue_t n_chan_cells
unsigned int streams_blocked_on_p_chan
struct congestion_control_t * ccontrol
smartlist_t * sendme_arrival_timestamps
smartlist_t * sendme_pending_timestamps
struct crypt_path_t * prev
struct congestion_control_t * ccontrol
struct edge_connection_t * next_stream
cell_queue_t p_chan_cells
edge_connection_t * n_streams
int AlwaysCongestionControl
edge_connection_t * p_streams
#define tor_fragile_assert()