Tor  0.4.8.0-alpha-dev
circuitstats.c
Go to the documentation of this file.
1 /* Copyright (c) 2001 Matej Pfajfar.
2  * Copyright (c) 2001-2004, Roger Dingledine.
3  * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
4  * Copyright (c) 2007-2021, The Tor Project, Inc. */
5 /* See LICENSE for licensing information */
6 
7 /**
8  * \file circuitstats.c
9  *
10  * \brief Maintains and analyzes statistics about circuit built times, so we
11  * can tell how long we may need to wait for a fast circuit to be constructed.
12  *
13  * By keeping these statistics, a client learns when it should time out a slow
14  * circuit for being too slow, and when it should keep a circuit open in order
15  * to wait for it to complete.
16  *
17  * The information here is kept in a circuit_built_times_t structure, which is
18  * currently a singleton, but doesn't need to be. It's updated by calls to
19  * circuit_build_times_count_timeout() from circuituse.c,
20  * circuit_build_times_count_close() from circuituse.c, and
21  * circuit_build_times_add_time() from circuitbuild.c, and inspected by other
22  * calls into this module, mostly from circuitlist.c. Observations are
23  * persisted to disk via the or_state_t-related calls.
24  */
25 
26 #define CIRCUITSTATS_PRIVATE
27 
28 #include "core/or/or.h"
29 #include "core/or/circuitbuild.h"
30 #include "core/or/circuitstats.h"
31 #include "app/config/config.h"
32 #include "lib/confmgt/confmgt.h"
35 #include "core/mainloop/mainloop.h"
37 #include "feature/relay/router.h"
38 #include "app/config/statefile.h"
39 #include "core/or/circuitlist.h"
40 #include "core/or/circuituse.h"
41 #include "lib/math/fp.h"
42 #include "lib/time/tvdiff.h"
43 #include "lib/encoding/confline.h"
45 #include "feature/hs/hs_service.h"
47 
48 #include "core/or/crypt_path_st.h"
50 #include "app/config/or_state_st.h"
51 
52 #undef log
53 #include <math.h>
54 
56 
57 #define CBT_BIN_TO_MS(bin) ((bin)*CBT_BIN_WIDTH + (CBT_BIN_WIDTH/2))
58 
59 /** Global list of circuit build times */
60 // XXXX: Add this as a member for entry_guard_t instead of global?
61 // Then we could do per-guard statistics, as guards are likely to
62 // vary in their own latency. The downside of this is that guards
63 // can change frequently, so we'd be building a lot more circuits
64 // most likely.
66 
67 #ifdef TOR_UNIT_TESTS
68 /** If set, we're running the unit tests: we should avoid clobbering
69  * our state file or accessing get_options() or get_or_state() */
70 static int unit_tests = 0;
71 #else
72 #define unit_tests 0
73 #endif /* defined(TOR_UNIT_TESTS) */
74 
75 /** Return a pointer to the data structure describing our current circuit
76  * build time history and computations. */
79 {
80  return &circ_times;
81 }
82 
83 /** As get_circuit_build_times, but return a mutable pointer. */
86 {
87  return &circ_times;
88 }
89 
90 /** Return the time to wait before actually closing an under-construction, in
91  * milliseconds. */
92 double
94 {
95  return circ_times.close_ms;
96 }
97 
98 /** Return the time to wait before giving up on an under-construction circuit,
99  * in milliseconds. */
100 double
102 {
103  return circ_times.timeout_ms;
104 }
105 
106 /**
107  * This function decides if CBT learning should be disabled. It returns
108  * true if one or more of the following conditions are met:
109  *
110  * 1. If the cbtdisabled consensus parameter is set.
111  * 2. If the torrc option LearnCircuitBuildTimeout is false.
112  * 3. If we are a directory authority
113  * 4. If we fail to write circuit build time history to our state file.
114  * 5. If we are configured in Single Onion mode
115  */
116 int
118 {
119  return circuit_build_times_disabled_(options, 0);
120 }
121 
122 /** As circuit_build_times_disabled, but take options as an argument. */
123 int
125  int ignore_consensus)
126 {
127  if (unit_tests) {
128  return 0;
129  } else {
130  int consensus_disabled =
131  ignore_consensus ? 0 : networkstatus_get_param(NULL, "cbtdisabled",
132  0, 0, 1);
133  int config_disabled = !options->LearnCircuitBuildTimeout;
134  int dirauth_disabled = authdir_mode(options);
135  int state_disabled = did_last_state_file_write_fail() ? 1 : 0;
136  /* LearnCircuitBuildTimeout and Single Onion Services are
137  * incompatible in two ways:
138  *
139  * - LearnCircuitBuildTimeout results in a low CBT, which
140  * Single Onion use of one-hop intro and rendezvous circuits lowers
141  * much further, producing *far* too many timeouts.
142  *
143  * - The adaptive CBT code does not update its timeout estimate
144  * using build times for single-hop circuits.
145  *
146  * If we fix both of these issues someday, we should test
147  * these modes with LearnCircuitBuildTimeout on again. */
148  int single_onion_disabled = hs_service_allow_non_anonymous_connection(
149  options);
150 
151  if (consensus_disabled || config_disabled || dirauth_disabled ||
152  state_disabled || single_onion_disabled) {
153 #if 0
154  log_debug(LD_CIRC,
155  "CircuitBuildTime learning is disabled. "
156  "Consensus=%d, Config=%d, AuthDir=%d, StateFile=%d",
157  consensus_disabled, config_disabled, dirauth_disabled,
158  state_disabled);
159 #endif /* 0 */
160  return 1;
161  } else {
162 #if 0
163  log_debug(LD_CIRC,
164  "CircuitBuildTime learning is not disabled. "
165  "Consensus=%d, Config=%d, AuthDir=%d, StateFile=%d",
166  consensus_disabled, config_disabled, dirauth_disabled,
167  state_disabled);
168 #endif /* 0 */
169  return 0;
170  }
171  }
172 }
173 
174 /**
175  * Retrieve and bounds-check the cbtmaxtimeouts consensus parameter.
176  *
177  * Effect: When this many timeouts happen in the last 'cbtrecentcount'
178  * circuit attempts, the client should discard all of its history and
179  * begin learning a fresh timeout value.
180  */
181 static int32_t
183 {
184  int32_t cbt_maxtimeouts;
185 
186  cbt_maxtimeouts = networkstatus_get_param(NULL, "cbtmaxtimeouts",
188  CBT_MIN_MAX_RECENT_TIMEOUT_COUNT,
189  CBT_MAX_MAX_RECENT_TIMEOUT_COUNT);
190 
191  if (!(get_options()->LearnCircuitBuildTimeout)) {
192  log_debug(LD_BUG,
193  "circuit_build_times_max_timeouts() called, cbtmaxtimeouts is"
194  " %d",
195  cbt_maxtimeouts);
196  }
197 
198  return cbt_maxtimeouts;
199 }
200 
201 /**
202  * Retrieve and bounds-check the cbtnummodes consensus parameter.
203  *
204  * Effect: This value governs how many modes to use in the weighted
205  * average calculation of Pareto parameter Xm. Analysis of pairs of
206  * geographically near, far, and mixed guaeds has shown that a value of
207  * 10 introduces some allows for the actual timeout rate to be within
208  * 2-7% of the cutoff quantile, for quantiles between 60-80%.
209  */
210 static int32_t
212 {
213  int32_t num = networkstatus_get_param(NULL, "cbtnummodes",
215  CBT_MIN_NUM_XM_MODES,
216  CBT_MAX_NUM_XM_MODES);
217 
218  if (!(get_options()->LearnCircuitBuildTimeout)) {
219  log_debug(LD_BUG,
220  "circuit_build_times_default_num_xm_modes() called, cbtnummodes"
221  " is %d",
222  num);
223  }
224 
225  return num;
226 }
227 
228 /**
229  * Retrieve and bounds-check the cbtmincircs consensus parameter.
230  *
231  * Effect: This is the minimum number of circuits to build before
232  * computing a timeout.
233  */
234 static int32_t
236 {
237  int32_t num = networkstatus_get_param(NULL, "cbtmincircs",
239  CBT_MIN_MIN_CIRCUITS_TO_OBSERVE,
240  CBT_MAX_MIN_CIRCUITS_TO_OBSERVE);
241 
242  if (!(get_options()->LearnCircuitBuildTimeout)) {
243  log_debug(LD_BUG,
244  "circuit_build_times_min_circs_to_observe() called, cbtmincircs"
245  " is %d",
246  num);
247  }
248 
249  return num;
250 }
251 
252 /** Return true iff <b>cbt</b> has recorded enough build times that we
253  * want to start acting on the timeout it implies. */
254 int
256 {
258 }
259 
260 /**
261  * Retrieve and bounds-check the cbtquantile consensus parameter.
262  *
263  * Effect: This is the position on the quantile curve to use to set the
264  * timeout value. It is a percent (10-99).
265  */
266 double
268 {
269  int32_t num = networkstatus_get_param(NULL, "cbtquantile",
271  CBT_MIN_QUANTILE_CUTOFF,
272  CBT_MAX_QUANTILE_CUTOFF);
273 
274  if (!(get_options()->LearnCircuitBuildTimeout)) {
275  log_debug(LD_BUG,
276  "circuit_build_times_quantile_cutoff() called, cbtquantile"
277  " is %d",
278  num);
279  }
280 
281  return num/100.0;
282 }
283 
284 /**
285  * Retrieve and bounds-check the cbtclosequantile consensus parameter.
286  *
287  * Effect: This is the position on the quantile curve to use to set the
288  * timeout value to use to actually close circuits. It is a percent
289  * (0-99).
290  */
291 static double
293 {
294  int32_t param;
295  /* Cast is safe - circuit_build_times_quantile_cutoff() is capped */
296  int32_t min = (int)tor_lround(100*circuit_build_times_quantile_cutoff());
297  param = networkstatus_get_param(NULL, "cbtclosequantile",
299  CBT_MIN_CLOSE_QUANTILE,
300  CBT_MAX_CLOSE_QUANTILE);
301 
302  if (!(get_options()->LearnCircuitBuildTimeout)) {
303  log_debug(LD_BUG,
304  "circuit_build_times_close_quantile() called, cbtclosequantile"
305  " is %d", param);
306  }
307 
308  if (param < min) {
309  log_warn(LD_DIR, "Consensus parameter cbtclosequantile is "
310  "too small, raising to %d", min);
311  param = min;
312  }
313  return param / 100.0;
314 }
315 
316 /**
317  * Retrieve and bounds-check the cbttestfreq consensus parameter.
318  *
319  * Effect: Describes how often in seconds to build a test circuit to
320  * gather timeout values. Only applies if less than 'cbtmincircs'
321  * have been recorded.
322  */
323 static int32_t
325 {
326  int32_t num = networkstatus_get_param(NULL, "cbttestfreq",
328  CBT_MIN_TEST_FREQUENCY,
329  CBT_MAX_TEST_FREQUENCY);
330 
331  if (!(get_options()->LearnCircuitBuildTimeout)) {
332  log_debug(LD_BUG,
333  "circuit_build_times_test_frequency() called, cbttestfreq is %d",
334  num);
335  }
336 
337  return num;
338 }
339 
340 /**
341  * Retrieve and bounds-check the cbtmintimeout consensus parameter.
342  *
343  * Effect: This is the minimum allowed timeout value in milliseconds.
344  * The minimum is to prevent rounding to 0 (we only check once
345  * per second).
346  */
347 static int32_t
349 {
350  int32_t num = networkstatus_get_param(NULL, "cbtmintimeout",
352  CBT_MIN_TIMEOUT_MIN_VALUE,
353  CBT_MAX_TIMEOUT_MIN_VALUE);
354 
355  if (!(get_options()->LearnCircuitBuildTimeout)) {
356  log_debug(LD_BUG,
357  "circuit_build_times_min_timeout() called, cbtmintimeout is %d",
358  num);
359  }
360  return num;
361 }
362 
363 /**
364  * Retrieve and bounds-check the cbtinitialtimeout consensus parameter.
365  *
366  * Effect: This is the timeout value to use before computing a timeout,
367  * in milliseconds.
368  */
369 int32_t
371 {
372  int32_t min = circuit_build_times_min_timeout();
373  int32_t param = networkstatus_get_param(NULL, "cbtinitialtimeout",
375  CBT_MIN_TIMEOUT_INITIAL_VALUE,
376  CBT_MAX_TIMEOUT_INITIAL_VALUE);
377 
378  if (!(get_options()->LearnCircuitBuildTimeout)) {
379  log_debug(LD_BUG,
380  "circuit_build_times_initial_timeout() called, "
381  "cbtinitialtimeout is %d",
382  param);
383  }
384 
385  if (param < min) {
386  log_warn(LD_DIR, "Consensus parameter cbtinitialtimeout is too small, "
387  "raising to %d", min);
388  param = min;
389  }
390  return param;
391 }
392 
393 /**
394  * Retrieve and bounds-check the cbtrecentcount consensus parameter.
395  *
396  * Effect: This is the number of circuit build times to keep track of
397  * for deciding if we hit cbtmaxtimeouts and need to reset our state
398  * and learn a new timeout.
399  */
400 static int32_t
402 {
403  int32_t num;
404  num = networkstatus_get_param(ns, "cbtrecentcount",
406  CBT_MIN_RECENT_CIRCUITS,
407  CBT_MAX_RECENT_CIRCUITS);
408 
409  if (!(get_options()->LearnCircuitBuildTimeout)) {
410  log_debug(LD_BUG,
411  "circuit_build_times_recent_circuit_count() called, "
412  "cbtrecentcount is %d",
413  num);
414  }
415 
416  return num;
417 }
418 
419 /**
420  * This function is called when we get a consensus update.
421  *
422  * It checks to see if we have changed any consensus parameters
423  * that require reallocation or discard of previous stats.
424  */
425 void
427  const networkstatus_t *ns)
428 {
429  int32_t num;
430 
431  /*
432  * First check if we're doing adaptive timeouts at all; nothing to
433  * update if we aren't.
434  */
435 
438 
439  if (num > 0) {
440  if (num != cbt->liveness.num_recent_circs) {
441  int8_t *recent_circs;
442  if (cbt->liveness.num_recent_circs > 0) {
443  log_notice(LD_CIRC, "The Tor Directory Consensus has changed how "
444  "many circuits we must track to detect network failures "
445  "from %d to %d.", cbt->liveness.num_recent_circs, num);
446  } else {
447  log_notice(LD_CIRC, "Upon receiving a consensus directory, "
448  "re-enabling circuit-based network failure detection.");
449  }
450 
452  cbt->liveness.num_recent_circs == 0);
453 
454  /*
455  * Technically this is a circular array that we are reallocating
456  * and memcopying. However, since it only consists of either 1s
457  * or 0s, and is only used in a statistical test to determine when
458  * we should discard our history after a sufficient number of 1's
459  * have been reached, it is fine if order is not preserved or
460  * elements are lost.
461  *
462  * cbtrecentcount should only be changing in cases of severe network
463  * distress anyway, so memory correctness here is paramount over
464  * doing acrobatics to preserve the array.
465  */
466  recent_circs = tor_calloc(num, sizeof(int8_t));
468  cbt->liveness.num_recent_circs > 0) {
469  memcpy(recent_circs, cbt->liveness.timeouts_after_firsthop,
470  sizeof(int8_t)*MIN(num, cbt->liveness.num_recent_circs));
471  }
472 
473  // Adjust the index if it needs it.
474  if (num < cbt->liveness.num_recent_circs) {
475  cbt->liveness.after_firsthop_idx = MIN(num-1,
477  }
478 
480  cbt->liveness.timeouts_after_firsthop = recent_circs;
481  cbt->liveness.num_recent_circs = num;
482  }
483  /* else no change, nothing to do */
484  } else { /* num == 0 */
485  /*
486  * Weird. This probably shouldn't happen, so log a warning, but try
487  * to do something sensible anyway.
488  */
489 
490  log_warn(LD_CIRC,
491  "The cbtrecentcircs consensus parameter came back zero! "
492  "This disables adaptive timeouts since we can't keep track of "
493  "any recent circuits.");
494 
496  }
497  } else {
498  /*
499  * Adaptive timeouts are disabled; this might be because of the
500  * LearnCircuitBuildTimes config parameter, and hence permanent, or
501  * the cbtdisabled consensus parameter, so it may be a new condition.
502  * Treat it like getting num == 0 above and free the circuit history
503  * if we have any.
504  */
505 
507  }
508 }
509 
510 /**
511  * Return the initial default or configured timeout in milliseconds
512  */
513 static double
515 {
516  double timeout;
517  const or_options_t *options = get_options();
518 
519  /*
520  * Check if we have LearnCircuitBuildTimeout, and if we don't,
521  * always use CircuitBuildTimeout, no questions asked.
522  */
523  if (!unit_tests && options->CircuitBuildTimeout) {
524  timeout = options->CircuitBuildTimeout*1000;
525  if (!circuit_build_times_disabled(options) &&
527  log_warn(LD_CIRC, "Config CircuitBuildTimeout too low. Setting to %ds",
530  }
531  } else {
533  }
534 
535  return timeout;
536 }
537 
538 /**
539  * Reset the build time state.
540  *
541  * Leave estimated parameters, timeout and network liveness intact
542  * for future use.
543  */
544 void
546 {
547  memset(cbt->circuit_build_times, 0, sizeof(cbt->circuit_build_times));
548  cbt->total_build_times = 0;
549  cbt->build_times_idx = 0;
550  cbt->have_computed_timeout = 0;
551 
552  // Reset timeout and close counts
553  cbt->num_circ_succeeded = 0;
554  cbt->num_circ_closed = 0;
555  cbt->num_circ_timeouts = 0;
556 }
557 
558 /**
559  * Initialize the buildtimes structure for first use.
560  *
561  * Sets the initial timeout values based on either the config setting,
562  * the consensus param, or the default (CBT_DEFAULT_TIMEOUT_INITIAL_VALUE).
563  */
564 void
566 {
567  memset(cbt, 0, sizeof(*cbt));
568  /*
569  * Check if we really are using adaptive timeouts, and don't keep
570  * track of this stuff if not.
571  */
576  tor_calloc(cbt->liveness.num_recent_circs, sizeof(int8_t));
577  } else {
578  cbt->liveness.num_recent_circs = 0;
579  cbt->liveness.timeouts_after_firsthop = NULL;
580  }
582  cbt_control_event_buildtimeout_set(cbt, BUILDTIMEOUT_SET_EVENT_RESET);
583 }
584 
585 /**
586  * Free the saved timeouts, if the cbtdisabled consensus parameter got turned
587  * on or something.
588  */
589 
590 void
592 {
593  if (!cbt) return;
594 
597  }
598 
599  cbt->liveness.num_recent_circs = 0;
600 }
601 
602 #if 0
603 /**
604  * Rewind our build time history by n positions.
605  */
606 static void
607 circuit_build_times_rewind_history(circuit_build_times_t *cbt, int n)
608 {
609  int i = 0;
610 
611  cbt->build_times_idx -= n;
613 
614  for (i = 0; i < n; i++) {
617  }
618 
619  if (cbt->total_build_times > n) {
620  cbt->total_build_times -= n;
621  } else {
622  cbt->total_build_times = 0;
623  }
624 
625  log_info(LD_CIRC,
626  "Rewound history by %d places. Current index: %d. "
627  "Total: %d", n, cbt->build_times_idx, cbt->total_build_times);
628 }
629 #endif /* 0 */
630 
631 /**
632  * Mark this circuit as timed out, but change its purpose
633  * so that it continues to build, allowing us to measure
634  * its full build time.
635  */
636 void
638 {
640  CIRC_EVENT_FAILED,
641  END_CIRC_REASON_TIMEOUT);
644  /* Record this event to check for too many timeouts
645  * in a row. This function does not record a time value yet
646  * (we do that later); it only counts the fact that we did
647  * have a timeout. We also want to avoid double-counting
648  * already "relaxed" circuits, which are counted in
649  * circuit_expire_building(). */
650  if (!circ->relaxed_timeout) {
651  int first_hop_succeeded = circ->cpath &&
652  circ->cpath->state == CPATH_STATE_OPEN;
653 
656  first_hop_succeeded);
657  }
658 }
659 
660 /**
661  * Perform the build time work that needs to be done when a circuit
662  * completes a hop.
663  *
664  * This function decides if we should record a circuit's build time
665  * in our histogram data and other statistics, and if so, records it.
666  * It also will mark circuits that have already timed out as
667  * measurement-only circuits, so they can continue to build but
668  * not get used.
669  *
670  * For this, we want to consider circuits that will eventually make
671  * it to the third hop. For circuits longer than 3 hops, we want to
672  * record their build time when they reach the third hop, but let
673  * them continue (and not count them later). For circuits that are
674  * exactly 3 hops, this will count them when they are completed. We
675  * do this so that CBT is always gathering statistics on circuits
676  * of the same length, regardless of their type.
677  */
678 void
680 {
681  struct timeval end;
682  long timediff;
683 
684  /* If circuit build times are disabled, let circuit_expire_building()
685  * handle it.. */
687  return;
688  }
689 
690  /* Is this a circuit for which the timeout applies in a straight-forward
691  * way? If so, handle it below. If not, just return (and let
692  * circuit_expire_building() eventually take care of it).
693  */
695  return;
696  }
697 
698  tor_gettimeofday(&end);
699  timediff = tv_mdiff(&circ->base_.timestamp_began, &end);
700 
701  /* Check if we would have timed out already. If so, change the
702  * purpose here. But don't do any timeout handling here if there
703  * are no circuits opened yet. Save it for circuit_expire_building()
704  * (to allow it to handle timeout "relaxing" over there). */
705  if (timediff > get_circuit_build_timeout_ms() &&
707 
708  /* Circuits are allowed to last longer for measurement.
709  * Switch their purpose and wait. */
710  if (circ->base_.purpose != CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT) {
711  log_info(LD_CIRC,
712  "Deciding to timeout circuit %"PRIu32,
713  (circ->global_identifier));
715  }
716  }
717 
718  /* If the circuit is built to exactly the DEFAULT_ROUTE_LEN,
719  * add it to our buildtimes. */
721  /* If the circuit build time is much greater than we would have cut
722  * it off at, we probably had a suspend event along this codepath,
723  * and we should discard the value.
724  */
725  if (timediff < 0 ||
726  timediff > 2*get_circuit_build_close_time_ms()+1000) {
727  log_notice(LD_CIRC, "Strange value for circuit build time: %ldmsec. "
728  "Assuming clock jump. Purpose %d (%s)", timediff,
729  circ->base_.purpose,
730  circuit_purpose_to_string(circ->base_.purpose));
731  } else {
732  /* Only count circuit times if the network is live */
736  (build_time_t)timediff);
738  }
739 
740  if (circ->base_.purpose != CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT) {
743  }
744  }
745  }
746 }
747 
748 /**
749  * Add a new build time value <b>time</b> to the set of build times. Time
750  * units are milliseconds.
751  *
752  * circuit_build_times <b>cbt</b> is a circular array, so loop around when
753  * array is full.
754  */
755 int
757 {
758  if (btime <= 0 || btime > CBT_BUILD_TIME_MAX) {
759  log_warn(LD_BUG, "Circuit build time is too large (%u)."
760  "This is probably a bug.", btime);
762  return -1;
763  }
764 
765  log_debug(LD_CIRC, "Adding circuit build time %u", btime);
766 
767  cbt->circuit_build_times[cbt->build_times_idx] = btime;
770  cbt->total_build_times++;
771 
772  if ((cbt->total_build_times % CBT_SAVE_STATE_EVERY) == 0) {
773  /* Save state every n circuit builds */
774  if (!unit_tests && !get_options()->AvoidDiskWrites)
776  }
777 
778  return 0;
779 }
780 
781 /**
782  * Return maximum circuit build time
783  */
784 static build_time_t
786 {
787  int i = 0;
788  build_time_t max_build_time = 0;
789  for (i = 0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) {
790  if (cbt->circuit_build_times[i] > max_build_time
792  max_build_time = cbt->circuit_build_times[i];
793  }
794  return max_build_time;
795 }
796 
797 #if 0
798 /** Return minimum circuit build time */
800 circuit_build_times_min(circuit_build_times_t *cbt)
801 {
802  int i = 0;
803  build_time_t min_build_time = CBT_BUILD_TIME_MAX;
804  for (i = 0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) {
805  if (cbt->circuit_build_times[i] && /* 0 <-> uninitialized */
806  cbt->circuit_build_times[i] < min_build_time)
807  min_build_time = cbt->circuit_build_times[i];
808  }
809  if (min_build_time == CBT_BUILD_TIME_MAX) {
810  log_warn(LD_CIRC, "No build times less than CBT_BUILD_TIME_MAX!");
811  }
812  return min_build_time;
813 }
814 #endif /* 0 */
815 
816 /**
817  * Calculate and return a histogram for the set of build times.
818  *
819  * Returns an allocated array of histrogram bins representing
820  * the frequency of index*CBT_BIN_WIDTH millisecond
821  * build times. Also outputs the number of bins in nbins.
822  *
823  * The return value must be freed by the caller.
824  */
825 static uint32_t *
827  build_time_t *nbins)
828 {
829  uint32_t *histogram;
830  build_time_t max_build_time = circuit_build_times_max(cbt);
831  int i, c;
832 
833  *nbins = 1 + (max_build_time / CBT_BIN_WIDTH);
834  histogram = tor_calloc(*nbins, sizeof(build_time_t));
835 
836  // calculate histogram
837  for (i = 0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) {
838  if (cbt->circuit_build_times[i] == 0
840  continue; /* 0 <-> uninitialized */
841 
842  c = (cbt->circuit_build_times[i] / CBT_BIN_WIDTH);
843  histogram[c]++;
844  }
845 
846  return histogram;
847 }
848 
849 /**
850  * Return the Pareto start-of-curve parameter Xm.
851  *
852  * Because we are not a true Pareto curve, we compute this as the
853  * weighted average of the 10 most frequent build time bins. This
854  * heuristic allowed for the actual timeout rate to be closest
855  * to the chosen quantile cutoff, for quantiles 60-80%, out of
856  * many variant approaches (see #40157 for analysis).
857  */
860 {
861  build_time_t nbins = 0;
862  build_time_t *nth_max_bin;
863  build_time_t xm_total = 0;
864  build_time_t Xm = 0;
865  int32_t xm_counts=0;
867  uint32_t *histogram = circuit_build_times_create_histogram(cbt, &nbins);
868 
869  tor_assert(nbins > 0);
870  tor_assert(num_modes > 0);
871 
872  nth_max_bin = tor_calloc(num_modes, sizeof(build_time_t));
873 
874  /* Determine the N most common build times, by selecting the
875  * nth largest mode, counting it, and removing it from the histogram. */
876  for (int n = 0; n < num_modes; n++) {
877  /* Get nth mode */
878  for (build_time_t i = 0; i < nbins; i++) {
879  if (histogram[i] > histogram[nth_max_bin[n]]) {
880  nth_max_bin[n] = i;
881  }
882  }
883 
884  /* Update average */
885  xm_counts += histogram[nth_max_bin[n]];
886  xm_total += CBT_BIN_TO_MS(nth_max_bin[n])*histogram[nth_max_bin[n]];
887 
888  /* Prevent from re-counting this value */
889  histogram[nth_max_bin[n]] = 0;
890  }
891 
892  /* xm_counts can become zero if all of our last CBT_NCIRCUITS_TO_OBSERVE
893  * circuits were abandoned before they completed. This shouldn't happen,
894  * though. We should have reset/re-learned a lower timeout first. */
895  if (xm_counts == 0) {
896  log_warn(LD_CIRC,
897  "No valid circuit build time data out of %d times, %u modes, "
898  "have_timeout=%d, %lfms", cbt->total_build_times, num_modes,
899  cbt->have_computed_timeout, cbt->timeout_ms);
900  goto done;
901  }
902 
903  Xm = xm_total / xm_counts;
904 
905  done:
906  tor_free(histogram);
907  tor_free(nth_max_bin);
908 
909  return Xm;
910 }
911 
912 /**
913  * Output a histogram of current circuit build times to
914  * the or_state_t state structure.
915  */
916 void
918  or_state_t *state)
919 {
920  uint32_t *histogram;
921  build_time_t i = 0;
922  build_time_t nbins = 0;
923  config_line_t **next, *line;
924 
925  histogram = circuit_build_times_create_histogram(cbt, &nbins);
926  // write to state
927  config_free_lines(state->BuildtimeHistogram);
928  next = &state->BuildtimeHistogram;
929  *next = NULL;
930 
931  state->TotalBuildTimes = cbt->total_build_times;
932  state->CircuitBuildAbandonedCount = 0;
933 
934  for (i = 0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) {
936  state->CircuitBuildAbandonedCount++;
937  }
938 
939  for (i = 0; i < nbins; i++) {
940  // compress the histogram by skipping the blanks
941  if (histogram[i] == 0) continue;
942  *next = line = tor_malloc_zero(sizeof(config_line_t));
943  line->key = tor_strdup("CircuitBuildTimeBin");
944  tor_asprintf(&line->value, "%d %d",
945  CBT_BIN_TO_MS(i), histogram[i]);
946  next = &(line->next);
947  }
948 
949  if (!unit_tests) {
952  }
953 
954  tor_free(histogram);
955 }
956 
957 /**
958  * Shuffle the build times array.
959  *
960  * Adapted from https://en.wikipedia.org/wiki/Fisher-Yates_shuffle
961  */
962 static void
964  build_time_t *raw_times,
965  uint32_t num_times)
966 {
967  uint32_t n = num_times;
968  if (num_times > CBT_NCIRCUITS_TO_OBSERVE) {
969  log_notice(LD_CIRC, "The number of circuit times that this Tor version "
970  "uses to calculate build times is less than the number stored "
971  "in your state file. Decreasing the circuit time history from "
972  "%lu to %d.", (unsigned long)num_times,
974  }
975 
976  if (n > INT_MAX-1) {
977  log_warn(LD_CIRC, "For some insane reasons, you had %lu circuit build "
978  "observations in your state file. That's far too many; probably "
979  "there's a bug here.", (unsigned long)n);
980  n = INT_MAX-1;
981  }
982 
983  /* This code can only be run on a compact array */
984  while (n-- > 1) {
985  int k = crypto_rand_int(n + 1); /* 0 <= k <= n. */
986  build_time_t tmp = raw_times[k];
987  raw_times[k] = raw_times[n];
988  raw_times[n] = tmp;
989  }
990 
991  /* Since the times are now shuffled, take a random CBT_NCIRCUITS_TO_OBSERVE
992  * subset (ie the first CBT_NCIRCUITS_TO_OBSERVE values) */
993  for (n = 0; n < MIN(num_times, CBT_NCIRCUITS_TO_OBSERVE); n++) {
994  circuit_build_times_add_time(cbt, raw_times[n]);
995  }
996 }
997 
998 /**
999  * Load histogram from <b>state</b>, shuffling the resulting array
1000  * after we do so. Use this result to estimate parameters and
1001  * calculate the timeout.
1002  *
1003  * Return -1 on error.
1004  */
1005 int
1007  or_state_t *state)
1008 {
1009  int tot_values = 0;
1010  uint32_t loaded_cnt = 0, N = 0;
1011  config_line_t *line;
1012  int i;
1013  build_time_t *loaded_times;
1014  int err = 0;
1016 
1018  return 0;
1019  }
1020 
1021  /* We had a case where someone removed their TotalBuildTimes from the state
1022  * files while having CircuitBuildAbandonedCount above 0 leading to a
1023  * segfault (#40437). Simply bug on it and return an error so at least the
1024  * user will learn that they broke the state file. */
1025  if (BUG(state->TotalBuildTimes <= 0 &&
1026  state->CircuitBuildAbandonedCount > 0)) {
1027  log_warn(LD_GENERAL, "CircuitBuildAbandonedCount count is above 0 but "
1028  "no TotalBuildTimes have been found. Unable to "
1029  "parse broken state file");
1030  return -1;
1031  }
1032 
1033  /* build_time_t 0 means uninitialized */
1034  loaded_times = tor_calloc(state->TotalBuildTimes, sizeof(build_time_t));
1035 
1036  for (line = state->BuildtimeHistogram; line; line = line->next) {
1037  smartlist_t *args = smartlist_new();
1038  smartlist_split_string(args, line->value, " ",
1039  SPLIT_SKIP_SPACE|SPLIT_IGNORE_BLANK, 0);
1040  if (smartlist_len(args) < 2) {
1041  log_warn(LD_GENERAL, "Unable to parse circuit build times: "
1042  "Too few arguments to CircuitBuildTime");
1043  err = 1;
1044  SMARTLIST_FOREACH(args, char*, cp, tor_free(cp));
1045  smartlist_free(args);
1046  break;
1047  } else {
1048  const char *ms_str = smartlist_get(args,0);
1049  const char *count_str = smartlist_get(args,1);
1050  uint32_t count, k;
1051  build_time_t ms;
1052  int ok;
1053  ms = (build_time_t)tor_parse_ulong(ms_str, 10, 0,
1054  CBT_BUILD_TIME_MAX, &ok, NULL);
1055  if (!ok) {
1056  log_warn(LD_GENERAL, "Unable to parse circuit build times: "
1057  "Unparsable bin number");
1058  err = 1;
1059  SMARTLIST_FOREACH(args, char*, cp, tor_free(cp));
1060  smartlist_free(args);
1061  break;
1062  }
1063  count = (uint32_t)tor_parse_ulong(count_str, 10, 0,
1064  UINT32_MAX, &ok, NULL);
1065  if (!ok) {
1066  log_warn(LD_GENERAL, "Unable to parse circuit build times: "
1067  "Unparsable bin count");
1068  err = 1;
1069  SMARTLIST_FOREACH(args, char*, cp, tor_free(cp));
1070  smartlist_free(args);
1071  break;
1072  }
1073 
1074  if (loaded_cnt+count+ (unsigned)state->CircuitBuildAbandonedCount
1075  > (unsigned) state->TotalBuildTimes) {
1076  log_warn(LD_CIRC,
1077  "Too many build times in state file. "
1078  "Stopping short before %d",
1079  loaded_cnt+count);
1080  SMARTLIST_FOREACH(args, char*, cp, tor_free(cp));
1081  smartlist_free(args);
1082  break;
1083  }
1084 
1085  for (k = 0; k < count; k++) {
1086  loaded_times[loaded_cnt++] = ms;
1087  }
1088  N++;
1089  SMARTLIST_FOREACH(args, char*, cp, tor_free(cp));
1090  smartlist_free(args);
1091  }
1092  }
1093 
1094  log_info(LD_CIRC,
1095  "Adding %d timeouts.", state->CircuitBuildAbandonedCount);
1096  for (i=0; i < state->CircuitBuildAbandonedCount; i++) {
1097  loaded_times[loaded_cnt++] = CBT_BUILD_ABANDONED;
1098  }
1099 
1100  if (loaded_cnt != (unsigned)state->TotalBuildTimes) {
1101  log_warn(LD_CIRC,
1102  "Corrupt state file? Build times count mismatch. "
1103  "Read %d times, but file says %d", loaded_cnt,
1104  state->TotalBuildTimes);
1105  err = 1;
1107  goto done;
1108  }
1109 
1110  circuit_build_times_shuffle_and_store_array(cbt, loaded_times, loaded_cnt);
1111 
1112  /* Verify that we didn't overwrite any indexes */
1113  for (i=0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) {
1114  if (!cbt->circuit_build_times[i])
1115  break;
1116  tot_values++;
1117  }
1118  log_info(LD_CIRC,
1119  "Loaded %d/%d values from %d lines in circuit time histogram",
1120  tot_values, cbt->total_build_times, N);
1121 
1122  if (cbt->total_build_times != tot_values
1124  log_warn(LD_CIRC,
1125  "Corrupt state file? Shuffled build times mismatch. "
1126  "Read %d times, but file says %d", tot_values,
1127  state->TotalBuildTimes);
1128  err = 1;
1130  goto done;
1131  }
1132 
1134 
1135  done:
1136  tor_free(loaded_times);
1137  return err ? -1 : 0;
1138 }
1139 
1140 /**
1141  * Estimates the Xm and Alpha parameters using
1142  * https://en.wikipedia.org/wiki/Pareto_distribution#Parameter_estimation
1143  *
1144  * The notable difference is that we use mode instead of min to estimate Xm.
1145  * This is because our distribution is frechet-like. We claim this is
1146  * an acceptable approximation because we are only concerned with the
1147  * accuracy of the CDF of the tail.
1148  */
1149 STATIC int
1151 {
1153  double a = 0;
1154  int n=0,i=0,abandoned_count=0;
1155 
1156  /* https://en.wikipedia.org/wiki/Pareto_distribution#Parameter_estimation */
1157  /* We sort of cheat here and make our samples slightly more pareto-like
1158  * and less frechet-like. */
1159  cbt->Xm = circuit_build_times_get_xm(cbt);
1160 
1161  /* If Xm came back 0, then too many circuits were abandoned. */
1162  if (cbt->Xm == 0)
1163  return 0;
1164 
1165  tor_assert(cbt->Xm > 0);
1166 
1167  for (i=0; i< CBT_NCIRCUITS_TO_OBSERVE; i++) {
1168  if (!x[i]) {
1169  continue;
1170  }
1171 
1172  if (x[i] < cbt->Xm) {
1173  a += tor_mathlog(cbt->Xm);
1174  n++;
1175  } else if (x[i] == CBT_BUILD_ABANDONED) {
1176  abandoned_count++;
1177  } else {
1178  a += tor_mathlog(x[i]);
1179  n++;
1180  }
1181  }
1182 
1183  /*
1184  * We are erring and asserting here because this can only happen
1185  * in codepaths other than startup. The startup state parsing code
1186  * performs this same check, and resets state if it hits it. If we
1187  * hit it at runtime, something serious has gone wrong.
1188  */
1189  if (n!=cbt->total_build_times-abandoned_count) {
1190  log_err(LD_CIRC, "Discrepancy in build times count: %d vs %d", n,
1191  cbt->total_build_times);
1192  }
1193  tor_assert_nonfatal(n==cbt->total_build_times-abandoned_count);
1194 
1195  /* This is the "Maximum Likelihood Estimator" for parameter alpha of a Pareto
1196  * Distribution. See:
1197  * https://en.wikipedia.org/wiki/Pareto_distribution#Estimation_of_parameters
1198  *
1199  * The division in the estimator is done with subtraction outside the ln(),
1200  * with the sum occurring in the for loop above.
1201  *
1202  * This done is to avoid the precision issues of logs of small values.
1203  */
1204  a -= n*tor_mathlog(cbt->Xm);
1205  a = n/a;
1206 
1207  cbt->alpha = a;
1208 
1209  return 1;
1210 }
1211 
1212 /**
1213  * This is the Pareto Quantile Function. It calculates the point x
1214  * in the distribution such that F(x) = quantile (ie quantile*100%
1215  * of the mass of the density function is below x on the curve).
1216  *
1217  * We use it to calculate the timeout and also to generate synthetic
1218  * values of time for circuits that timeout before completion.
1219  *
1220  * See https://en.wikipedia.org/wiki/Quantile_function,
1221  * https://en.wikipedia.org/wiki/Inverse_transform_sampling and
1222  * https://en.wikipedia.org/wiki/Pareto_distribution#Generating_a_
1223  * random_sample_from_Pareto_distribution
1224  * That's right. I'll cite wikipedia all day long.
1225  *
1226  * Return value is in milliseconds, clamped to INT32_MAX.
1227  */
1228 STATIC double
1230  double quantile)
1231 {
1232  double ret;
1233  tor_assert(quantile >= 0);
1234  tor_assert(1.0-quantile > 0);
1235  tor_assert(cbt->Xm > 0);
1236 
1237  /* If either alpha or p are 0, we would divide by zero, yielding an
1238  * infinite (double) result; which would be clamped to INT32_MAX.
1239  * Instead, initialise ret to INT32_MAX, and skip over these
1240  * potentially illegal/trapping divides by zero.
1241  */
1242  ret = INT32_MAX;
1243 
1244  if (cbt->alpha > 0) {
1245  double p;
1246  p = pow(1.0-quantile,1.0/cbt->alpha);
1247  if (p > 0) {
1248  ret = cbt->Xm/p;
1249  }
1250  }
1251 
1252  if (ret > INT32_MAX) {
1253  ret = INT32_MAX;
1254  }
1255  tor_assert(ret > 0);
1256  return ret;
1257 }
1258 
1259 #ifdef TOR_UNIT_TESTS
1260 /** Pareto CDF */
1261 double
1262 circuit_build_times_cdf(circuit_build_times_t *cbt, double x)
1263 {
1264  double ret;
1265  tor_assert(cbt->Xm > 0);
1266  ret = 1.0-pow(cbt->Xm/x,cbt->alpha);
1267  tor_assert(0 <= ret && ret <= 1.0);
1268  return ret;
1269 }
1270 #endif /* defined(TOR_UNIT_TESTS) */
1271 
1272 #ifdef TOR_UNIT_TESTS
1273 /**
1274  * Generate a synthetic time using our distribution parameters.
1275  *
1276  * The return value will be within the [q_lo, q_hi) quantile points
1277  * on the CDF.
1278  */
1280 circuit_build_times_generate_sample(circuit_build_times_t *cbt,
1281  double q_lo, double q_hi)
1282 {
1283  double randval = crypto_rand_double();
1284  build_time_t ret;
1285  double u;
1286 
1287  /* Generate between [q_lo, q_hi) */
1288  /*XXXX This is what nextafter is supposed to be for; we should use it on the
1289  * platforms that support it. */
1290  q_hi -= 1.0/(INT32_MAX);
1291 
1292  tor_assert(q_lo >= 0);
1293  tor_assert(q_hi < 1);
1294  tor_assert(q_lo < q_hi);
1295 
1296  u = q_lo + (q_hi-q_lo)*randval;
1297 
1298  tor_assert(0 <= u && u < 1.0);
1299  /* circuit_build_times_calculate_timeout returns <= INT32_MAX */
1300  ret = (build_time_t)
1302  tor_assert(ret > 0);
1303  return ret;
1304 }
1305 #endif /* defined(TOR_UNIT_TESTS) */
1306 
1307 #ifdef TOR_UNIT_TESTS
1308 /**
1309  * Estimate an initial alpha parameter by solving the quantile
1310  * function with a quantile point and a specific timeout value.
1311  */
1312 void
1313 circuit_build_times_initial_alpha(circuit_build_times_t *cbt,
1314  double quantile, double timeout_ms)
1315 {
1316  // Q(u) = Xm/((1-u)^(1/a))
1317  // Q(0.8) = Xm/((1-0.8))^(1/a)) = CircBuildTimeout
1318  // CircBuildTimeout = Xm/((1-0.8))^(1/a))
1319  // CircBuildTimeout = Xm*((1-0.8))^(-1/a))
1320  // ln(CircBuildTimeout) = ln(Xm)+ln(((1-0.8)))*(-1/a)
1321  // -ln(1-0.8)/(ln(CircBuildTimeout)-ln(Xm))=a
1322  tor_assert(quantile >= 0);
1323  tor_assert(cbt->Xm > 0);
1324  cbt->alpha = tor_mathlog(1.0-quantile)/
1325  (tor_mathlog(cbt->Xm)-tor_mathlog(timeout_ms));
1326  tor_assert(cbt->alpha > 0);
1327 }
1328 #endif /* defined(TOR_UNIT_TESTS) */
1329 
1330 /**
1331  * Returns true if we need circuits to be built
1332  */
1333 int
1335 {
1336  /* Return true if < MIN_CIRCUITS_TO_OBSERVE */
1338 }
1339 
1340 /**
1341  * Returns true if we should build a timeout test circuit
1342  * right now.
1343  */
1344 int
1346 {
1347  return circuit_build_times_needs_circuits(cbt) &&
1349 }
1350 
1351 /**
1352  * How long should we be unreachable before we think we need to check if
1353  * our published IP address has changed.
1354  */
1355 #define CIRCUIT_TIMEOUT_BEFORE_RECHECK_IP (60*3)
1356 
1357 /**
1358  * Called to indicate that the network showed some signs of liveness,
1359  * i.e. we received a cell.
1360  *
1361  * This is used by circuit_build_times_network_check_live() to decide
1362  * if we should record the circuit build timeout or not.
1363  *
1364  * This function is called every time we receive a cell. Avoid
1365  * syscalls, events, and other high-intensity work.
1366  */
1367 void
1369 {
1370  time_t now = approx_time();
1371  // XXXX this should use pubsub
1372  if (cbt->liveness.nonlive_timeouts > 0) {
1373  time_t time_since_live = now - cbt->liveness.network_last_live;
1374  log_notice(LD_CIRC,
1375  "Tor now sees network activity. Restoring circuit build "
1376  "timeout recording. Network was down for %d seconds "
1377  "during %d circuit attempts.",
1378  (int)time_since_live,
1379  cbt->liveness.nonlive_timeouts);
1380  if (time_since_live > CIRCUIT_TIMEOUT_BEFORE_RECHECK_IP)
1382  }
1383  cbt->liveness.network_last_live = now;
1384  cbt->liveness.nonlive_timeouts = 0;
1385 
1386  /* Tell control.c */
1388 }
1389 
1390 /**
1391  * Non-destructively scale all of our circuit success, timeout, and close
1392  * counts down by a factor of two. Scaling in this way preserves the
1393  * ratios between succeeded vs timed out vs closed circuits, so that
1394  * our statistics don't change when we scale.
1395  *
1396  * This is used only in the rare event that we build more than
1397  * INT32_MAX circuits. Since the num_circ_* variables are
1398  * uint32_t, we won't even be close to overflowing them.
1399  */
1400 void
1402 {
1403  cbt->num_circ_succeeded /= 2;
1404  cbt->num_circ_timeouts /= 2;
1405  cbt->num_circ_closed /= 2;
1406 }
1407 
1408 /**
1409  * Called to indicate that we "completed" a circuit. Because this circuit
1410  * succeeded, it doesn't count as a timeout-after-the-first-hop.
1411  *
1412  * (For the purposes of the cbt code, we consider a circuit "completed" if
1413  * it has 3 hops, regardless of its final hop count. We do this because
1414  * we're trying to answer the question, "how long should a circuit take to
1415  * reach the 3-hop count".)
1416  *
1417  * This is used by circuit_build_times_network_check_changed() to determine
1418  * if we had too many recent timeouts and need to reset our learned timeout
1419  * to something higher.
1420  */
1421 void
1423 {
1424  // Count circuit success
1425  cbt->num_circ_succeeded++;
1426 
1427  // If we're going to wrap int32, scale everything
1428  if (cbt->num_circ_succeeded >= INT32_MAX) {
1430  }
1431 
1432  /* Check for NULLness because we might not be using adaptive timeouts */
1433  if (cbt->liveness.timeouts_after_firsthop &&
1434  cbt->liveness.num_recent_circs > 0) {
1436  = 0;
1439  }
1440 }
1441 
1442 /**
1443  * A circuit just timed out. If it failed after the first hop, record it
1444  * in our history for later deciding if the network speed has changed.
1445  *
1446  * This is used by circuit_build_times_network_check_changed() to determine
1447  * if we had too many recent timeouts and need to reset our learned timeout
1448  * to something higher.
1449  */
1450 static void
1452  int did_onehop)
1453 {
1454  // Count circuit timeout
1455  cbt->num_circ_timeouts++;
1456 
1457  // If we're going to wrap int32, scale everything
1458  if (cbt->num_circ_timeouts >= INT32_MAX) {
1460  }
1461 
1462  /* Check for NULLness because we might not be using adaptive timeouts */
1463  if (cbt->liveness.timeouts_after_firsthop &&
1464  cbt->liveness.num_recent_circs > 0) {
1465  if (did_onehop) {
1467  = 1;
1470  }
1471  }
1472 }
1473 
1474 /**
1475  * A circuit was just forcibly closed. If there has been no recent network
1476  * activity at all, but this circuit was launched back when we thought the
1477  * network was live, increment the number of "nonlive" circuit timeouts.
1478  *
1479  * This is used by circuit_build_times_network_check_live() to decide
1480  * if we should record the circuit build timeout or not.
1481  */
1482 static void
1484  int did_onehop, time_t start_time)
1485 {
1486  time_t now = time(NULL);
1487 
1488  // Count circuit close
1489  cbt->num_circ_closed++;
1490 
1491  // If we're going to wrap int32, scale everything
1492  if (cbt->num_circ_closed >= INT32_MAX) {
1494  }
1495 
1496  /*
1497  * Check if this is a timeout that was for a circuit that spent its
1498  * entire existence during a time where we have had no network activity.
1499  */
1500  if (cbt->liveness.network_last_live < start_time) {
1501  if (did_onehop) {
1502  char last_live_buf[ISO_TIME_LEN+1];
1503  char start_time_buf[ISO_TIME_LEN+1];
1504  char now_buf[ISO_TIME_LEN+1];
1505  format_local_iso_time(last_live_buf, cbt->liveness.network_last_live);
1506  format_local_iso_time(start_time_buf, start_time);
1507  format_local_iso_time(now_buf, now);
1508  log_notice(LD_CIRC,
1509  "A circuit somehow completed a hop while the network was "
1510  "not live. The network was last live at %s, but the circuit "
1511  "launched at %s. It's now %s. This could mean your clock "
1512  "changed.", last_live_buf, start_time_buf, now_buf);
1513  }
1514  cbt->liveness.nonlive_timeouts++;
1515  if (cbt->liveness.nonlive_timeouts == 1) {
1516  log_notice(LD_CIRC,
1517  "Tor has not observed any network activity for the past %d "
1518  "seconds. Disabling circuit build timeout recording.",
1519  (int)(now - cbt->liveness.network_last_live));
1520 
1521  /* Tell control.c */
1523  } else {
1524  log_info(LD_CIRC,
1525  "Got non-live timeout. Current count is: %d",
1526  cbt->liveness.nonlive_timeouts);
1527  }
1528  }
1529 }
1530 
1531 /**
1532  * When the network is not live, we do not record circuit build times.
1533  *
1534  * The network is considered not live if there has been at least one
1535  * circuit build that began and ended (had its close_ms measurement
1536  * period expire) since we last received a cell.
1537  *
1538  * Also has the side effect of rewinding the circuit time history
1539  * in the case of recent liveness changes.
1540  */
1541 int
1543 {
1544  if (cbt->liveness.nonlive_timeouts > 0) {
1545  return 0;
1546  }
1547 
1548  return 1;
1549 }
1550 
1551 /**
1552  * Returns true if we have seen more than MAX_RECENT_TIMEOUT_COUNT of
1553  * the past RECENT_CIRCUITS time out after the first hop. Used to detect
1554  * if the network connection has changed significantly, and if so,
1555  * resets our circuit build timeout to the default.
1556  *
1557  * Also resets the entire timeout history in this case and causes us
1558  * to restart the process of building test circuits and estimating a
1559  * new timeout.
1560  */
1561 STATIC int
1563 {
1564  int total_build_times = cbt->total_build_times;
1565  int timeout_count=0;
1566  int i;
1567 
1568  if (cbt->liveness.timeouts_after_firsthop &&
1569  cbt->liveness.num_recent_circs > 0) {
1570  /* how many of our recent circuits made it to the first hop but then
1571  * timed out? */
1572  for (i = 0; i < cbt->liveness.num_recent_circs; i++) {
1573  timeout_count += cbt->liveness.timeouts_after_firsthop[i];
1574  }
1575  }
1576 
1577  /* If 80% of our recent circuits are timing out after the first hop,
1578  * we need to re-estimate a new initial alpha and timeout. */
1579  if (timeout_count < circuit_build_times_max_timeouts()) {
1580  return 0;
1581  }
1582 
1584  if (cbt->liveness.timeouts_after_firsthop &&
1585  cbt->liveness.num_recent_circs > 0) {
1586  memset(cbt->liveness.timeouts_after_firsthop, 0,
1587  sizeof(*cbt->liveness.timeouts_after_firsthop)*
1588  cbt->liveness.num_recent_circs);
1589  }
1590  cbt->liveness.after_firsthop_idx = 0;
1591 
1592 #define MAX_TIMEOUT ((int32_t) (INT32_MAX/2))
1593  /* Check to see if this has happened before. If so, double the timeout
1594  * to give clients on abysmally bad network connections a shot at access */
1596  if (cbt->timeout_ms > MAX_TIMEOUT || cbt->close_ms > MAX_TIMEOUT) {
1597  log_warn(LD_CIRC, "Insanely large circuit build timeout value. "
1598  "(timeout = %fmsec, close = %fmsec)",
1599  cbt->timeout_ms, cbt->close_ms);
1600  } else {
1601  cbt->timeout_ms *= 2;
1602  cbt->close_ms *= 2;
1603  }
1604  } else {
1605  cbt->close_ms = cbt->timeout_ms
1607  }
1608 #undef MAX_TIMEOUT
1609 
1610  cbt_control_event_buildtimeout_set(cbt, BUILDTIMEOUT_SET_EVENT_RESET);
1611 
1612  log_notice(LD_CIRC,
1613  "Your network connection speed appears to have changed. Resetting "
1614  "timeout to %ldms after %d timeouts and %d buildtimes.",
1615  tor_lround(cbt->timeout_ms), timeout_count, total_build_times);
1616 
1617  return 1;
1618 }
1619 
1620 /**
1621  * Count the number of timeouts in a set of cbt data.
1622  */
1623 double
1625 {
1626  int i=0,timeouts=0;
1627  for (i = 0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) {
1628  if (cbt->circuit_build_times[i] >= cbt->timeout_ms) {
1629  timeouts++;
1630  }
1631  }
1632 
1633  if (!cbt->total_build_times)
1634  return 0;
1635 
1636  return ((double)timeouts)/cbt->total_build_times;
1637 }
1638 
1639 /**
1640  * Count the number of closed circuits in a set of cbt data.
1641  */
1642 double
1644 {
1645  int i=0,closed=0;
1646  for (i = 0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) {
1647  if (cbt->circuit_build_times[i] == CBT_BUILD_ABANDONED) {
1648  closed++;
1649  }
1650  }
1651 
1652  if (!cbt->total_build_times)
1653  return 0;
1654 
1655  return ((double)closed)/cbt->total_build_times;
1656 }
1657 
1658 /**
1659  * Store a timeout as a synthetic value.
1660  *
1661  * Returns true if the store was successful and we should possibly
1662  * update our timeout estimate.
1663  */
1664 int
1666  int did_onehop,
1667  time_t start_time)
1668 {
1670  cbt->close_ms = cbt->timeout_ms
1672  return 0;
1673  }
1674 
1675  /* Record this force-close to help determine if the network is dead */
1676  circuit_build_times_network_close(cbt, did_onehop, start_time);
1677 
1678  /* Only count timeouts if network is live.. */
1680  return 0;
1681  }
1682 
1684  return 1;
1685 }
1686 
1687 /**
1688  * Update timeout counts to determine if we need to expire
1689  * our build time history due to excessive timeouts.
1690  *
1691  * We do not record any actual time values at this stage;
1692  * we are only interested in recording the fact that a timeout
1693  * happened. We record the time values via
1694  * circuit_build_times_count_close() and circuit_build_times_add_time().
1695  */
1696 void
1698  int did_onehop)
1699 {
1701  cbt->close_ms = cbt->timeout_ms
1703  return;
1704  }
1705 
1706  /* Register the fact that a timeout just occurred. */
1707  circuit_build_times_network_timeout(cbt, did_onehop);
1708 
1709  /* If there are a ton of timeouts, we should reset
1710  * the circuit build timeout. */
1712 }
1713 
1714 /**
1715  * Estimate a new timeout based on history and set our timeout
1716  * variable accordingly.
1717  */
1718 static int
1720 {
1721  build_time_t max_time;
1723  return 0;
1724 
1726  return 0;
1727 
1730 
1733 
1734  max_time = circuit_build_times_max(cbt);
1735 
1736  if (cbt->timeout_ms > max_time) {
1737  log_info(LD_CIRC,
1738  "Circuit build timeout of %dms is beyond the maximum build "
1739  "time we have ever observed. Capping it to %dms.",
1740  (int)cbt->timeout_ms, max_time);
1741  cbt->timeout_ms = max_time;
1742  }
1743 
1744  if (max_time < INT32_MAX/2 && cbt->close_ms > 2*max_time) {
1745  log_info(LD_CIRC,
1746  "Circuit build measurement period of %dms is more than twice "
1747  "the maximum build time we have ever observed. Capping it to "
1748  "%dms.", (int)cbt->close_ms, 2*max_time);
1749  cbt->close_ms = 2*max_time;
1750  }
1751 
1752  /* Sometimes really fast guard nodes give us such a steep curve
1753  * that this ends up being not that much greater than timeout_ms.
1754  * Make it be at least 1 min to handle this case. */
1756 
1757  cbt->have_computed_timeout = 1;
1758  return 1;
1759 }
1760 
1761 /**
1762  * Exposed function to compute a new timeout. Dispatches events and
1763  * also filters out extremely high timeout values.
1764  */
1765 void
1767 {
1768  long prev_timeout = tor_lround(cbt->timeout_ms/1000);
1769  double timeout_rate;
1770 
1771  /*
1772  * Just return if we aren't using adaptive timeouts
1773  */
1775  return;
1776 
1778  return;
1779 
1781  log_notice(LD_CIRC, "Set buildtimeout to low value %fms. Setting to %dms",
1784  if (cbt->close_ms < cbt->timeout_ms) {
1785  /* This shouldn't happen because of MAX() in timeout_worker above,
1786  * but doing it just in case */
1788  }
1789  }
1790 
1791  cbt_control_event_buildtimeout_set(cbt, BUILDTIMEOUT_SET_EVENT_COMPUTED);
1792 
1793  timeout_rate = circuit_build_times_timeout_rate(cbt);
1794 
1795  if (prev_timeout > tor_lround(cbt->timeout_ms/1000)) {
1796  log_info(LD_CIRC,
1797  "Based on %d circuit times, it looks like we don't need to "
1798  "wait so long for circuits to finish. We will now assume a "
1799  "circuit is too slow to use after waiting %ld milliseconds.",
1800  cbt->total_build_times,
1801  tor_lround(cbt->timeout_ms));
1802  log_info(LD_CIRC,
1803  "Circuit timeout data: %fms, %fms, Xm: %d, a: %f, r: %f",
1804  cbt->timeout_ms, cbt->close_ms, cbt->Xm, cbt->alpha,
1805  timeout_rate);
1806  } else if (prev_timeout < tor_lround(cbt->timeout_ms/1000)) {
1807  log_info(LD_CIRC,
1808  "Based on %d circuit times, it looks like we need to wait "
1809  "longer for circuits to finish. We will now assume a "
1810  "circuit is too slow to use after waiting %ld milliseconds.",
1811  cbt->total_build_times,
1812  tor_lround(cbt->timeout_ms));
1813  log_info(LD_CIRC,
1814  "Circuit timeout data: %fms, %fms, Xm: %d, a: %f, r: %f",
1815  cbt->timeout_ms, cbt->close_ms, cbt->Xm, cbt->alpha,
1816  timeout_rate);
1817  } else {
1818  log_info(LD_CIRC,
1819  "Set circuit build timeout to %ldms (%fms, %fms, Xm: %d, a: %f,"
1820  " r: %f) based on %d circuit times",
1821  tor_lround(cbt->timeout_ms),
1822  cbt->timeout_ms, cbt->close_ms, cbt->Xm, cbt->alpha, timeout_rate,
1823  cbt->total_build_times);
1824  }
1825 }
1826 
1827 #ifdef TOR_UNIT_TESTS
1828 /** Make a note that we're running unit tests (rather than running Tor
1829  * itself), so we avoid clobbering our state file. */
1830 void
1831 circuitbuild_running_unit_tests(void)
1832 {
1833  unit_tests = 1;
1834 }
1835 #endif /* defined(TOR_UNIT_TESTS) */
1836 
1837 void
1838 circuit_build_times_update_last_circ(circuit_build_times_t *cbt)
1839 {
1840  cbt->last_circ_at = approx_time();
1841 }
time_t approx_time(void)
Definition: approx_time.c:32
int authdir_mode(const or_options_t *options)
Definition: authmode.c:25
Header file for directory authority mode.
int circuit_timeout_want_to_count_circ(const origin_circuit_t *circ)
Definition: circuitbuild.c:825
Header file for circuitbuild.c.
int circuit_any_opened_circuits_cached(void)
Definition: circuitlist.c:766
const char * circuit_purpose_to_string(uint8_t purpose)
Definition: circuitlist.c:914
int circuit_get_cpath_opened_len(const origin_circuit_t *circ)
Definition: circuitlist.c:2022
int circuit_event_status(origin_circuit_t *circ, circuit_status_event_t tp, int reason_code)
Definition: circuitlist.c:510
Header file for circuitlist.c.
#define CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT
Definition: circuitlist.h:93
int circuit_build_times_needs_circuits_now(const circuit_build_times_t *cbt)
void circuit_build_times_free_timeouts(circuit_build_times_t *cbt)
Definition: circuitstats.c:591
#define CIRCUIT_TIMEOUT_BEFORE_RECHECK_IP
void circuit_build_times_network_is_live(circuit_build_times_t *cbt)
void circuit_build_times_handle_completed_hop(origin_circuit_t *circ)
Definition: circuitstats.c:679
void circuit_build_times_set_timeout(circuit_build_times_t *cbt)
double get_circuit_build_timeout_ms(void)
Definition: circuitstats.c:101
STATIC double circuit_build_times_calculate_timeout(circuit_build_times_t *cbt, double quantile)
int circuit_build_times_parse_state(circuit_build_times_t *cbt, or_state_t *state)
int circuit_build_times_network_check_live(const circuit_build_times_t *cbt)
STATIC int circuit_build_times_network_check_changed(circuit_build_times_t *cbt)
static uint32_t * circuit_build_times_create_histogram(const circuit_build_times_t *cbt, build_time_t *nbins)
Definition: circuitstats.c:826
int circuit_build_times_disabled_(const or_options_t *options, int ignore_consensus)
Definition: circuitstats.c:124
void circuit_build_times_count_timeout(circuit_build_times_t *cbt, int did_onehop)
static void circuit_build_times_shuffle_and_store_array(circuit_build_times_t *cbt, build_time_t *raw_times, uint32_t num_times)
Definition: circuitstats.c:963
const circuit_build_times_t * get_circuit_build_times(void)
Definition: circuitstats.c:78
double get_circuit_build_close_time_ms(void)
Definition: circuitstats.c:93
void circuit_build_times_new_consensus_params(circuit_build_times_t *cbt, const networkstatus_t *ns)
Definition: circuitstats.c:426
double circuit_build_times_timeout_rate(const circuit_build_times_t *cbt)
void circuit_build_times_update_state(const circuit_build_times_t *cbt, or_state_t *state)
Definition: circuitstats.c:917
void circuit_build_times_mark_circ_as_measurement_only(origin_circuit_t *circ)
Definition: circuitstats.c:637
void circuit_build_times_init(circuit_build_times_t *cbt)
Definition: circuitstats.c:565
int32_t circuit_build_times_initial_timeout(void)
Definition: circuitstats.c:370
STATIC build_time_t circuit_build_times_get_xm(circuit_build_times_t *cbt)
Definition: circuitstats.c:859
void circuit_build_times_network_circ_success(circuit_build_times_t *cbt)
static circuit_build_times_t circ_times
Definition: circuitstats.c:65
STATIC int circuit_build_times_update_alpha(circuit_build_times_t *cbt)
static void circuit_build_times_scale_circ_counts(circuit_build_times_t *cbt)
static build_time_t circuit_build_times_max(const circuit_build_times_t *cbt)
Definition: circuitstats.c:785
static int32_t circuit_build_times_recent_circuit_count(const networkstatus_t *ns)
Definition: circuitstats.c:401
static void circuit_build_times_network_timeout(circuit_build_times_t *cbt, int did_onehop)
int circuit_build_times_enough_to_compute(const circuit_build_times_t *cbt)
Definition: circuitstats.c:255
static int32_t circuit_build_times_min_circs_to_observe(void)
Definition: circuitstats.c:235
int circuit_build_times_count_close(circuit_build_times_t *cbt, int did_onehop, time_t start_time)
int circuit_build_times_disabled(const or_options_t *options)
Definition: circuitstats.c:117
static double circuit_build_times_close_quantile(void)
Definition: circuitstats.c:292
circuit_build_times_t * get_circuit_build_times_mutable(void)
Definition: circuitstats.c:85
double circuit_build_times_quantile_cutoff(void)
Definition: circuitstats.c:267
static void circuit_build_times_network_close(circuit_build_times_t *cbt, int did_onehop, time_t start_time)
static int32_t circuit_build_times_test_frequency(void)
Definition: circuitstats.c:324
static int32_t circuit_build_times_max_timeouts(void)
Definition: circuitstats.c:182
void circuit_build_times_reset(circuit_build_times_t *cbt)
Definition: circuitstats.c:545
static int32_t circuit_build_times_default_num_xm_modes(void)
Definition: circuitstats.c:211
static int circuit_build_times_set_timeout_worker(circuit_build_times_t *cbt)
int circuit_build_times_add_time(circuit_build_times_t *cbt, build_time_t btime)
Definition: circuitstats.c:756
static double circuit_build_times_get_initial_timeout(void)
Definition: circuitstats.c:514
static int32_t circuit_build_times_min_timeout(void)
Definition: circuitstats.c:348
double circuit_build_times_close_rate(const circuit_build_times_t *cbt)
int circuit_build_times_needs_circuits(const circuit_build_times_t *cbt)
Header file for circuitstats.c.
#define CBT_BIN_WIDTH
Definition: circuitstats.h:59
#define CBT_NCIRCUITS_TO_OBSERVE
Definition: circuitstats.h:56
#define CBT_DEFAULT_CLOSE_QUANTILE
Definition: circuitstats.h:82
#define CBT_SAVE_STATE_EVERY
Definition: circuitstats.h:74
uint32_t build_time_t
Definition: circuitstats.h:25
#define CBT_BUILD_ABANDONED
Definition: circuitstats.h:70
#define CBT_DEFAULT_TIMEOUT_INITIAL_VALUE
Definition: circuitstats.h:128
#define CBT_DEFAULT_MIN_CIRCUITS_TO_OBSERVE
Definition: circuitstats.h:107
#define CBT_DEFAULT_TEST_FREQUENCY
Definition: circuitstats.h:118
#define CBT_DEFAULT_MAX_RECENT_TIMEOUT_COUNT
Definition: circuitstats.h:102
#define CBT_DEFAULT_TIMEOUT_MIN_VALUE
Definition: circuitstats.h:123
#define CBT_DEFAULT_NUM_XM_MODES
Definition: circuitstats.h:62
#define CBT_DEFAULT_RECENT_CIRCUITS
Definition: circuitstats.h:90
#define CBT_DEFAULT_QUANTILE_CUTOFF
Definition: circuitstats.h:112
void circuit_change_purpose(circuit_t *circ, uint8_t new_purpose)
Definition: circuituse.c:3028
Header file for circuituse.c.
#define MAX(a, b)
Definition: cmp.h:22
const or_options_t * get_options(void)
Definition: config.c:926
Header file for config.c.
Header for confline.c.
Header for confmgt.c.
int control_event_network_liveness_update(int liveness)
Header file for control_events.c.
Path structures for origin circuits.
Common functions for using (pseudo-)random number generators.
double crypto_rand_double(void)
int crypto_rand_int(unsigned int max)
double tor_mathlog(double d)
Definition: fp.c:22
long tor_lround(double d)
Definition: fp.c:31
Header for fp.c.
Header file containing service data for the HS subsystem.
#define LD_BUG
Definition: log.h:86
#define LD_GENERAL
Definition: log.h:62
#define LD_DIR
Definition: log.h:88
#define LD_CIRC
Definition: log.h:82
Header file for mainloop.c.
#define tor_free(p)
Definition: malloc.h:56
int32_t networkstatus_get_param(const networkstatus_t *ns, const char *param_name, int32_t default_val, int32_t min_val, int32_t max_val)
Header file for networkstatus.c.
Master header file for Tor-specific functionality.
#define DEFAULT_ROUTE_LEN
Definition: or.h:890
#define TO_CIRCUIT(x)
Definition: or.h:836
The or_state_t structure, which represents Tor's state file.
Origin circuit structure.
unsigned long tor_parse_ulong(const char *s, int base, unsigned long min, unsigned long max, int *ok, char **next)
Definition: parse_int.c:78
int tor_asprintf(char **strp, const char *fmt,...)
Definition: printf.c:75
void reschedule_descriptor_update_check(void)
Header for feature/relay/relay_periodic.c.
Header file for router.c.
smartlist_t * smartlist_new(void)
#define SMARTLIST_FOREACH(sl, type, var, cmd)
int smartlist_split_string(smartlist_t *sl, const char *str, const char *sep, int flags, int max)
or_state_t * get_or_state(void)
Definition: statefile.c:220
void or_state_mark_dirty(or_state_t *state, time_t when)
Definition: statefile.c:784
int did_last_state_file_write_fail(void)
Definition: statefile.c:547
Header for statefile.c.
build_time_t circuit_build_times[CBT_NCIRCUITS_TO_OBSERVE]
Definition: circuitstats.h:180
network_liveness_t liveness
Definition: circuitstats.h:186
uint8_t purpose
Definition: circuit_st.h:112
struct timeval timestamp_began
Definition: circuit_st.h:166
uint8_t state
Definition: crypt_path_st.h:73
int8_t * timeouts_after_firsthop
Definition: circuitstats.h:170
int CircuitBuildTimeout
int LearnCircuitBuildTimeout
struct config_line_t * BuildtimeHistogram
Definition: or_state_st.h:80
unsigned int relaxed_timeout
crypt_path_t * cpath
#define STATIC
Definition: testsupport.h:32
void format_local_iso_time(char *buf, time_t t)
Definition: time_fmt.c:316
void tor_gettimeofday(struct timeval *timeval)
long tv_mdiff(const struct timeval *start, const struct timeval *end)
Definition: tvdiff.c:102
Header for tvdiff.c.
#define tor_assert(expr)
Definition: util_bug.h:102
#define tor_fragile_assert()
Definition: util_bug.h:270