Tor 0.4.9.0-alpha-dev
consdiff.c
Go to the documentation of this file.
1/* Copyright (c) 2014, Daniel Martí
2 * Copyright (c) 2014-2021, The Tor Project, Inc. */
3/* See LICENSE for licensing information */
4
5/**
6 * \file consdiff.c
7 * \brief Consensus diff implementation, including both the generation and the
8 * application of diffs in a minimal ed format.
9 *
10 * The consensus diff application is done in consdiff_apply_diff, which relies
11 * on apply_ed_diff for the main ed diff part and on some digest helper
12 * functions to check the digest hashes found in the consensus diff header.
13 *
14 * The consensus diff generation is more complex. consdiff_gen_diff generates
15 * it, relying on gen_ed_diff to generate the ed diff and some digest helper
16 * functions to generate the digest hashes.
17 *
18 * gen_ed_diff is the tricky bit. In it simplest form, it will take quadratic
19 * time and linear space to generate an ed diff given two smartlists. As shown
20 * in its comment section, calling calc_changes on the entire two consensuses
21 * will calculate what is to be added and what is to be deleted in the diff.
22 * Its comment section briefly explains how it works.
23 *
24 * In our case specific to consensuses, we take advantage of the fact that
25 * consensuses list routers sorted by their identities. We use that
26 * information to avoid running calc_changes on the whole smartlists.
27 * gen_ed_diff will navigate through the two consensuses identity by identity
28 * and will send small couples of slices to calc_changes, keeping the running
29 * time near-linear. This is explained in more detail in the gen_ed_diff
30 * comments.
31 *
32 * The allocation strategy tries to save time and memory by avoiding needless
33 * copies. Instead of actually splitting the inputs into separate strings, we
34 * allocate cdline_t objects, each of which represents a line in the original
35 * object or in the output. We use memarea_t allocators to manage the
36 * temporary memory we use when generating or applying diffs.
37 **/
38
39#define CONSDIFF_PRIVATE
40
41#include "core/or/or.h"
43#include "lib/memarea/memarea.h"
45
46static const char* ns_diff_version = "network-status-diff-version 1";
47static const char* hash_token = "hash";
48
49static char *consensus_join_lines(const smartlist_t *inp);
50
51/** Return true iff a and b have the same contents. */
52STATIC int
53lines_eq(const cdline_t *a, const cdline_t *b)
54{
55 return a->len == b->len && fast_memeq(a->s, b->s, a->len);
56}
57
58/** Return true iff a has the same contents as the nul-terminated string b. */
59STATIC int
60line_str_eq(const cdline_t *a, const char *b)
61{
62 const size_t len = strlen(b);
63 tor_assert(len <= UINT32_MAX);
64 cdline_t bline = { b, (uint32_t)len };
65 return lines_eq(a, &bline);
66}
67
68/** Return true iff a begins with the same contents as the nul-terminated
69 * string b. */
70static int
71line_starts_with_str(const cdline_t *a, const char *b)
72{
73 const size_t len = strlen(b);
74 tor_assert(len <= UINT32_MAX);
75 return a->len >= len && fast_memeq(a->s, b, len);
76}
77
78/** Return a new cdline_t holding as its contents the nul-terminated
79 * string s. Use the provided memory area for storage. */
80static cdline_t *
81cdline_linecpy(memarea_t *area, const char *s)
82{
83 size_t len = strlen(s);
84 const char *ss = memarea_memdup(area, s, len);
85 cdline_t *line = memarea_alloc(area, sizeof(cdline_t));
86 line->s = ss;
87 line->len = (uint32_t)len;
88 return line;
89}
90
91/** Add a cdline_t to <b>lst</b> holding as its contents the nul-terminated
92 * string s. Use the provided memory area for storage. */
93STATIC void
94smartlist_add_linecpy(smartlist_t *lst, memarea_t *area, const char *s)
95{
96 smartlist_add(lst, cdline_linecpy(area, s));
97}
98
99/** Compute the digest of <b>cons</b>, and store the result in
100 * <b>digest_out</b>. Return 0 on success, -1 on failure. */
101/* This is a separate, mockable function so that we can override it when
102 * fuzzing. */
103MOCK_IMPL(STATIC int,
104consensus_compute_digest,(const char *cons, size_t len,
105 consensus_digest_t *digest_out))
106{
107 int r = crypto_digest256((char*)digest_out->sha3_256,
108 cons, len, DIGEST_SHA3_256);
109 return r;
110}
111
112/** Compute the digest-as-signed of <b>cons</b>, and store the result in
113 * <b>digest_out</b>. Return 0 on success, -1 on failure. */
114/* This is a separate, mockable function so that we can override it when
115 * fuzzing. */
116MOCK_IMPL(STATIC int,
117consensus_compute_digest_as_signed,(const char *cons, size_t len,
118 consensus_digest_t *digest_out))
119{
120 return router_get_networkstatus_v3_sha3_as_signed(digest_out->sha3_256,
121 cons, len);
122}
123
124/** Return true iff <b>d1</b> and <b>d2</b> contain the same digest */
125/* This is a separate, mockable function so that we can override it when
126 * fuzzing. */
127MOCK_IMPL(STATIC int,
128consensus_digest_eq,(const uint8_t *d1,
129 const uint8_t *d2))
130{
131 return fast_memeq(d1, d2, DIGEST256_LEN);
132}
133
134/** Create (allocate) a new slice from a smartlist. Assumes that the start
135 * and the end indexes are within the bounds of the initial smartlist. The end
136 * element is not part of the resulting slice. If end is -1, the slice is to
137 * reach the end of the smartlist.
138 */
139STATIC smartlist_slice_t *
140smartlist_slice(const smartlist_t *list, int start, int end)
141{
142 int list_len = smartlist_len(list);
143 tor_assert(start >= 0);
144 tor_assert(start <= list_len);
145 if (end == -1) {
146 end = list_len;
147 }
148 tor_assert(start <= end);
149
150 smartlist_slice_t *slice = tor_malloc(sizeof(smartlist_slice_t));
151 slice->list = list;
152 slice->offset = start;
153 slice->len = end - start;
154 return slice;
155}
156
157/** Helper: Compute the longest common subsequence lengths for the two slices.
158 * Used as part of the diff generation to find the column at which to split
159 * slice2 while still having the optimal solution.
160 * If direction is -1, the navigation is reversed. Otherwise it must be 1.
161 * The length of the resulting integer array is that of the second slice plus
162 * one.
163 */
164STATIC int *
165lcs_lengths(const smartlist_slice_t *slice1, const smartlist_slice_t *slice2,
166 int direction)
167{
168 size_t a_size = sizeof(int) * (slice2->len+1);
169
170 /* Resulting lcs lengths. */
171 int *result = tor_malloc_zero(a_size);
172 /* Copy of the lcs lengths from the last iteration. */
173 int *prev = tor_malloc(a_size);
174
175 tor_assert(direction == 1 || direction == -1);
176
177 int si = slice1->offset;
178 if (direction == -1) {
179 si += (slice1->len-1);
180 }
181
182 for (int i = 0; i < slice1->len; ++i, si+=direction) {
183
184 const cdline_t *line1 = smartlist_get(slice1->list, si);
185 /* Store the last results. */
186 memcpy(prev, result, a_size);
187
188 int sj = slice2->offset;
189 if (direction == -1) {
190 sj += (slice2->len-1);
191 }
192
193 for (int j = 0; j < slice2->len; ++j, sj+=direction) {
194
195 const cdline_t *line2 = smartlist_get(slice2->list, sj);
196 if (lines_eq(line1, line2)) {
197 /* If the lines are equal, the lcs is one line longer. */
198 result[j + 1] = prev[j] + 1;
199 } else {
200 /* If not, see what lcs parent path is longer. */
201 result[j + 1] = MAX(result[j], prev[j + 1]);
202 }
203 }
204 }
205 tor_free(prev);
206 return result;
207}
208
209/** Helper: Trim any number of lines that are equally at the start or the end
210 * of both slices.
211 */
212STATIC void
213trim_slices(smartlist_slice_t *slice1, smartlist_slice_t *slice2)
214{
215 while (slice1->len>0 && slice2->len>0) {
216 const cdline_t *line1 = smartlist_get(slice1->list, slice1->offset);
217 const cdline_t *line2 = smartlist_get(slice2->list, slice2->offset);
218 if (!lines_eq(line1, line2)) {
219 break;
220 }
221 slice1->offset++; slice1->len--;
222 slice2->offset++; slice2->len--;
223 }
224
225 int i1 = (slice1->offset+slice1->len)-1;
226 int i2 = (slice2->offset+slice2->len)-1;
227
228 while (slice1->len>0 && slice2->len>0) {
229 const cdline_t *line1 = smartlist_get(slice1->list, i1);
230 const cdline_t *line2 = smartlist_get(slice2->list, i2);
231 if (!lines_eq(line1, line2)) {
232 break;
233 }
234 i1--;
235 slice1->len--;
236 i2--;
237 slice2->len--;
238 }
239}
240
241/** Like smartlist_string_pos, but uses a cdline_t, and is restricted to the
242 * bounds of the slice.
243 */
244STATIC int
245smartlist_slice_string_pos(const smartlist_slice_t *slice,
246 const cdline_t *string)
247{
248 int end = slice->offset + slice->len;
249 for (int i = slice->offset; i < end; ++i) {
250 const cdline_t *el = smartlist_get(slice->list, i);
251 if (lines_eq(el, string)) {
252 return i;
253 }
254 }
255 return -1;
256}
257
258/** Helper: Set all the appropriate changed booleans to true. The first slice
259 * must be of length 0 or 1. All the lines of slice1 and slice2 which are not
260 * present in the other slice will be set to changed in their bool array.
261 * The two changed bool arrays are passed in the same order as the slices.
262 */
263STATIC void
264set_changed(bitarray_t *changed1, bitarray_t *changed2,
265 const smartlist_slice_t *slice1, const smartlist_slice_t *slice2)
266{
267 int toskip = -1;
268 tor_assert(slice1->len == 0 || slice1->len == 1);
269
270 if (slice1->len == 1) {
271 const cdline_t *line_common = smartlist_get(slice1->list, slice1->offset);
272 toskip = smartlist_slice_string_pos(slice2, line_common);
273 if (toskip == -1) {
274 bitarray_set(changed1, slice1->offset);
275 }
276 }
277 int end = slice2->offset + slice2->len;
278 for (int i = slice2->offset; i < end; ++i) {
279 if (i != toskip) {
280 bitarray_set(changed2, i);
281 }
282 }
283}
284
285/*
286 * Helper: Given that slice1 has been split by half into top and bot, we want
287 * to fetch the column at which to split slice2 so that we are still on track
288 * to the optimal diff solution, i.e. the shortest one. We use lcs_lengths
289 * since the shortest diff is just another way to say the longest common
290 * subsequence.
291 */
292static int
293optimal_column_to_split(const smartlist_slice_t *top,
294 const smartlist_slice_t *bot,
295 const smartlist_slice_t *slice2)
296{
297 int *lens_top = lcs_lengths(top, slice2, 1);
298 int *lens_bot = lcs_lengths(bot, slice2, -1);
299 int column=0, max_sum=-1;
300
301 for (int i = 0; i < slice2->len+1; ++i) {
302 int sum = lens_top[i] + lens_bot[slice2->len-i];
303 if (sum > max_sum) {
304 column = i;
305 max_sum = sum;
306 }
307 }
308 tor_free(lens_top);
309 tor_free(lens_bot);
310
311 return column;
312}
313
314/**
315 * Helper: Figure out what elements are new or gone on the second smartlist
316 * relative to the first smartlist, and store the booleans in the bitarrays.
317 * True on the first bitarray means the element is gone, true on the second
318 * bitarray means it's new.
319 *
320 * In its base case, either of the smartlists is of length <= 1 and we can
321 * quickly see what elements are new or are gone. In the other case, we will
322 * split one smartlist by half and we'll use optimal_column_to_split to find
323 * the optimal column at which to split the second smartlist so that we are
324 * finding the smallest diff possible.
325 */
326STATIC void
327calc_changes(smartlist_slice_t *slice1,
328 smartlist_slice_t *slice2,
329 bitarray_t *changed1, bitarray_t *changed2)
330{
331 trim_slices(slice1, slice2);
332
333 if (slice1->len <= 1) {
334 set_changed(changed1, changed2, slice1, slice2);
335
336 } else if (slice2->len <= 1) {
337 set_changed(changed2, changed1, slice2, slice1);
338
339 /* Keep on splitting the slices in two. */
340 } else {
341 smartlist_slice_t *top, *bot, *left, *right;
342
343 /* Split the first slice in half. */
344 int mid = slice1->len/2;
345 top = smartlist_slice(slice1->list, slice1->offset, slice1->offset+mid);
346 bot = smartlist_slice(slice1->list, slice1->offset+mid,
347 slice1->offset+slice1->len);
348
349 /* Split the second slice by the optimal column. */
350 int mid2 = optimal_column_to_split(top, bot, slice2);
351 left = smartlist_slice(slice2->list, slice2->offset, slice2->offset+mid2);
352 right = smartlist_slice(slice2->list, slice2->offset+mid2,
353 slice2->offset+slice2->len);
354
355 calc_changes(top, left, changed1, changed2);
356 calc_changes(bot, right, changed1, changed2);
357 tor_free(top);
358 tor_free(bot);
359 tor_free(left);
360 tor_free(right);
361 }
362}
363
364/* This table is from crypto.c. The SP and PAD defines are different. */
365#define NOT_VALID_BASE64 255
366#define X NOT_VALID_BASE64
367#define SP NOT_VALID_BASE64
368#define PAD NOT_VALID_BASE64
369static const uint8_t base64_compare_table[256] = {
370 X, X, X, X, X, X, X, X, X, SP, SP, SP, X, SP, X, X,
371 X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
372 SP, X, X, X, X, X, X, X, X, X, X, 62, X, X, X, 63,
373 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, X, X, X, PAD, X, X,
374 X, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
375 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, X, X, X, X, X,
376 X, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
377 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, X, X, X, X, X,
378 X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
379 X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
380 X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
381 X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
382 X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
383 X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
384 X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
385 X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
386};
387
388/** Helper: Get the identity hash from a router line, assuming that the line
389 * at least appears to be a router line and thus starts with "r ".
390 *
391 * If an identity hash is found, store it (without decoding it) in
392 * <b>hash_out</b>, and return 0. On failure, return -1.
393 */
394STATIC int
395get_id_hash(const cdline_t *line, cdline_t *hash_out)
396{
397 if (line->len < 2)
398 return -1;
399
400 /* Skip the router name. */
401 const char *hash = memchr(line->s + 2, ' ', line->len - 2);
402 if (!hash) {
403 return -1;
404 }
405
406 hash++;
407 const char *hash_end = hash;
408 /* Stop when the first non-base64 character is found. Use unsigned chars to
409 * avoid negative indexes causing crashes.
410 */
411 while (base64_compare_table[*((unsigned char*)hash_end)]
412 != NOT_VALID_BASE64 &&
413 hash_end < line->s + line->len) {
414 hash_end++;
415 }
416
417 /* Empty hash. */
418 if (hash_end == hash) {
419 return -1;
420 }
421
422 hash_out->s = hash;
423 /* Always true because lines are limited to this length */
424 tor_assert(hash_end >= hash);
425 tor_assert((size_t)(hash_end - hash) <= UINT32_MAX);
426 hash_out->len = (uint32_t)(hash_end - hash);
427
428 return 0;
429}
430
431/** Helper: Check that a line is a valid router entry. We must at least be
432 * able to fetch a proper identity hash from it for it to be valid.
433 */
434STATIC int
435is_valid_router_entry(const cdline_t *line)
436{
437 if (line->len < 2 || fast_memneq(line->s, "r ", 2))
438 return 0;
439 cdline_t tmp;
440 return (get_id_hash(line, &tmp) == 0);
441}
442
443/** Helper: Find the next router line starting at the current position.
444 * Assumes that cur is lower than the length of the smartlist, i.e. it is a
445 * line within the bounds of the consensus. The only exception is when we
446 * don't want to skip the first line, in which case cur will be -1.
447 */
448STATIC int
449next_router(const smartlist_t *cons, int cur)
450{
451 int len = smartlist_len(cons);
452 tor_assert(cur >= -1 && cur < len);
453
454 if (++cur >= len) {
455 return len;
456 }
457
458 const cdline_t *line = smartlist_get(cons, cur);
459 while (!is_valid_router_entry(line)) {
460 if (++cur >= len) {
461 return len;
462 }
463 line = smartlist_get(cons, cur);
464 }
465 return cur;
466}
467
468/** Helper: compare two base64-encoded identity hashes, which may be of
469 * different lengths. Comparison ends when the first non-base64 char is found.
470 */
471STATIC int
472base64cmp(const cdline_t *hash1, const cdline_t *hash2)
473{
474 /* NULL is always lower, useful for last_hash which starts at NULL. */
475 if (!hash1->s && !hash2->s) {
476 return 0;
477 }
478 if (!hash1->s) {
479 return -1;
480 }
481 if (!hash2->s) {
482 return 1;
483 }
484
485 /* Don't index with a char; char may be signed. */
486 const unsigned char *a = (unsigned char*)hash1->s;
487 const unsigned char *b = (unsigned char*)hash2->s;
488 const unsigned char *a_end = a + hash1->len;
489 const unsigned char *b_end = b + hash2->len;
490 while (1) {
491 uint8_t av = base64_compare_table[*a];
492 uint8_t bv = base64_compare_table[*b];
493 if (av == NOT_VALID_BASE64) {
494 if (bv == NOT_VALID_BASE64) {
495 /* Both ended with exactly the same characters. */
496 return 0;
497 } else {
498 /* hash2 goes on longer than hash1 and thus hash1 is lower. */
499 return -1;
500 }
501 } else if (bv == NOT_VALID_BASE64) {
502 /* hash1 goes on longer than hash2 and thus hash1 is greater. */
503 return 1;
504 } else if (av < bv) {
505 /* The first difference shows that hash1 is lower. */
506 return -1;
507 } else if (av > bv) {
508 /* The first difference shows that hash1 is greater. */
509 return 1;
510 } else {
511 a++;
512 b++;
513 if (a == a_end) {
514 if (b == b_end) {
515 return 0;
516 } else {
517 return -1;
518 }
519 } else if (b == b_end) {
520 return 1;
521 }
522 }
523 }
524}
525
526/** Structure used to remember the previous and current identity hash of
527 * the "r " lines in a consensus, to enforce well-ordering. */
528typedef struct router_id_iterator_t {
529 cdline_t last_hash;
530 cdline_t hash;
532
533#ifndef COCCI
534/**
535 * Initializer for a router_id_iterator_t.
536 */
537#define ROUTER_ID_ITERATOR_INIT { { NULL, 0 }, { NULL, 0 } }
538#endif /* !defined(COCCI) */
539
540/** Given an index *<b>idxp</b> into the consensus at <b>cons</b>, advance
541 * the index to the next router line ("r ...") in the consensus, or to
542 * an index one after the end of the list if there is no such line.
543 *
544 * Use <b>iter</b> to record the hash of the found router line, if any,
545 * and to enforce ordering on the hashes. If the hashes are mis-ordered,
546 * return -1. Else, return 0.
547 **/
548static int
550 const char *consname,
551 int *idxp,
553{
554 *idxp = next_router(cons, *idxp);
555 if (*idxp < smartlist_len(cons)) {
556 memcpy(&iter->last_hash, &iter->hash, sizeof(cdline_t));
557 if (get_id_hash(smartlist_get(cons, *idxp), &iter->hash) < 0 ||
558 base64cmp(&iter->hash, &iter->last_hash) <= 0) {
559 log_warn(LD_CONSDIFF, "Refusing to generate consensus diff because "
560 "the %s consensus doesn't have its router entries sorted "
561 "properly.", consname);
562 return -1;
563 }
564 }
565 return 0;
566}
567
568/** Line-prefix indicating the beginning of the signatures section that we
569 * intend to delete. */
570#define START_OF_SIGNATURES_SECTION "directory-signature "
571
572/** Pre-process a consensus in <b>cons</b> (represented as a list of cdline_t)
573 * to remove the signatures from it. If the footer is removed, return a
574 * cdline_t containing a delete command to delete the footer, allocated in
575 * <b>area</b>. If no footer is removed, return NULL.
576 *
577 * We remove the signatures here because they are not themselves signed, and
578 * as such there might be different encodings for them.
579 */
580static cdline_t *
582 smartlist_t *cons)
583{
584 int idx;
585 int dirsig_idx = -1;
586 for (idx = 0; idx < smartlist_len(cons); ++idx) {
587 cdline_t *line = smartlist_get(cons, idx);
589 dirsig_idx = idx;
590 break;
591 }
592 }
593 if (dirsig_idx >= 0) {
594 char buf[64];
595 while (smartlist_len(cons) > dirsig_idx)
596 smartlist_del(cons, dirsig_idx);
597 tor_snprintf(buf, sizeof(buf), "%d,$d", dirsig_idx+1);
598 return cdline_linecpy(area, buf);
599 } else {
600 return NULL;
601 }
602}
603
604/** Generate an ed diff as a smartlist from two consensuses, also given as
605 * smartlists. Will return NULL if the diff could not be generated, which can
606 * happen if any lines the script had to add matched "." or if the routers
607 * were not properly ordered.
608 *
609 * All cdline_t objects in the resulting object are either references to lines
610 * in one of the inputs, or are newly allocated lines in the provided memarea.
611 *
612 * This implementation is consensus-specific. To generate an ed diff for any
613 * given input in quadratic time, you can replace all the code until the
614 * navigation in reverse order with the following:
615 *
616 * int len1 = smartlist_len(cons1);
617 * int len2 = smartlist_len(cons2);
618 * bitarray_t *changed1 = bitarray_init_zero(len1);
619 * bitarray_t *changed2 = bitarray_init_zero(len2);
620 * cons1_sl = smartlist_slice(cons1, 0, -1);
621 * cons2_sl = smartlist_slice(cons2, 0, -1);
622 * calc_changes(cons1_sl, cons2_sl, changed1, changed2);
623 */
625gen_ed_diff(const smartlist_t *cons1_orig, const smartlist_t *cons2,
626 memarea_t *area)
627{
628 smartlist_t *cons1 = smartlist_new();
629 smartlist_add_all(cons1, cons1_orig);
630 cdline_t *remove_trailer = preprocess_consensus(area, cons1);
631
632 int len1 = smartlist_len(cons1);
633 int len2 = smartlist_len(cons2);
634 smartlist_t *result = smartlist_new();
635
636 if (remove_trailer) {
637 /* There's a delete-the-trailer line at the end, so add it here. */
638 smartlist_add(result, remove_trailer);
639 }
640
641 /* Initialize the changed bitarrays to zero, so that calc_changes only needs
642 * to set the ones that matter and leave the rest untouched.
643 */
644 bitarray_t *changed1 = bitarray_init_zero(len1);
645 bitarray_t *changed2 = bitarray_init_zero(len2);
646 int i1=-1, i2=-1;
647 int start1=0, start2=0;
648
649 /* To check that hashes are ordered properly */
652
653 /* i1 and i2 are initialized at the first line of each consensus. They never
654 * reach past len1 and len2 respectively, since next_router doesn't let that
655 * happen. i1 and i2 are advanced by at least one line at each iteration as
656 * long as they have not yet reached len1 and len2, so the loop is
657 * guaranteed to end, and each pair of (i1,i2) will be inspected at most
658 * once.
659 */
660 while (i1 < len1 || i2 < len2) {
661
662 /* Advance each of the two navigation positions by one router entry if not
663 * yet at the end.
664 */
665 if (i1 < len1) {
666 if (find_next_router_line(cons1, "base", &i1, &iter1) < 0) {
667 goto error_cleanup;
668 }
669 }
670
671 if (i2 < len2) {
672 if (find_next_router_line(cons2, "target", &i2, &iter2) < 0) {
673 goto error_cleanup;
674 }
675 }
676
677 /* If we have reached the end of both consensuses, there is no need to
678 * compare hashes anymore, since this is the last iteration.
679 */
680 if (i1 < len1 || i2 < len2) {
681
682 /* Keep on advancing the lower (by identity hash sorting) position until
683 * we have two matching positions. The only other possible outcome is
684 * that a lower position reaches the end of the consensus before it can
685 * reach a hash that is no longer the lower one. Since there will always
686 * be a lower hash for as long as the loop runs, one of the two indexes
687 * will always be incremented, thus assuring that the loop must end
688 * after a finite number of iterations. If that cannot be because said
689 * consensus has already reached the end, both are extended to their
690 * respecting ends since we are done.
691 */
692 int cmp = base64cmp(&iter1.hash, &iter2.hash);
693 while (cmp != 0) {
694 if (i1 < len1 && cmp < 0) {
695 if (find_next_router_line(cons1, "base", &i1, &iter1) < 0) {
696 goto error_cleanup;
697 }
698 if (i1 == len1) {
699 /* We finished the first consensus, so grab all the remaining
700 * lines of the second consensus and finish up.
701 */
702 i2 = len2;
703 break;
704 }
705 } else if (i2 < len2 && cmp > 0) {
706 if (find_next_router_line(cons2, "target", &i2, &iter2) < 0) {
707 goto error_cleanup;
708 }
709 if (i2 == len2) {
710 /* We finished the second consensus, so grab all the remaining
711 * lines of the first consensus and finish up.
712 */
713 i1 = len1;
714 break;
715 }
716 } else {
717 i1 = len1;
718 i2 = len2;
719 break;
720 }
721 cmp = base64cmp(&iter1.hash, &iter2.hash);
722 }
723 }
724
725 /* Make slices out of these chunks (up to the common router entry) and
726 * calculate the changes for them.
727 * Error if any of the two slices are longer than 10K lines. That should
728 * never happen with any pair of real consensuses. Feeding more than 10K
729 * lines to calc_changes would be very slow anyway.
730 */
731#define MAX_LINE_COUNT (10000)
732 if (i1-start1 > MAX_LINE_COUNT || i2-start2 > MAX_LINE_COUNT) {
733 log_warn(LD_CONSDIFF, "Refusing to generate consensus diff because "
734 "we found too few common router ids.");
735 goto error_cleanup;
736 }
737
738 smartlist_slice_t *cons1_sl = smartlist_slice(cons1, start1, i1);
739 smartlist_slice_t *cons2_sl = smartlist_slice(cons2, start2, i2);
740 calc_changes(cons1_sl, cons2_sl, changed1, changed2);
741 tor_free(cons1_sl);
742 tor_free(cons2_sl);
743 start1 = i1, start2 = i2;
744 }
745
746 /* Navigate the changes in reverse order and generate one ed command for
747 * each chunk of changes.
748 */
749 i1=len1-1, i2=len2-1;
750 char buf[128];
751 while (i1 >= 0 || i2 >= 0) {
752
753 int start1x, start2x, end1, end2, added, deleted;
754
755 /* We are at a point were no changed bools are true, so just keep going. */
756 if (!(i1 >= 0 && bitarray_is_set(changed1, i1)) &&
757 !(i2 >= 0 && bitarray_is_set(changed2, i2))) {
758 if (i1 >= 0) {
759 i1--;
760 }
761 if (i2 >= 0) {
762 i2--;
763 }
764 continue;
765 }
766
767 end1 = i1, end2 = i2;
768
769 /* Grab all contiguous changed lines */
770 while (i1 >= 0 && bitarray_is_set(changed1, i1)) {
771 i1--;
772 }
773 while (i2 >= 0 && bitarray_is_set(changed2, i2)) {
774 i2--;
775 }
776
777 start1x = i1+1, start2x = i2+1;
778 added = end2-i2, deleted = end1-i1;
779
780 if (added == 0) {
781 if (deleted == 1) {
782 tor_snprintf(buf, sizeof(buf), "%id", start1x+1);
783 smartlist_add_linecpy(result, area, buf);
784 } else {
785 tor_snprintf(buf, sizeof(buf), "%i,%id", start1x+1, start1x+deleted);
786 smartlist_add_linecpy(result, area, buf);
787 }
788 } else {
789 int i;
790 if (deleted == 0) {
791 tor_snprintf(buf, sizeof(buf), "%ia", start1x);
792 smartlist_add_linecpy(result, area, buf);
793 } else if (deleted == 1) {
794 tor_snprintf(buf, sizeof(buf), "%ic", start1x+1);
795 smartlist_add_linecpy(result, area, buf);
796 } else {
797 tor_snprintf(buf, sizeof(buf), "%i,%ic", start1x+1, start1x+deleted);
798 smartlist_add_linecpy(result, area, buf);
799 }
800
801 for (i = start2x; i <= end2; ++i) {
802 cdline_t *line = smartlist_get(cons2, i);
803 if (line_str_eq(line, ".")) {
804 log_warn(LD_CONSDIFF, "Cannot generate consensus diff because "
805 "one of the lines to be added is \".\".");
806 goto error_cleanup;
807 }
808 smartlist_add(result, line);
809 }
810 smartlist_add_linecpy(result, area, ".");
811 }
812 }
813
814 smartlist_free(cons1);
815 bitarray_free(changed1);
816 bitarray_free(changed2);
817
818 return result;
819
820 error_cleanup:
821
822 smartlist_free(cons1);
823 bitarray_free(changed1);
824 bitarray_free(changed2);
825
826 smartlist_free(result);
827
828 return NULL;
829}
830
831/* Helper: Read a base-10 number between 0 and INT32_MAX from <b>s</b> and
832 * store it in <b>num_out</b>. Advance <b>s</b> to the character immediately
833 * after the number. Return 0 on success, -1 on failure. */
834static int
835get_linenum(const char **s, int *num_out)
836{
837 int ok;
838 char *next;
839 if (!TOR_ISDIGIT(**s)) {
840 return -1;
841 }
842 *num_out = (int) tor_parse_long(*s, 10, 0, INT32_MAX, &ok, &next);
843 if (ok && next) {
844 *s = next;
845 return 0;
846 } else {
847 return -1;
848 }
849}
850
851/** Apply the ed diff, starting at <b>diff_starting_line</b>, to the consensus
852 * and return a new consensus, also as a line-based smartlist. Will return
853 * NULL if the ed diff is not properly formatted.
854 *
855 * All cdline_t objects in the resulting object are references to lines
856 * in one of the inputs; nothing is copied.
857 */
859apply_ed_diff(const smartlist_t *cons1, const smartlist_t *diff,
860 int diff_starting_line)
861{
862 int diff_len = smartlist_len(diff);
863 int j = smartlist_len(cons1);
864 smartlist_t *cons2 = smartlist_new();
865
866 for (int i=diff_starting_line; i<diff_len; ++i) {
867 const cdline_t *diff_cdline = smartlist_get(diff, i);
868 char diff_line[128];
869
870 if (diff_cdline->len > sizeof(diff_line) - 1) {
871 log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
872 "an ed command was far too long");
873 goto error_cleanup;
874 }
875 /* Copy the line to make it nul-terminated. */
876 memcpy(diff_line, diff_cdline->s, diff_cdline->len);
877 diff_line[diff_cdline->len] = 0;
878 const char *ptr = diff_line;
879 int start = 0, end = 0;
880 int had_range = 0;
881 int end_was_eof = 0;
882 if (get_linenum(&ptr, &start) < 0) {
883 log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
884 "an ed command was missing a line number.");
885 goto error_cleanup;
886 }
887 if (*ptr == ',') {
888 /* Two-item range */
889 had_range = 1;
890 ++ptr;
891 if (*ptr == '$') {
892 end_was_eof = 1;
893 end = smartlist_len(cons1);
894 ++ptr;
895 } else if (get_linenum(&ptr, &end) < 0) {
896 log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
897 "an ed command was missing a range end line number.");
898 goto error_cleanup;
899 }
900 /* Incoherent range. */
901 if (end <= start) {
902 log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
903 "an invalid range was found in an ed command.");
904 goto error_cleanup;
905 }
906 } else {
907 /* We'll take <n1> as <n1>,<n1> for simplicity. */
908 end = start;
909 }
910
911 if (end > j) {
912 log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
913 "its commands are not properly sorted in reverse order.");
914 goto error_cleanup;
915 }
916
917 if (*ptr == '\0') {
918 log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
919 "a line with no ed command was found");
920 goto error_cleanup;
921 }
922
923 if (*(ptr+1) != '\0') {
924 log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
925 "an ed command longer than one char was found.");
926 goto error_cleanup;
927 }
928
929 char action = *ptr;
930
931 switch (action) {
932 case 'a':
933 case 'c':
934 case 'd':
935 break;
936 default:
937 log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
938 "an unrecognised ed command was found.");
939 goto error_cleanup;
940 }
941
942 /** $ is not allowed with non-d actions. */
943 if (end_was_eof && action != 'd') {
944 log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
945 "it wanted to use $ with a command other than delete");
946 goto error_cleanup;
947 }
948
949 /* 'a' commands are not allowed to have ranges. */
950 if (had_range && action == 'a') {
951 log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
952 "it wanted to add lines after a range.");
953 goto error_cleanup;
954 }
955
956 /* Add unchanged lines. */
957 for (; j && j > end; --j) {
958 cdline_t *cons_line = smartlist_get(cons1, j-1);
959 smartlist_add(cons2, cons_line);
960 }
961
962 /* Ignore removed lines. */
963 if (action == 'c' || action == 'd') {
964 while (--j >= start) {
965 /* Skip line */
966 }
967 }
968
969 /* Add new lines in reverse order, since it will all be reversed at the
970 * end.
971 */
972 if (action == 'a' || action == 'c') {
973 int added_end = i;
974
975 i++; /* Skip the line with the range and command. */
976 while (i < diff_len) {
977 if (line_str_eq(smartlist_get(diff, i), ".")) {
978 break;
979 }
980 if (++i == diff_len) {
981 log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
982 "it has lines to be inserted that don't end with a \".\".");
983 goto error_cleanup;
984 }
985 }
986
987 int added_i = i-1;
988
989 /* It would make no sense to add zero new lines. */
990 if (added_i == added_end) {
991 log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
992 "it has an ed command that tries to insert zero lines.");
993 goto error_cleanup;
994 }
995
996 while (added_i > added_end) {
997 cdline_t *added_line = smartlist_get(diff, added_i--);
998 smartlist_add(cons2, added_line);
999 }
1000 }
1001 }
1002
1003 /* Add remaining unchanged lines. */
1004 for (; j > 0; --j) {
1005 cdline_t *cons_line = smartlist_get(cons1, j-1);
1006 smartlist_add(cons2, cons_line);
1007 }
1008
1009 /* Reverse the whole thing since we did it from the end. */
1010 smartlist_reverse(cons2);
1011 return cons2;
1012
1013 error_cleanup:
1014
1015 smartlist_free(cons2);
1016
1017 return NULL;
1018}
1019
1020/** Generate a consensus diff as a smartlist from two given consensuses, also
1021 * as smartlists. Will return NULL if the consensus diff could not be
1022 * generated. Neither of the two consensuses are modified in any way, so it's
1023 * up to the caller to free their resources.
1024 */
1027 const smartlist_t *cons2,
1028 const consensus_digest_t *digests1,
1029 const consensus_digest_t *digests2,
1030 memarea_t *area)
1031{
1032 smartlist_t *ed_diff = gen_ed_diff(cons1, cons2, area);
1033 /* ed diff could not be generated - reason already logged by gen_ed_diff. */
1034 if (!ed_diff) {
1035 goto error_cleanup;
1036 }
1037
1038 /* See that the script actually produces what we want. */
1039 smartlist_t *ed_cons2 = apply_ed_diff(cons1, ed_diff, 0);
1040 if (!ed_cons2) {
1041 /* LCOV_EXCL_START -- impossible if diff generation is correct */
1042 log_warn(LD_BUG|LD_CONSDIFF, "Refusing to generate consensus diff because "
1043 "the generated ed diff could not be tested to successfully generate "
1044 "the target consensus.");
1045 goto error_cleanup;
1046 /* LCOV_EXCL_STOP */
1047 }
1048
1049 int cons2_eq = 1;
1050 if (smartlist_len(cons2) == smartlist_len(ed_cons2)) {
1051 SMARTLIST_FOREACH_BEGIN(cons2, const cdline_t *, line1) {
1052 const cdline_t *line2 = smartlist_get(ed_cons2, line1_sl_idx);
1053 if (!lines_eq(line1, line2)) {
1054 cons2_eq = 0;
1055 break;
1056 }
1057 } SMARTLIST_FOREACH_END(line1);
1058 } else {
1059 cons2_eq = 0;
1060 }
1061 smartlist_free(ed_cons2);
1062 if (!cons2_eq) {
1063 /* LCOV_EXCL_START -- impossible if diff generation is correct. */
1064 log_warn(LD_BUG|LD_CONSDIFF, "Refusing to generate consensus diff because "
1065 "the generated ed diff did not generate the target consensus "
1066 "successfully when tested.");
1067 goto error_cleanup;
1068 /* LCOV_EXCL_STOP */
1069 }
1070
1071 char cons1_hash_hex[HEX_DIGEST256_LEN+1];
1072 char cons2_hash_hex[HEX_DIGEST256_LEN+1];
1073 base16_encode(cons1_hash_hex, HEX_DIGEST256_LEN+1,
1074 (const char*)digests1->sha3_256, DIGEST256_LEN);
1075 base16_encode(cons2_hash_hex, HEX_DIGEST256_LEN+1,
1076 (const char*)digests2->sha3_256, DIGEST256_LEN);
1077
1078 /* Create the resulting consensus diff. */
1079 char buf[160];
1080 smartlist_t *result = smartlist_new();
1081 tor_snprintf(buf, sizeof(buf), "%s", ns_diff_version);
1082 smartlist_add_linecpy(result, area, buf);
1083 tor_snprintf(buf, sizeof(buf), "%s %s %s", hash_token,
1084 cons1_hash_hex, cons2_hash_hex);
1085 smartlist_add_linecpy(result, area, buf);
1086 smartlist_add_all(result, ed_diff);
1087 smartlist_free(ed_diff);
1088 return result;
1089
1090 error_cleanup:
1091
1092 if (ed_diff) {
1093 /* LCOV_EXCL_START -- ed_diff is NULL except in unreachable cases above */
1094 smartlist_free(ed_diff);
1095 /* LCOV_EXCL_STOP */
1096 }
1097
1098 return NULL;
1099}
1100
1101/** Fetch the digest of the base consensus in the consensus diff, encoded in
1102 * base16 as found in the diff itself. digest1_out and digest2_out must be of
1103 * length DIGEST256_LEN or larger if not NULL.
1104 */
1105int
1107 char *digest1_out,
1108 char *digest2_out)
1109{
1110 smartlist_t *hash_words = NULL;
1111 const cdline_t *format;
1112 char cons1_hash[DIGEST256_LEN], cons2_hash[DIGEST256_LEN];
1113 char *cons1_hash_hex, *cons2_hash_hex;
1114 if (smartlist_len(diff) < 2) {
1115 log_info(LD_CONSDIFF, "The provided consensus diff is too short.");
1116 goto error_cleanup;
1117 }
1118
1119 /* Check that it's the format and version we know. */
1120 format = smartlist_get(diff, 0);
1121 if (!line_str_eq(format, ns_diff_version)) {
1122 log_warn(LD_CONSDIFF, "The provided consensus diff format is not known.");
1123 goto error_cleanup;
1124 }
1125
1126 /* Grab the base16 digests. */
1127 hash_words = smartlist_new();
1128 {
1129 const cdline_t *line2 = smartlist_get(diff, 1);
1130 char *h = tor_memdup_nulterm(line2->s, line2->len);
1131 smartlist_split_string(hash_words, h, " ", 0, 4);
1132 tor_free(h);
1133 }
1134
1135 /* There have to be three words, the first of which must be hash_token. */
1136 if (smartlist_len(hash_words) != 3 ||
1137 strcmp(smartlist_get(hash_words, 0), hash_token)) {
1138 log_info(LD_CONSDIFF, "The provided consensus diff does not include "
1139 "the necessary digests.");
1140 goto error_cleanup;
1141 }
1142
1143 /* Expected hashes as found in the consensus diff header. They must be of
1144 * length HEX_DIGEST256_LEN, normally 64 hexadecimal characters.
1145 * If any of the decodings fail, error to make sure that the hashes are
1146 * proper base16-encoded digests.
1147 */
1148 cons1_hash_hex = smartlist_get(hash_words, 1);
1149 cons2_hash_hex = smartlist_get(hash_words, 2);
1150 if (strlen(cons1_hash_hex) != HEX_DIGEST256_LEN ||
1151 strlen(cons2_hash_hex) != HEX_DIGEST256_LEN) {
1152 log_info(LD_CONSDIFF, "The provided consensus diff includes "
1153 "base16-encoded digests of incorrect size.");
1154 goto error_cleanup;
1155 }
1156
1157 if (base16_decode(cons1_hash, DIGEST256_LEN,
1158 cons1_hash_hex, HEX_DIGEST256_LEN) != DIGEST256_LEN ||
1159 base16_decode(cons2_hash, DIGEST256_LEN,
1160 cons2_hash_hex, HEX_DIGEST256_LEN) != DIGEST256_LEN) {
1161 log_info(LD_CONSDIFF, "The provided consensus diff includes "
1162 "malformed digests.");
1163 goto error_cleanup;
1164 }
1165
1166 if (digest1_out) {
1167 memcpy(digest1_out, cons1_hash, DIGEST256_LEN);
1168 }
1169 if (digest2_out) {
1170 memcpy(digest2_out, cons2_hash, DIGEST256_LEN);
1171 }
1172
1173 SMARTLIST_FOREACH(hash_words, char *, cp, tor_free(cp));
1174 smartlist_free(hash_words);
1175 return 0;
1176
1177 error_cleanup:
1178
1179 if (hash_words) {
1180 SMARTLIST_FOREACH(hash_words, char *, cp, tor_free(cp));
1181 smartlist_free(hash_words);
1182 }
1183 return 1;
1184}
1185
1186/** Apply the consensus diff to the given consensus and return a new
1187 * consensus, also as a line-based smartlist. Will return NULL if the diff
1188 * could not be applied. Neither the consensus nor the diff are modified in
1189 * any way, so it's up to the caller to free their resources.
1190 */
1191char *
1193 const smartlist_t *diff,
1194 const consensus_digest_t *digests1)
1195{
1196 smartlist_t *cons2 = NULL;
1197 char *cons2_str = NULL;
1198 char e_cons1_hash[DIGEST256_LEN];
1199 char e_cons2_hash[DIGEST256_LEN];
1200
1201 if (consdiff_get_digests(diff, e_cons1_hash, e_cons2_hash) != 0) {
1202 goto error_cleanup;
1203 }
1204
1205 /* See that the consensus that was given to us matches its hash. */
1206 if (!consensus_digest_eq(digests1->sha3_256,
1207 (const uint8_t*)e_cons1_hash)) {
1208 char hex_digest1[HEX_DIGEST256_LEN+1];
1209 char e_hex_digest1[HEX_DIGEST256_LEN+1];
1210 log_warn(LD_CONSDIFF, "Refusing to apply consensus diff because "
1211 "the base consensus doesn't match the digest as found in "
1212 "the consensus diff header.");
1213 base16_encode(hex_digest1, HEX_DIGEST256_LEN+1,
1214 (const char *)digests1->sha3_256, DIGEST256_LEN);
1215 base16_encode(e_hex_digest1, HEX_DIGEST256_LEN+1,
1216 e_cons1_hash, DIGEST256_LEN);
1217 log_warn(LD_CONSDIFF, "Expected: %s; found: %s",
1218 hex_digest1, e_hex_digest1);
1219 goto error_cleanup;
1220 }
1221
1222 /* Grab the ed diff and calculate the resulting consensus. */
1223 /* Skip the first two lines. */
1224 cons2 = apply_ed_diff(cons1, diff, 2);
1225
1226 /* ed diff could not be applied - reason already logged by apply_ed_diff. */
1227 if (!cons2) {
1228 goto error_cleanup;
1229 }
1230
1231 cons2_str = consensus_join_lines(cons2);
1232
1233 consensus_digest_t cons2_digests;
1234 if (consensus_compute_digest(cons2_str, strlen(cons2_str),
1235 &cons2_digests) < 0) {
1236 /* LCOV_EXCL_START -- digest can't fail */
1237 log_warn(LD_CONSDIFF, "Could not compute digests of the consensus "
1238 "resulting from applying a consensus diff.");
1239 goto error_cleanup;
1240 /* LCOV_EXCL_STOP */
1241 }
1242
1243 /* See that the resulting consensus matches its hash. */
1244 if (!consensus_digest_eq(cons2_digests.sha3_256,
1245 (const uint8_t*)e_cons2_hash)) {
1246 log_warn(LD_CONSDIFF, "Refusing to apply consensus diff because "
1247 "the resulting consensus doesn't match the digest as found in "
1248 "the consensus diff header.");
1249 char hex_digest2[HEX_DIGEST256_LEN+1];
1250 char e_hex_digest2[HEX_DIGEST256_LEN+1];
1251 base16_encode(hex_digest2, HEX_DIGEST256_LEN+1,
1252 (const char *)cons2_digests.sha3_256, DIGEST256_LEN);
1253 base16_encode(e_hex_digest2, HEX_DIGEST256_LEN+1,
1254 e_cons2_hash, DIGEST256_LEN);
1255 log_warn(LD_CONSDIFF, "Expected: %s; found: %s",
1256 hex_digest2, e_hex_digest2);
1257 goto error_cleanup;
1258 }
1259
1260 goto done;
1261
1262 error_cleanup:
1263 tor_free(cons2_str); /* Sets it to NULL */
1264
1265 done:
1266 if (cons2) {
1267 smartlist_free(cons2);
1268 }
1269
1270 return cons2_str;
1271}
1272
1273/** Any consensus line longer than this means that the input is invalid. */
1274#define CONSENSUS_LINE_MAX_LEN (1<<20)
1275
1276/**
1277 * Helper: For every NL-terminated line in <b>s</b>, add a cdline referring to
1278 * that line (without trailing newline) to <b>out</b>. Return -1 if there are
1279 * any non-NL terminated lines; 0 otherwise.
1280 *
1281 * Unlike tor_split_lines, this function avoids ambiguity on its
1282 * handling of a final line that isn't NL-terminated.
1283 *
1284 * All cdline_t objects are allocated in the provided memarea. Strings
1285 * are not copied: if <b>s</b> changes or becomes invalid, then all
1286 * generated cdlines will become invalid.
1287 */
1288STATIC int
1290 const char *s, size_t len,
1291 memarea_t *area)
1292{
1293 const char *end_of_str = s + len;
1294
1295 while (s < end_of_str) {
1296 const char *eol = memchr(s, '\n', end_of_str - s);
1297 if (!eol) {
1298 /* File doesn't end with newline. */
1299 return -1;
1300 }
1301 if (eol - s > CONSENSUS_LINE_MAX_LEN) {
1302 /* Line is far too long. */
1303 return -1;
1304 }
1305 cdline_t *line = memarea_alloc(area, sizeof(cdline_t));
1306 line->s = s;
1307 line->len = (uint32_t)(eol - s);
1308 smartlist_add(out, line);
1309 s = eol+1;
1310 }
1311 return 0;
1312}
1313
1314/** Given a list of cdline_t, return a newly allocated string containing
1315 * all of the lines, terminated with NL, concatenated.
1316 *
1317 * Unlike smartlist_join_strings(), avoids lossy operations on empty
1318 * lists. */
1319static char *
1321{
1322 size_t n = 0;
1323 SMARTLIST_FOREACH(inp, const cdline_t *, cdline, n += cdline->len + 1);
1324 n += 1;
1325 char *result = tor_malloc(n);
1326 char *out = result;
1327 SMARTLIST_FOREACH_BEGIN(inp, const cdline_t *, cdline) {
1328 memcpy(out, cdline->s, cdline->len);
1329 out += cdline->len;
1330 *out++ = '\n';
1331 } SMARTLIST_FOREACH_END(cdline);
1332 *out++ = '\0';
1333 tor_assert(out == result+n);
1334 return result;
1335}
1336
1337/** Given two consensus documents, try to compute a diff between them. On
1338 * success, return a newly allocated string containing that diff. On failure,
1339 * return NULL. */
1340char *
1341consensus_diff_generate(const char *cons1, size_t cons1len,
1342 const char *cons2, size_t cons2len)
1343{
1344 consensus_digest_t d1, d2;
1345 smartlist_t *lines1 = NULL, *lines2 = NULL, *result_lines = NULL;
1346 int r1, r2;
1347 char *result = NULL;
1348
1349 r1 = consensus_compute_digest_as_signed(cons1, cons1len, &d1);
1350 r2 = consensus_compute_digest(cons2, cons2len, &d2);
1351 if (BUG(r1 < 0 || r2 < 0))
1352 return NULL; // LCOV_EXCL_LINE
1353
1354 memarea_t *area = memarea_new();
1355 lines1 = smartlist_new();
1356 lines2 = smartlist_new();
1357 if (consensus_split_lines(lines1, cons1, cons1len, area) < 0)
1358 goto done;
1359 if (consensus_split_lines(lines2, cons2, cons2len, area) < 0)
1360 goto done;
1361
1362 result_lines = consdiff_gen_diff(lines1, lines2, &d1, &d2, area);
1363
1364 done:
1365 if (result_lines) {
1366 result = consensus_join_lines(result_lines);
1367 smartlist_free(result_lines);
1368 }
1369
1370 memarea_drop_all(area);
1371 smartlist_free(lines1);
1372 smartlist_free(lines2);
1373
1374 return result;
1375}
1376
1377/** Given a consensus document and a diff, try to apply the diff to the
1378 * consensus. On success return a newly allocated string containing the new
1379 * consensus. On failure, return NULL. */
1380char *
1381consensus_diff_apply(const char *consensus,
1382 size_t consensus_len,
1383 const char *diff,
1384 size_t diff_len)
1385{
1386 consensus_digest_t d1;
1387 smartlist_t *lines1 = NULL, *lines2 = NULL;
1388 int r1;
1389 char *result = NULL;
1390 memarea_t *area = memarea_new();
1391
1392 r1 = consensus_compute_digest_as_signed(consensus, consensus_len, &d1);
1393 if (BUG(r1 < 0))
1394 goto done;
1395
1396 lines1 = smartlist_new();
1397 lines2 = smartlist_new();
1398 if (consensus_split_lines(lines1, consensus, consensus_len, area) < 0)
1399 goto done;
1400 if (consensus_split_lines(lines2, diff, diff_len, area) < 0)
1401 goto done;
1402
1403 result = consdiff_apply_diff(lines1, lines2, &d1);
1404
1405 done:
1406 smartlist_free(lines1);
1407 smartlist_free(lines2);
1408 memarea_drop_all(area);
1409
1410 return result;
1411}
1412
1413/** Return true iff, based on its header, <b>document</b> is likely
1414 * to be a consensus diff. */
1415int
1416looks_like_a_consensus_diff(const char *document, size_t len)
1417{
1418 return (len >= strlen(ns_diff_version) &&
1419 fast_memeq(document, ns_diff_version, strlen(ns_diff_version)));
1420}
#define X
Definition: binascii.c:356
int base16_decode(char *dest, size_t destlen, const char *src, size_t srclen)
Definition: binascii.c:506
void base16_encode(char *dest, size_t destlen, const char *src, size_t srclen)
Definition: binascii.c:478
unsigned int bitarray_t
Definition: bitarray.h:30
static bitarray_t * bitarray_init_zero(unsigned int n_bits)
Definition: bitarray.h:33
static void bitarray_set(bitarray_t *b, int bit)
Definition: bitarray.h:68
static unsigned int bitarray_is_set(bitarray_t *b, int bit)
Definition: bitarray.h:81
#define MAX(a, b)
Definition: cmp.h:22
STATIC void smartlist_add_linecpy(smartlist_t *lst, memarea_t *area, const char *s)
Definition: consdiff.c:94
#define CONSENSUS_LINE_MAX_LEN
Definition: consdiff.c:1274
STATIC int line_str_eq(const cdline_t *a, const char *b)
Definition: consdiff.c:60
static cdline_t * preprocess_consensus(memarea_t *area, smartlist_t *cons)
Definition: consdiff.c:581
STATIC int * lcs_lengths(const smartlist_slice_t *slice1, const smartlist_slice_t *slice2, int direction)
Definition: consdiff.c:165
char * consdiff_apply_diff(const smartlist_t *cons1, const smartlist_t *diff, const consensus_digest_t *digests1)
Definition: consdiff.c:1192
#define ROUTER_ID_ITERATOR_INIT
Definition: consdiff.c:537
static cdline_t * cdline_linecpy(memarea_t *area, const char *s)
Definition: consdiff.c:81
STATIC void trim_slices(smartlist_slice_t *slice1, smartlist_slice_t *slice2)
Definition: consdiff.c:213
STATIC smartlist_t * apply_ed_diff(const smartlist_t *cons1, const smartlist_t *diff, int diff_starting_line)
Definition: consdiff.c:859
STATIC void set_changed(bitarray_t *changed1, bitarray_t *changed2, const smartlist_slice_t *slice1, const smartlist_slice_t *slice2)
Definition: consdiff.c:264
static int find_next_router_line(const smartlist_t *cons, const char *consname, int *idxp, router_id_iterator_t *iter)
Definition: consdiff.c:549
STATIC int consensus_digest_eq(const uint8_t *d1, const uint8_t *d2)
Definition: consdiff.c:129
STATIC int base64cmp(const cdline_t *hash1, const cdline_t *hash2)
Definition: consdiff.c:472
STATIC int next_router(const smartlist_t *cons, int cur)
Definition: consdiff.c:449
static int line_starts_with_str(const cdline_t *a, const char *b)
Definition: consdiff.c:71
STATIC void calc_changes(smartlist_slice_t *slice1, smartlist_slice_t *slice2, bitarray_t *changed1, bitarray_t *changed2)
Definition: consdiff.c:327
char * consensus_diff_generate(const char *cons1, size_t cons1len, const char *cons2, size_t cons2len)
Definition: consdiff.c:1341
STATIC int consensus_compute_digest_as_signed(const char *cons, size_t len, consensus_digest_t *digest_out)
Definition: consdiff.c:118
smartlist_t * consdiff_gen_diff(const smartlist_t *cons1, const smartlist_t *cons2, const consensus_digest_t *digests1, const consensus_digest_t *digests2, memarea_t *area)
Definition: consdiff.c:1026
static char * consensus_join_lines(const smartlist_t *inp)
Definition: consdiff.c:1320
STATIC int is_valid_router_entry(const cdline_t *line)
Definition: consdiff.c:435
char * consensus_diff_apply(const char *consensus, size_t consensus_len, const char *diff, size_t diff_len)
Definition: consdiff.c:1381
STATIC smartlist_t * gen_ed_diff(const smartlist_t *cons1_orig, const smartlist_t *cons2, memarea_t *area)
Definition: consdiff.c:625
STATIC int consensus_split_lines(smartlist_t *out, const char *s, size_t len, memarea_t *area)
Definition: consdiff.c:1289
STATIC smartlist_slice_t * smartlist_slice(const smartlist_t *list, int start, int end)
Definition: consdiff.c:140
STATIC int get_id_hash(const cdline_t *line, cdline_t *hash_out)
Definition: consdiff.c:395
STATIC int lines_eq(const cdline_t *a, const cdline_t *b)
Definition: consdiff.c:53
int looks_like_a_consensus_diff(const char *document, size_t len)
Definition: consdiff.c:1416
STATIC int smartlist_slice_string_pos(const smartlist_slice_t *slice, const cdline_t *string)
Definition: consdiff.c:245
#define START_OF_SIGNATURES_SECTION
Definition: consdiff.c:570
int consdiff_get_digests(const smartlist_t *diff, char *digest1_out, char *digest2_out)
Definition: consdiff.c:1106
STATIC int consensus_compute_digest(const char *cons, size_t len, consensus_digest_t *digest_out)
Definition: consdiff.c:105
Header for consdiff.c.
#define HEX_DIGEST256_LEN
Definition: crypto_digest.h:37
int crypto_digest256(char *digest, const char *m, size_t len, digest_algorithm_t algorithm)
#define fast_memeq(a, b, c)
Definition: di_ops.h:35
#define fast_memneq(a, b, c)
Definition: di_ops.h:42
#define DIGEST256_LEN
Definition: digest_sizes.h:23
#define LD_CONSDIFF
Definition: log.h:111
#define LD_BUG
Definition: log.h:86
#define tor_free(p)
Definition: malloc.h:56
void * memarea_alloc(memarea_t *area, size_t sz)
Definition: memarea.c:209
memarea_t * memarea_new(void)
Definition: memarea.c:153
void * memarea_memdup(memarea_t *area, const void *s, size_t n)
Definition: memarea.c:257
Header for memarea.c.
#define memarea_drop_all(area)
Definition: memarea.h:22
Header file for ns_parse.c.
int router_get_networkstatus_v3_sha3_as_signed(uint8_t *digest_out, const char *s, size_t len)
Definition: ns_parse.c:179
Master header file for Tor-specific functionality.
long tor_parse_long(const char *s, int base, long min, long max, int *ok, char **next)
Definition: parse_int.c:59
int tor_snprintf(char *str, size_t size, const char *format,...)
Definition: printf.c:27
void smartlist_reverse(smartlist_t *sl)
Definition: smartlist.c:59
void smartlist_add_all(smartlist_t *s1, const smartlist_t *s2)
smartlist_t * smartlist_new(void)
void smartlist_add(smartlist_t *sl, void *element)
void smartlist_del(smartlist_t *sl, int idx)
#define SMARTLIST_FOREACH_BEGIN(sl, type, var)
#define SMARTLIST_FOREACH(sl, type, var, cmd)
int smartlist_split_string(smartlist_t *sl, const char *str, const char *sep, int flags, int max)
#define STATIC
Definition: testsupport.h:32
#define MOCK_IMPL(rv, funcname, arglist)
Definition: testsupport.h:133
#define tor_assert(expr)
Definition: util_bug.h:103