rephist.c 85.8 KB
Newer Older
1
/* Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
2
 * Copyright (c) 2007-2021, The Tor Project, Inc. */
3
4
/* See LICENSE for licensing information */

Nick Mathewson's avatar
Nick Mathewson committed
5
6
/**
 * \file rephist.c
7
8
9
 * \brief Basic history and performance-tracking functionality.
 *
 * Basic history and performance-tracking functionality to remember
10
 *    which servers have worked in the past, how much bandwidth we've
11
 *    been using, which ports we tend to want, and so on; further,
12
 *    exit port statistics, cell statistics, and connection statistics.
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
 *
 * The history and information tracked in this module could sensibly be
 * divided into several categories:
 *
 * <ul><li>Statistics used by authorities to remember the uptime and
 * stability information about various relays, including "uptime",
 * "weighted fractional uptime" and "mean time between failures".
 *
 * <li>Predicted ports, used by clients to remember how long it's been
 * since they opened an exit connection to each given target
 * port. Clients use this information in order to try to keep circuits
 * open to exit nodes that can connect to the ports that they care
 * about.  (The predicted ports mechanism also handles predicted circuit
 * usage that _isn't_ port-specific, such as resolves, internal circuits,
 * and so on.)
 *
 * <li>Public key operation counters, for tracking how many times we've
 * done each public key operation.  (This is unmaintained and we should
 * remove it.)
 *
 * <li>Exit statistics by port, used by exits to keep track of the
 * number of streams and bytes they've served at each exit port, so they
 * can generate their exit-kibibytes-{read,written} and
 * exit-streams-opened statistics.
 *
 * <li>Circuit stats, used by relays instances to tract circuit
 * queue fullness and delay over time, and generate cell-processed-cells,
 * cell-queued-cells, cell-time-in-queue, and cell-circuits-per-decile
 * statistics.
 *
 * <li>Descriptor serving statistics, used by directory caches to track
 * how many descriptors they've served.
 *
 * <li>Onion handshake statistics, used by relays to count how many
 * TAP and ntor handshakes they've handled.
 *
 * <li>Hidden service statistics, used by relays to count rendezvous
 * traffic and HSDir-stored descriptors.
 *
 * <li>Link protocol statistics, used by relays to count how many times
 * each link protocol has been used.
 *
 * </ul>
 *
 * The entry points for this module are scattered throughout the
 * codebase.  Sending data, receiving data, connecting to a relay,
 * losing a connection to a relay, and so on can all trigger a change in
 * our current stats.  Relays also invoke this module in order to
 * extract their statistics when building routerinfo and extrainfo
 * objects in router.c.
 *
 * TODO: This module should be broken up.
 *
 * (The "rephist" name originally stood for "reputation and history". )
Nick Mathewson's avatar
Nick Mathewson committed
67
 **/
68

69
#define REPHIST_PRIVATE
70
71
#include "core/or/or.h"
#include "app/config/config.h"
72
73
74
#include "core/or/circuitlist.h"
#include "core/or/connection_or.h"
#include "feature/dirauth/authmode.h"
75
76
#include "feature/nodelist/networkstatus.h"
#include "feature/nodelist/nodelist.h"
77
#include "feature/stats/predict_ports.h"
78
#include "feature/stats/connstats.h"
79
80
81
82
#include "feature/stats/rephist.h"
#include "lib/container/order.h"
#include "lib/crypt_ops/crypto_rand.h"
#include "lib/math/laplace.h"
83

84
85
#include "feature/nodelist/networkstatus_st.h"
#include "core/or/or_circuit_st.h"
86

87
88
#include <event2/dns.h>

89
90
91
92
#ifdef HAVE_FCNTL_H
#include <fcntl.h>
#endif

Roger Dingledine's avatar
Roger Dingledine committed
93
/** Total number of bytes currently allocated in fields used by rephist.c. */
94
uint64_t rephist_total_alloc=0;
Roger Dingledine's avatar
Roger Dingledine committed
95
/** Number of or_history_t objects currently allocated. */
96
uint32_t rephist_total_num=0;
97

98
99
/** If the total weighted run count of all runs for a router ever falls
 * below this amount, the router can be treated as having 0 MTBF. */
100
#define STABILITY_EPSILON   0.0001
Nick Mathewson's avatar
Nick Mathewson committed
101
/** Value by which to discount all old intervals for MTBF purposes.  This
102
103
104
 * is compounded every STABILITY_INTERVAL. */
#define STABILITY_ALPHA     0.95
/** Interval at which to discount all old intervals for MTBF purposes. */
105
#define STABILITY_INTERVAL  (12*60*60)
106
107
108
109
/* (This combination of ALPHA, INTERVAL, and EPSILON makes it so that an
 * interval that just ended counts twice as much as one that ended a week ago,
 * 20X as much as one that ended a month ago, and routers that have had no
 * uptime data for about half a year will get forgotten.) */
110

Nick Mathewson's avatar
Nick Mathewson committed
111
/** History of an OR. */
112
typedef struct or_history_t {
Nick Mathewson's avatar
Nick Mathewson committed
113
  /** When did we start tracking this OR? */
114
  time_t since;
115
116
  /** When did we most recently note a change to this OR? */
  time_t changed;
117

118
  /** The address at which we most recently connected to this OR
119
   * successfully. */
120
121
  tor_addr_t last_reached_addr;

122
123
124
  /** The port at which we most recently connected to this OR successfully */
  uint16_t last_reached_port;

125
126
127
  /* === For MTBF tracking: */
  /** Weighted sum total of all times that this router has been online.
   */
128
  unsigned long weighted_run_length;
129
130
  /** If the router is now online (according to stability-checking rules),
   * when did it come online? */
131
  time_t start_of_run;
132
  /** Sum of weights for runs in weighted_run_length. */
133
  double total_run_weights;
134
135
136
137
  /* === For fractional uptime tracking: */
  time_t start_of_downtime;
  unsigned long weighted_uptime;
  unsigned long total_weighted_time;
138
139
} or_history_t;

140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
/**
 * This structure holds accounting needed to calculate the padding overhead.
 */
typedef struct padding_counts_t {
  /** Total number of cells we have received, including padding */
  uint64_t read_cell_count;
  /** Total number of cells we have sent, including padding */
  uint64_t write_cell_count;
  /** Total number of CELL_PADDING cells we have received */
  uint64_t read_pad_cell_count;
  /** Total number of CELL_PADDING cells we have sent */
  uint64_t write_pad_cell_count;
  /** Total number of read cells on padding-enabled conns */
  uint64_t enabled_read_cell_count;
  /** Total number of sent cells on padding-enabled conns */
  uint64_t enabled_write_cell_count;
  /** Total number of read CELL_PADDING cells on padding-enabled cons */
  uint64_t enabled_read_pad_cell_count;
  /** Total number of sent CELL_PADDING cells on padding-enabled cons */
  uint64_t enabled_write_pad_cell_count;
  /** Total number of RELAY_DROP cells we have received */
  uint64_t read_drop_cell_count;
  /** Total number of RELAY_DROP cells we have sent */
  uint64_t write_drop_cell_count;
  /** The maximum number of padding timers we've seen in 24 hours */
  uint64_t maximum_chanpad_timers;
  /** When did we first copy padding_current into padding_published? */
  char first_published_at[ISO_TIME_LEN+1];
} padding_counts_t;

/** Holds the current values of our padding statistics.
 * It is not published until it is transferred to padding_published. */
static padding_counts_t padding_current;

/** Remains fixed for a 24 hour period, and then is replaced
 * by a redacted copy of padding_current */
static padding_counts_t padding_published;

178
179
/** When did we last multiply all routers' weighted_run_length and
 * total_run_weights by STABILITY_ALPHA? */
180
181
static time_t stability_last_downrated = 0;

182
183
184
/**  */
static time_t started_tracking_stability = 0;

Nick Mathewson's avatar
Nick Mathewson committed
185
/** Map from hex OR identity digest to or_history_t. */
186
static digestmap_t *history_map = NULL;
187

188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
/** Represents a state of overload stats.
 *
 *  All the timestamps in this structure have already been rounded down to the
 *  nearest hour. */
typedef struct {
  /* When did we last experience a general overload? */
  time_t overload_general_time;

  /* When did we last experience a bandwidth-related overload? */
  time_t overload_ratelimits_time;
  /* How many times have we gone off the our read limits? */
  uint64_t overload_read_count;
  /* How many times have we gone off the our write limits? */
  uint64_t overload_write_count;

  /* When did we last experience a file descriptor exhaustion? */
  time_t overload_fd_exhausted_time;
  /* How many times have we experienced a file descriptor exhaustion? */
  uint64_t overload_fd_exhausted;
} overload_stats_t;

/** Current state of overload stats */
static overload_stats_t overload_stats;

212
213
214
215
216
/** Counters to count the number of times we've reached an overload for the
 * global connection read/write limit. Reported on the MetricsPort. */
static uint64_t stats_n_read_limit_reached = 0;
static uint64_t stats_n_write_limit_reached = 0;

217
218
219
/** Total number of times we've reached TCP port exhaustion. */
static uint64_t stats_n_tcp_exhaustion = 0;

220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
/***** DNS statistics *****/

/** Represents the statistics of DNS queries seen if it is an Exit. */
typedef struct {
  /* Total number of DNS errors found in RFC 1035 (from 0 to 5 code). */
  uint64_t stats_n_error_none;          /* 0 */
  uint64_t stats_n_error_format;        /* 1 */
  uint64_t stats_n_error_serverfailed;  /* 2 */
  uint64_t stats_n_error_notexist;      /* 3 */
  uint64_t stats_n_error_notimpl;       /* 4 */
  uint64_t stats_n_error_refused;       /* 5 */

  /* Total number of DNS errors specific to libevent. */
  uint64_t stats_n_error_truncated; /* 65 */
  uint64_t stats_n_error_unknown;   /* 66 */
  uint64_t stats_n_error_timeout;   /* 67 */
  uint64_t stats_n_error_shutdown;  /* 68 */
  uint64_t stats_n_error_cancel;    /* 69 */
  uint64_t stats_n_error_nodata;    /* 70 */

  /* Total number of DNS request seen at an Exit. They might not all end
   * successfully or might even be lost by tor. This counter is incremented
   * right before the DNS request is initiated. */
  uint64_t stats_n_request;
} dns_stats_t;

/** DNS statistics store for each DNS record type for which tor supports only
 * three at the moment: A, PTR and AAAA. */
static dns_stats_t dns_A_stats;
static dns_stats_t dns_PTR_stats;
static dns_stats_t dns_AAAA_stats;

/** From a libevent record type, return a pointer to the corresponding DNS
 * statistics store. NULL is returned if the type is unhandled. */
static inline dns_stats_t *
get_dns_stats_by_type(const int type)
{
  switch (type) {
  case DNS_IPv4_A:
    return &dns_A_stats;
  case DNS_PTR:
    return &dns_PTR_stats;
  case DNS_IPv6_AAAA:
    return &dns_AAAA_stats;
  default:
    return NULL;
  }
}

/** Return the DNS error count for the given libevent DNS type and error code.
 * The possible types are: DNS_IPv4_A, DNS_PTR, DNS_IPv6_AAAA. */
uint64_t
rep_hist_get_n_dns_error(int type, uint8_t error)
{
  dns_stats_t *dns_stats = get_dns_stats_by_type(type);
  if (BUG(!dns_stats)) {
    return 0;
  }

  switch (error) {
  case DNS_ERR_NONE:
    return dns_stats->stats_n_error_none;
  case DNS_ERR_FORMAT:
    return dns_stats->stats_n_error_format;
  case DNS_ERR_SERVERFAILED:
    return dns_stats->stats_n_error_serverfailed;
  case DNS_ERR_NOTEXIST:
    return dns_stats->stats_n_error_notexist;
  case DNS_ERR_NOTIMPL:
    return dns_stats->stats_n_error_notimpl;
  case DNS_ERR_REFUSED:
    return dns_stats->stats_n_error_refused;
  case DNS_ERR_TRUNCATED:
    return dns_stats->stats_n_error_truncated;
  case DNS_ERR_UNKNOWN:
    return dns_stats->stats_n_error_unknown;
  case DNS_ERR_TIMEOUT:
    return dns_stats->stats_n_error_timeout;
  case DNS_ERR_SHUTDOWN:
    return dns_stats->stats_n_error_shutdown;
  case DNS_ERR_CANCEL:
    return dns_stats->stats_n_error_cancel;
  case DNS_ERR_NODATA:
    return dns_stats->stats_n_error_nodata;
  default:
    /* Unhandled code sent back by libevent. */
    return 0;
  }
}

/** Return the total number of DNS request seen for the given libevent DNS
 * record type. Possible types are: DNS_IPv4_A, DNS_PTR, DNS_IPv6_AAAA. */
uint64_t
rep_hist_get_n_dns_request(int type)
{
  dns_stats_t *dns_stats = get_dns_stats_by_type(type);
  if (BUG(!dns_stats)) {
    return 0;
  }
  return dns_stats->stats_n_request;
}

/** Note a DNS error for the given given libevent DNS record type and error
 * code. Possible types are: DNS_IPv4_A, DNS_PTR, DNS_IPv6_AAAA. */
void
rep_hist_note_dns_error(int type, uint8_t error)
{
  dns_stats_t *dns_stats = get_dns_stats_by_type(type);
328
329
  /* Unsupported DNS query type. */
  if (!dns_stats) {
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
    return;
  }

  switch (error) {
  case DNS_ERR_NONE:
    dns_stats->stats_n_error_none++;
    break;
  case DNS_ERR_FORMAT:
    dns_stats->stats_n_error_format++;
    break;
  case DNS_ERR_SERVERFAILED:
    dns_stats->stats_n_error_serverfailed++;
    break;
  case DNS_ERR_NOTEXIST:
    dns_stats->stats_n_error_notexist++;
    break;
  case DNS_ERR_NOTIMPL:
    dns_stats->stats_n_error_notimpl++;
    break;
  case DNS_ERR_REFUSED:
    dns_stats->stats_n_error_refused++;
    break;
  case DNS_ERR_TRUNCATED:
    dns_stats->stats_n_error_truncated++;
    break;
  case DNS_ERR_UNKNOWN:
    dns_stats->stats_n_error_unknown++;
    break;
  case DNS_ERR_TIMEOUT:
    dns_stats->stats_n_error_timeout++;
    break;
  case DNS_ERR_SHUTDOWN:
    dns_stats->stats_n_error_shutdown++;
    break;
  case DNS_ERR_CANCEL:
    dns_stats->stats_n_error_cancel++;
    break;
  case DNS_ERR_NODATA:
    dns_stats->stats_n_error_nodata++;
    break;
  default:
    /* Unhandled code sent back by libevent. */
    break;
  }
}

/** Note a DNS request for the given given libevent DNS record type. */
void
rep_hist_note_dns_request(int type)
{
  dns_stats_t *dns_stats = get_dns_stats_by_type(type);
  if (BUG(!dns_stats)) {
    return;
  }
  dns_stats->stats_n_request++;
}

/***** END of DNS statistics *****/

389
390
/** Return true if this overload happened within the last `n_hours`. */
static bool
391
overload_happened_recently(time_t overload_time, int n_hours)
392
393
394
395
396
397
398
399
400
401
402
{
  /* An overload is relevant if it happened in the last 72 hours */
  if (overload_time > approx_time() - 3600 * n_hours) {
    return true;
  }
  return false;
}

/* The current version of the overload stats version */
#define OVERLOAD_STATS_VERSION 1

403
404
405
406
407
408
409
410
411
412
413
414
415
416
/** Return the stats_n_read_limit_reached counter. */
uint64_t
rep_hist_get_n_read_limit_reached(void)
{
  return stats_n_read_limit_reached;
}

/** Return the stats_n_write_limit_reached counter. */
uint64_t
rep_hist_get_n_write_limit_reached(void)
{
  return stats_n_write_limit_reached;
}

417
418
/** Returns an allocated string for server descriptor for publising information
 * on whether we are overloaded or not. */
419
char *
420
rep_hist_get_overload_general_line(void)
421
422
423
424
{
  char *result = NULL;
  char tbuf[ISO_TIME_LEN+1];

425
  /* Encode the general overload */
426
427
  if (overload_happened_recently(overload_stats.overload_general_time, 72)) {
    format_iso_time(tbuf, overload_stats.overload_general_time);
428
429
    tor_asprintf(&result, "overload-general %d %s\n",
                 OVERLOAD_STATS_VERSION, tbuf);
430
431
  }

432
433
434
435
436
437
438
439
440
441
442
443
444
  return result;
}

/** Returns an allocated string for extra-info documents for publishing
 *  overload statistics. */
char *
rep_hist_get_overload_stats_lines(void)
{
  char *result = NULL;
  smartlist_t *chunks = smartlist_new();
  char tbuf[ISO_TIME_LEN+1];

  /* Add bandwidth-related overloads */
445
446
447
448
449
  if (overload_happened_recently(overload_stats.overload_ratelimits_time,24)) {
    const or_options_t *options = get_options();
    format_iso_time(tbuf, overload_stats.overload_ratelimits_time);
    smartlist_add_asprintf(chunks,
                           "overload-ratelimits %d %s %" PRIu64 " %" PRIu64
450
                           " %" PRIu64 " %" PRIu64 "\n",
451
452
453
454
455
456
457
458
459
460
                           OVERLOAD_STATS_VERSION, tbuf,
                           options->BandwidthRate, options->BandwidthBurst,
                           overload_stats.overload_read_count,
                           overload_stats.overload_write_count);
  }

  /* Finally file descriptor overloads */
  if (overload_happened_recently(
                              overload_stats.overload_fd_exhausted_time, 72)) {
    format_iso_time(tbuf, overload_stats.overload_fd_exhausted_time);
461
    smartlist_add_asprintf(chunks, "overload-fd-exhausted %d %s\n",
462
463
464
465
466
467
468
469
                           OVERLOAD_STATS_VERSION, tbuf);
  }

  /* Bail early if we had nothing to write */
  if (smartlist_len(chunks) == 0) {
    goto done;
  }

470
  result = smartlist_join_strings(chunks, "", 0, NULL);
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486

 done:
  SMARTLIST_FOREACH(chunks, char *, cp, tor_free(cp));
  smartlist_free(chunks);
  return result;
}

/** Round down the time in `a` to the beginning of the current hour */
#define SET_TO_START_OF_HOUR(a) STMT_BEGIN \
  (a) = approx_time() - (approx_time() % 3600); \
STMT_END

/** Note down an overload event of type `overload`. */
void
rep_hist_note_overload(overload_type_t overload)
{
487
488
489
  static time_t last_read_counted = 0;
  static time_t last_write_counted = 0;

490
491
492
493
  switch (overload) {
  case OVERLOAD_GENERAL:
    SET_TO_START_OF_HOUR(overload_stats.overload_general_time);
    break;
494
  case OVERLOAD_READ: {
495
    stats_n_read_limit_reached++;
496
    SET_TO_START_OF_HOUR(overload_stats.overload_ratelimits_time);
497
498
499
500
    if (approx_time() >= last_read_counted + 60) { /* Count once a minute */
        overload_stats.overload_read_count++;
        last_read_counted = approx_time();
    }
501
    break;
502
503
  }
  case OVERLOAD_WRITE: {
504
    stats_n_write_limit_reached++;
505
    SET_TO_START_OF_HOUR(overload_stats.overload_ratelimits_time);
506
507
508
509
    if (approx_time() >= last_write_counted + 60) { /* Count once a minute */
      overload_stats.overload_write_count++;
      last_write_counted = approx_time();
    }
510
    break;
511
  }
512
513
514
515
516
517
518
  case OVERLOAD_FD_EXHAUSTED:
    SET_TO_START_OF_HOUR(overload_stats.overload_fd_exhausted_time);
    overload_stats.overload_fd_exhausted++;
    break;
  }
}

519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
/** Note down that we've reached a TCP port exhaustion. This triggers an
 * overload general event. */
void
rep_hist_note_tcp_exhaustion(void)
{
  stats_n_tcp_exhaustion++;
  rep_hist_note_overload(OVERLOAD_GENERAL);
}

/** Return the total number of TCP exhaustion times we've reached. */
uint64_t
rep_hist_get_n_tcp_exhaustion(void)
{
  return stats_n_tcp_exhaustion;
}

535
536
/** Return the or_history_t for the OR with identity digest <b>id</b>,
 * creating it if necessary. */
537
538
static or_history_t *
get_or_history(const char* id)
539
540
{
  or_history_t *hist;
541

542
  if (tor_digest_is_zero(id))
543
544
    return NULL;

545
  hist = digestmap_get(history_map, id);
546
547
  if (!hist) {
    hist = tor_malloc_zero(sizeof(or_history_t));
548
    rephist_total_alloc += sizeof(or_history_t);
549
    rephist_total_num++;
550
    hist->since = hist->changed = time(NULL);
551
    tor_addr_make_unspec(&hist->last_reached_addr);
552
    digestmap_set(history_map, id, hist);
553
554
555
556
  }
  return hist;
}

Roger Dingledine's avatar
Roger Dingledine committed
557
/** Helper: free storage held by a single OR history entry. */
558
static void
559
free_or_history(void *_hist)
560
{
561
  or_history_t *hist = _hist;
562
  rephist_total_alloc -= sizeof(or_history_t);
563
  rephist_total_num--;
564
565
566
  tor_free(hist);
}

Roger Dingledine's avatar
Roger Dingledine committed
567
/** Initialize the static data structures for tracking history. */
568
569
void
rep_hist_init(void)
570
{
571
  history_map = digestmap_new();
572
573
}

574
575
/** We have just decided that this router with identity digest <b>id</b> is
 * reachable, meaning we will give it a "Running" flag for the next while. */
576
void
577
rep_hist_note_router_reachable(const char *id, const tor_addr_t *at_addr,
578
                               const uint16_t at_port, time_t when)
579
{
580
  or_history_t *hist = get_or_history(id);
581
582
  int was_in_run = 1;
  char tbuf[ISO_TIME_LEN+1];
583
  int addr_changed, port_changed;
584

585
  tor_assert(hist);
Sebastian Hahn's avatar
Sebastian Hahn committed
586
  tor_assert((!at_addr && !at_port) || (at_addr && at_port));
587

588
  addr_changed = at_addr && !tor_addr_is_null(&hist->last_reached_addr) &&
589
    tor_addr_compare(at_addr, &hist->last_reached_addr, CMP_EXACT) != 0;
590
591
  port_changed = at_port && hist->last_reached_port &&
                 at_port != hist->last_reached_port;
592

593
594
  if (!started_tracking_stability)
    started_tracking_stability = time(NULL);
595
  if (!hist->start_of_run) {
596
    hist->start_of_run = when;
597
    was_in_run = 0;
598
  }
599
  if (hist->start_of_downtime) {
600
601
602
603
604
605
606
607
608
    long down_length;

    format_local_iso_time(tbuf, hist->start_of_downtime);
    log_info(LD_HIST, "Router %s is now Running; it had been down since %s.",
             hex_str(id, DIGEST_LEN), tbuf);
    if (was_in_run)
      log_info(LD_HIST, "  (Paradoxically, it was already Running too.)");

    down_length = when - hist->start_of_downtime;
609
610
    hist->total_weighted_time += down_length;
    hist->start_of_downtime = 0;
611
  } else if (addr_changed || port_changed) {
612
613
614
615
616
617
    /* If we're reachable, but the address changed, treat this as some
     * downtime. */
    int penalty = get_options()->TestingTorNetwork ? 240 : 3600;
    networkstatus_t *ns;

    if ((ns = networkstatus_get_latest_consensus())) {
618
619
      int fresh_interval = (int)(ns->fresh_until - ns->valid_after);
      int live_interval = (int)(ns->valid_until - ns->valid_after);
620
621
622
      /* on average, a descriptor addr change takes .5 intervals to make it
       * into a consensus, and half a liveness period to make it to
       * clients. */
623
      penalty = (int)(fresh_interval + live_interval) / 2;
624
625
    }
    format_local_iso_time(tbuf, hist->start_of_run);
Sebastian Hahn's avatar
Sebastian Hahn committed
626
627
628
629
    log_info(LD_HIST,"Router %s still seems Running, but its address appears "
             "to have changed since the last time it was reachable.  I'm "
             "going to treat it as having been down for %d seconds",
             hex_str(id, DIGEST_LEN), penalty);
630
    rep_hist_note_router_unreachable(id, when-penalty);
631
    rep_hist_note_router_reachable(id, NULL, 0, when);
632
633
634
635
636
637
  } else {
    format_local_iso_time(tbuf, hist->start_of_run);
    if (was_in_run)
      log_debug(LD_HIST, "Router %s is still Running; it has been Running "
                "since %s", hex_str(id, DIGEST_LEN), tbuf);
    else
Nick Mathewson's avatar
Nick Mathewson committed
638
      log_info(LD_HIST,"Router %s is now Running; it was previously untracked",
639
               hex_str(id, DIGEST_LEN));
640
  }
641
642
  if (at_addr)
    tor_addr_copy(&hist->last_reached_addr, at_addr);
643
644
  if (at_port)
    hist->last_reached_port = at_port;
645
646
647
648
649
650
651
}

/** We have just decided that this router is unreachable, meaning
 * we are taking away its "Running" flag. */
void
rep_hist_note_router_unreachable(const char *id, time_t when)
{
652
  or_history_t *hist = get_or_history(id);
653
654
  char tbuf[ISO_TIME_LEN+1];
  int was_running = 0;
655
656
  if (!started_tracking_stability)
    started_tracking_stability = time(NULL);
657
658
659

  tor_assert(hist);
  if (hist->start_of_run) {
660
    /*XXXX We could treat failed connections differently from failed
661
     * connect attempts. */
662
    long run_length = when - hist->start_of_run;
663
664
    format_local_iso_time(tbuf, hist->start_of_run);

665
666
    hist->total_run_weights += 1.0;
    hist->start_of_run = 0;
667
668
669
670
671
672
673
674
675
676
677
678
    if (run_length < 0) {
      unsigned long penalty = -run_length;
#define SUBTRACT_CLAMPED(var, penalty) \
      do { (var) = (var) < (penalty) ? 0 : (var) - (penalty); } while (0)

      SUBTRACT_CLAMPED(hist->weighted_run_length, penalty);
      SUBTRACT_CLAMPED(hist->weighted_uptime, penalty);
    } else {
      hist->weighted_run_length += run_length;
      hist->weighted_uptime += run_length;
      hist->total_weighted_time += run_length;
    }
679
680
681
682
683
    was_running = 1;
    log_info(LD_HIST, "Router %s is now non-Running: it had previously been "
             "Running since %s.  Its total weighted uptime is %lu/%lu.",
             hex_str(id, DIGEST_LEN), tbuf, hist->weighted_uptime,
             hist->total_weighted_time);
684
  }
685
  if (!hist->start_of_downtime) {
686
    hist->start_of_downtime = when;
687
688
689
690
691
692
693
694
695
696
697

    if (!was_running)
      log_info(LD_HIST, "Router %s is now non-Running; it was previously "
               "untracked.", hex_str(id, DIGEST_LEN));
  } else {
    if (!was_running) {
      format_local_iso_time(tbuf, hist->start_of_downtime);

      log_info(LD_HIST, "Router %s is still non-Running; it has been "
               "non-Running since %s.", hex_str(id, DIGEST_LEN), tbuf);
    }
698
  }
699
700
}

701
702
703
704
705
706
707
708
709
710
711
712
713
714
/** Mark a router with ID <b>id</b> as non-Running, and retroactively declare
 * that it has never been running: give it no stability and no WFU. */
void
rep_hist_make_router_pessimal(const char *id, time_t when)
{
  or_history_t *hist = get_or_history(id);
  tor_assert(hist);

  rep_hist_note_router_unreachable(id, when);

  hist->weighted_run_length = 0;
  hist->weighted_uptime = 0;
}

715
716
/** Helper: Discount all old MTBF data, if it is time to do so.  Return
 * the time at which we should next discount MTBF data. */
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
time_t
rep_hist_downrate_old_runs(time_t now)
{
  digestmap_iter_t *orhist_it;
  const char *digest1;
  or_history_t *hist;
  void *hist_p;
  double alpha = 1.0;

  if (!history_map)
    history_map = digestmap_new();
  if (!stability_last_downrated)
    stability_last_downrated = now;
  if (stability_last_downrated + STABILITY_INTERVAL > now)
    return stability_last_downrated + STABILITY_INTERVAL;

733
  /* Okay, we should downrate the data.  By how much? */
734
735
736
737
738
  while (stability_last_downrated + STABILITY_INTERVAL < now) {
    stability_last_downrated += STABILITY_INTERVAL;
    alpha *= STABILITY_ALPHA;
  }

739
  log_info(LD_HIST, "Discounting all old stability info by a factor of %f",
740
741
           alpha);

742
  /* Multiply every w_r_l, t_r_w pair by alpha. */
743
744
745
746
747
748
749
750
751
  for (orhist_it = digestmap_iter_init(history_map);
       !digestmap_iter_done(orhist_it);
       orhist_it = digestmap_iter_next(history_map,orhist_it)) {
    digestmap_iter_get(orhist_it, &digest1, &hist_p);
    hist = hist_p;

    hist->weighted_run_length =
      (unsigned long)(hist->weighted_run_length * alpha);
    hist->total_run_weights *= alpha;
752

753
754
755
    hist->weighted_uptime = (unsigned long)(hist->weighted_uptime * alpha);
    hist->total_weighted_time = (unsigned long)
      (hist->total_weighted_time * alpha);
756
757
758
759
760
  }

  return stability_last_downrated + STABILITY_INTERVAL;
}

761
/** Helper: Return the weighted MTBF of the router with history <b>hist</b>. */
762
763
static double
get_stability(or_history_t *hist, time_t when)
764
{
765
  long total = hist->weighted_run_length;
766
  double total_weights = hist->total_run_weights;
767
768

  if (hist->start_of_run) {
769
770
    /* We're currently in a run.  Let total and total_weights hold the values
     * they would hold if the current run were to end now. */
771
772
773
    total += (when-hist->start_of_run);
    total_weights += 1.0;
  }
774
775
  if (total_weights < STABILITY_EPSILON) {
    /* Round down to zero, and avoid divide-by-zero. */
776
    return 0.0;
777
  }
778
779
780
781

  return total / total_weights;
}

782
783
/** Return the total amount of time we've been observing, with each run of
 * time downrated by the appropriate factor. */
784
785
786
787
788
789
790
791
792
793
794
static long
get_total_weighted_time(or_history_t *hist, time_t when)
{
  long total = hist->total_weighted_time;
  if (hist->start_of_run) {
    total += (when - hist->start_of_run);
  } else if (hist->start_of_downtime) {
    total += (when - hist->start_of_downtime);
  }
  return total;
}
795

796
797
/** Helper: Return the weighted percent-of-time-online of the router with
 * history <b>hist</b>. */
798
799
800
static double
get_weighted_fractional_uptime(or_history_t *hist, time_t when)
{
801
802
  long total = hist->total_weighted_time;
  long up = hist->weighted_uptime;
803
804
805
806
807
808
809
810

  if (hist->start_of_run) {
    long run_length = (when - hist->start_of_run);
    up += run_length;
    total += run_length;
  } else if (hist->start_of_downtime) {
    total += (when - hist->start_of_downtime);
  }
811
812
813
814
815
816
817
818

  if (!total) {
    /* Avoid calling anybody's uptime infinity (which should be impossible if
     * the code is working), or NaN (which can happen for any router we haven't
     * observed up or down yet). */
    return 0.0;
  }

819
820
821
  return ((double) up) / total;
}

822
823
824
825
826
827
828
829
830
/** Return how long the router whose identity digest is <b>id</b> has
 *  been reachable. Return 0 if the router is unknown or currently deemed
 *  unreachable. */
long
rep_hist_get_uptime(const char *id, time_t when)
{
  or_history_t *hist = get_or_history(id);
  if (!hist)
    return 0;
831
  if (!hist->start_of_run || when < hist->start_of_run)
832
833
834
835
    return 0;
  return when - hist->start_of_run;
}

836
837
/** Return an estimated MTBF for the router whose identity digest is
 * <b>id</b>. Return 0 if the router is unknown. */
838
839
840
841
842
843
844
845
846
847
double
rep_hist_get_stability(const char *id, time_t when)
{
  or_history_t *hist = get_or_history(id);
  if (!hist)
    return 0.0;

  return get_stability(hist, when);
}

848
849
/** Return an estimated percent-of-time-online for the router whose identity
 * digest is <b>id</b>. Return 0 if the router is unknown. */
850
851
852
853
854
855
856
857
858
859
double
rep_hist_get_weighted_fractional_uptime(const char *id, time_t when)
{
  or_history_t *hist = get_or_history(id);
  if (!hist)
    return 0.0;

  return get_weighted_fractional_uptime(hist, when);
}

860
861
862
/** Return a number representing how long we've known about the router whose
 * digest is <b>id</b>. Return 0 if the router is unknown.
 *
Nick Mathewson's avatar
Nick Mathewson committed
863
 * Be careful: this measure increases monotonically as we know the router for
864
865
866
867
868
869
870
871
872
873
874
875
 * longer and longer, but it doesn't increase linearly.
 */
long
rep_hist_get_weighted_time_known(const char *id, time_t when)
{
  or_history_t *hist = get_or_history(id);
  if (!hist)
    return 0;

  return get_total_weighted_time(hist, when);
}

876
/** Return true if we've been measuring MTBFs for long enough to
Roger Dingledine's avatar
Roger Dingledine committed
877
 * pronounce on Stability. */
878
879
880
int
rep_hist_have_measured_enough_stability(void)
{
881
  /* XXXX++ This doesn't do so well when we change our opinion
882
883
884
885
   * as to whether we're tracking router stability. */
  return started_tracking_stability < time(NULL) - 4*60*60;
}

886
/** Log all the reliability data we have remembered, with the chosen
887
888
 * severity.
 */
889
890
void
rep_hist_dump_stats(time_t now, int severity)
891
{
892
  digestmap_iter_t *orhist_it;
893
  const char *name1, *digest1;
894
  char hexdigest1[HEX_DIGEST_LEN+1];
895
  or_history_t *or_history;
896
  void *or_history_p;
897
  const node_t *node;
898

899
  rep_history_clean(now - get_options()->RephistTrackTime);
900

901
  tor_log(severity, LD_HIST, "--------------- Dumping history information:");
902

903
904
  for (orhist_it = digestmap_iter_init(history_map);
       !digestmap_iter_done(orhist_it);
905
       orhist_it = digestmap_iter_next(history_map,orhist_it)) {
906
907
    double s;
    long stability;
908
    digestmap_iter_get(orhist_it, &digest1, &or_history_p);
909
    or_history = (or_history_t*) or_history_p;
910

911
912
    if ((node = node_get_by_id(digest1)) && node_get_nickname(node))
      name1 = node_get_nickname(node);
913
914
    else
      name1 = "(unknown)";
915
    base16_encode(hexdigest1, sizeof(hexdigest1), digest1, DIGEST_LEN);
916
917
    s = get_stability(or_history, now);
    stability = (long)s;
918
    tor_log(severity, LD_HIST,
919
        "OR %s [%s]: wmtbf %lu:%02lu:%02lu",
920
        name1, hexdigest1,
921
        stability/3600, (stability/60)%60, stability%60);
922
923
924
  }
}

925
/** Remove history info for routers/links that haven't changed since
Roger Dingledine's avatar
Roger Dingledine committed
926
927
 * <b>before</b>.
 */
928
929
void
rep_history_clean(time_t before)
930
{
931
  int authority = authdir_mode(get_options());
932
  or_history_t *or_history;
933
934
935
  void *or_history_p;
  digestmap_iter_t *orhist_it;
  const char *d1;
936

937
938
  orhist_it = digestmap_iter_init(history_map);
  while (!digestmap_iter_done(orhist_it)) {
939
    int should_remove;
940
    digestmap_iter_get(orhist_it, &d1, &or_history_p);
941
    or_history = or_history_p;
942

943
944
    should_remove = authority ?
                       (or_history->total_run_weights < STABILITY_EPSILON &&
945
946
                          !or_history->start_of_run)
                       : (or_history->changed < before);
947
    if (should_remove) {
948
      orhist_it = digestmap_iter_next_rmv(history_map, orhist_it);
949
      free_or_history(or_history);
950
951
      continue;
    }
952
    orhist_it = digestmap_iter_next(history_map, orhist_it);
953
954
955
  }
}

956
957
958
959
960
/** Write MTBF data to disk. Return 0 on success, negative on failure.
 *
 * If <b>missing_means_down</b>, then if we're about to write an entry
 * that is still considered up but isn't in our routerlist, consider it
 * to be down. */
961
int
962
rep_hist_record_mtbf_data(time_t now, int missing_means_down)
963
964
965
966
967
968
969
{
  char time_buf[ISO_TIME_LEN+1];

  digestmap_iter_t *orhist_it;
  const char *digest;
  void *or_history_p;
  or_history_t *hist;
970
971
972
973
  open_file_t *open_file = NULL;
  FILE *f;

  {
974
    char *filename = get_datadir_fname("router-stability");
975
976
977
978
979
980
    f = start_writing_to_stdio_file(filename, OPEN_FLAGS_REPLACE|O_TEXT, 0600,
                                    &open_file);
    tor_free(filename);
    if (!f)
      return -1;
  }
981

982
983
984
985
986
987
988
989
990
  /* File format is:
   *   FormatLine *KeywordLine Data
   *
   *   FormatLine = "format 1" NL
   *   KeywordLine = Keyword SP Arguments NL
   *   Data = "data" NL *RouterMTBFLine "." NL
   *   RouterMTBFLine = Fingerprint SP WeightedRunLen SP
   *           TotalRunWeights [SP S=StartRunTime] NL
   */
991
992
#define PUT(s) STMT_BEGIN if (fputs((s),f)<0) goto err; STMT_END
#define PRINTF(args) STMT_BEGIN if (fprintf args <0) goto err; STMT_END
993

994
  PUT("format 2\n");
995

996
  format_iso_time(time_buf, time(NULL));
997
  PRINTF((f, "stored-at %s\n", time_buf));
998

999
1000
  if (started_tracking_stability) {
    format_iso_time(time_buf, started_tracking_stability);
1001
    PRINTF((f, "tracked-since %s\n", time_buf));
1002
  }
1003
1004
  if (stability_last_downrated) {
    format_iso_time(time_buf, stability_last_downrated);
1005
    PRINTF((f, "last-downrated %s\n", time_buf));
1006
  }
1007

1008
  PUT("data\n");
1009

1010
  /* XXX Nick: now bridge auths record this for all routers too.
1011
1012
   * Should we make them record it only for bridge routers? -RD
   * Not for 0.2.0. -NM */
1013
1014
1015
1016
1017
1018
1019
1020
1021
  for (orhist_it = digestmap_iter_init(history_map);
       !digestmap_iter_done(orhist_it);
       orhist_it = digestmap_iter_next(history_map,orhist_it)) {
    char dbuf[HEX_DIGEST_LEN+1];
    const char *t = NULL;
    digestmap_iter_get(orhist_it, &digest, &or_history_p);
    hist = (or_history_t*) or_history_p;

    base16_encode(dbuf, sizeof(dbuf), digest, DIGEST_LEN);
1022
1023

    if (missing_means_down && hist->start_of_run &&
1024
        !connection_or_digest_is_known_relay(digest)) {
1025
      /* We think this relay is running, but it's not listed in our
1026
       * consensus. Somehow it fell out without telling us it went
1027
1028
1029
1030
1031
1032
1033
       * down. Complain and also correct it. */
      log_info(LD_HIST,
               "Relay '%s' is listed as up in rephist, but it's not in "
               "our routerlist. Correcting.", dbuf);
      rep_hist_note_router_unreachable(digest, now);
    }

1034
    PRINTF((f, "R %s\n", dbuf));
1035
    if (hist->start_of_run > 0) {
1036
1037
1038
      format_iso_time(time_buf, hist->start_of_run);
      t = time_buf;
    }
1039
    PRINTF((f, "+MTBF %lu %.5f%s%s\n",
1040
1041
1042
            hist->weighted_run_length, hist->total_run_weights,
            t ? " S=" : "", t ? t : ""));
    t = NULL;
1043
    if (hist->start_of_downtime > 0) {
1044
1045
1046
1047
1048
      format_iso_time(time_buf, hist->start_of_downtime);
      t = time_buf;
    }
    PRINTF((f, "+WFU %lu %lu%s%s\n",
            hist->weighted_uptime, hist->total_weighted_time,
1049
            t ? " S=" : "", t ? t : ""));
1050
1051
  }

1052
  PUT(".\n");
1053

1054
1055
#undef PUT
#undef PRINTF
1056

1057
1058
1059
1060
  return finish_writing_to_file(open_file);
 err:
  abort_writing_to_file(open_file);
  return -1;
1061
1062
}

1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
/** Helper: return the first j >= i such that !strcmpstart(sl[j], prefix) and
 * such that no line sl[k] with i <= k < j starts with "R ".  Return -1 if no
 * such line exists. */
static int
find_next_with(smartlist_t *sl, int i, const char *prefix)
{
  for ( ; i < smartlist_len(sl); ++i) {
    const char *line = smartlist_get(sl, i);
    if (!strcmpstart(line, prefix))
      return i;
    if (!strcmpstart(line, "R "))
      return -1;
  }
  return -1;
}

1079
/** How many bad times has parse_possibly_bad_iso_time() parsed? */
1080
static int n_bogus_times = 0;
1081
/** Parse the ISO-formatted time in <b>s</b> into *<b>time_out</b>, but
1082
 * round any pre-1970 date to Jan 1, 1970. */
1083
1084
1085
1086
1087
1088
1089
static int
parse_possibly_bad_iso_time(const char *s, time_t *time_out)
{
  int year;
  char b[5];
  strlcpy(b, s, sizeof(b));
  b[4] = '\0';
1090
  year = (int)tor_parse_long(b, 10, 0, INT_MAX, NULL, NULL);
1091
1092
1093
1094
1095
1096
1097
1098
  if (year < 1970) {
    *time_out = 0;
    ++n_bogus_times;
    return 0;
  } else
    return parse_iso_time(s, time_out);
}

1099
1100
1101
1102
1103
/** We've read a time <b>t</b> from a file stored at <b>stored_at</b>, which
 * says we started measuring at <b>started_measuring</b>.  Return a new number
 * that's about as much before <b>now</b> as <b>t</b> was before
 * <b>stored_at</b>.
 */
1104
static inline time_t
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
correct_time(time_t t, time_t now, time_t stored_at, time_t started_measuring)
{
  if (t < started_measuring - 24*60*60*365)
    return 0;
  else if (t < started_measuring)
    return started_measuring;
  else if (t > stored_at)
    return 0;
  else {
    long run_length = stored_at - t;
1115
    t = (time_t)(now - run_length);
1116
1117
1118
1119
1120
1121
    if (t < started_measuring)
      t = started_measuring;
    return t;
  }
}

1122
1123
/** Load MTBF data from disk.  Returns 0 on success or recoverable error, -1
 * on failure. */
1124
int
1125
rep_hist_load_mtbf_data(time_t now)
1126
1127
1128
1129
1130
{
  /* XXXX won't handle being called while history is already populated. */
  smartlist_t *lines;
  const char *line = NULL;
  int r=0, i;
1131
1132
  time_t last_downrated = 0, stored_at = 0, tracked_since = 0;
  time_t latest_possible_start = now;
1133
  long format = -1;
1134
1135

  {
1136
    char *filename = get_datadir_fname("router-stability");
1137
    char *d = read_file_to_str(filename, RFTS_IGNORE_MISSING, NULL);
1138
    tor_free(filename);
1139
1140
    if (!d)
      return -1;
1141
    lines = smartlist_new();
1142
1143
1144
1145
    smartlist_split_string(lines, d, "\n", SPLIT_SKIP_SPACE, 0);
    tor_free(d);
  }

1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
  {
    const char *firstline;
    if (smartlist_len(lines)>4) {
      firstline = smartlist_get(lines, 0);
      if (!strcmpstart(firstline, "format "))
        format = tor_parse_long(firstline+strlen("format "),
                                10, -1, LONG_MAX, NULL, NULL);
    }
  }
  if (format != 1 && format != 2) {
1156
    log_warn(LD_HIST,
1157
             "Unrecognized format in mtbf history file. Skipping.");
1158
1159
1160
1161
1162
1163
1164
1165
    goto err;
  }
  for (i = 1; i < smartlist_len(lines); ++i) {
    line = smartlist_get(lines, i);
    if (!strcmp(line, "data"))
      break;
    if (!strcmpstart(line, "last-downrated ")) {
      if (parse_iso_time(line+strlen("last-downrated "), &last_downrated)<0)
1166
        log_warn(LD_HIST,"Couldn't parse downrate time in mtbf "
1167
1168
                 "history file.");
    }
1169
1170
    if (!strcmpstart(line, "stored-at ")) {
      if (parse_iso_time(line+strlen("stored-at "), &stored_at)<0)
1171
        log_warn(LD_HIST,"Couldn't parse stored time in mtbf "
1172
1173
                 "history file.");
    }
1174
1175
    if (!strcmpstart(line, "tracked-since ")) {
      if (parse_iso_time(line+strlen("tracked-since "), &tracked_since)<0)
1176
        log_warn(LD_HIST,"Couldn't parse started-tracking time in mtbf "
1177
1178
                 "history file.");
    }
1179
  }
1180
1181
  if (last_downrated > now)
    last_downrated = now;
1182
1183
  if (tracked_since > now)
    tracked_since = now;
1184
1185

  if (!stored_at) {
1186
    log_warn(LD_HIST, "No stored time recorded.");
1187
1188
1189
    goto err;
  }

1190
1191
1192
  if (line && !strcmp(line, "data"))
    ++i;

1193
1194
  n_bogus_times = 0;

1195
1196
1197
  for (; i < smartlist_len(lines); ++i) {
    char digest[DIGEST_LEN];
    char hexbuf[HEX_DIGEST_LEN+1];
1198
1199
    char mtbf_timebuf[ISO_TIME_LEN+1];
    char wfu_timebuf[ISO_TIME_LEN+1];
1200
    time_t start_of_run = 0;
1201
1202
1203
1204
1205
    time_t start_of_downtime = 0;
    int have_mtbf = 0, have_wfu = 0;
    long wrl = 0;
    double trw = 0;
    long wt_uptime = 0, total_wt_time = 0;
1206
1207
1208
1209
1210
    int n;
    or_history_t *hist;
    line = smartlist_get(lines, i);
    if (!strcmp(line, "."))
      break;
1211
1212
1213
1214
1215

    mtbf_timebuf[0] = '\0';
    wfu_timebuf[0] = '\0';

    if (format == 1) {
1216
      n = tor_sscanf(line, "%40s %ld %lf S=%10s %8s",
1217
1218
                 hexbuf, &wrl, &trw, mtbf_timebuf, mtbf_timebuf+11);
      if (n != 3 && n != 5) {
1219
        log_warn(LD_HIST, "Couldn't scan line %s", escaped(line));
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
        continue;
      }
      have_mtbf = 1;
    } else {
      // format == 2.
      int mtbf_idx, wfu_idx;
      if (strcmpstart(line, "R ") || strlen(line) < 2+HEX_DIGEST_LEN)
        continue;
      strlcpy(hexbuf, line+2, sizeof(hexbuf));
      mtbf_idx = find_next_with(lines, i+1, "+MTBF ");
      wfu_idx = find_next_with(lines, i+1, "+WFU ");
      if (mtbf_idx >= 0) {
        const char *mtbfline = smartlist_get(lines, mtbf_idx);
1233
        n = tor_sscanf(mtbfline, "+MTBF %lu %lf S=%10s %8s",
1234
1235
1236
1237
                   &wrl, &trw, mtbf_timebuf, mtbf_timebuf+11);
        if (n == 2 || n == 4) {
          have_mtbf = 1;
        } else {
1238
          log_warn(LD_HIST, "Couldn't scan +MTBF line %s",
1239
1240
1241
1242
1243
                   escaped(mtbfline));
        }
      }
      if (wfu_idx >= 0) {
        const char *wfuline = smartlist_get(lines, wfu_idx);
1244
        n = tor_sscanf(wfuline, "+WFU %lu %lu S=%10s %8s",
1245
1246
1247
1248
1249
                   &wt_uptime, &total_wt_time,
                   wfu_timebuf, wfu_timebuf+11);
        if (n == 2 || n == 4) {
          have_wfu = 1;
        } else {
1250
          log_warn(LD_HIST, "Couldn't scan +WFU line %s", escaped(wfuline));
1251
1252
1253
1254
1255
1256
        }
      }
      if (wfu_idx > i)
        i = wfu_idx;
      if (mtbf_idx > i)
        i = mtbf_idx;
1257
    }
1258
1259
    if (base16_decode(digest, DIGEST_LEN,
                      hexbuf, HEX_DIGEST_LEN) != DIGEST_LEN) {
1260
      log_warn(LD_HIST, "Couldn't hex string %s", escaped(hexbuf));
1261
1262
1263
1264
1265
      continue;
    }
    hist = get_or_history(digest);
    if (!hist)
      continue;
1266

1267
1268
1269
    if (have_mtbf) {
      if (mtbf_timebuf[0]) {
        mtbf_timebuf[10] = ' ';
1270
        if (parse_possibly_bad_iso_time(mtbf_timebuf, &start_of_run)<0)
1271
          log_warn(LD_HIST, "Couldn't parse time %s",
1272
1273
                   escaped(mtbf_timebuf));
      }
1274
1275
      hist->start_of_run = correct_time(start_of_run, now