rephist.c 80.8 KB
Newer Older
1
/* Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
Karsten Loesing's avatar
Karsten Loesing committed
2
 * Copyright (c) 2007-2009, The Tor Project, Inc. */
3
4
/* See LICENSE for licensing information */

Nick Mathewson's avatar
Nick Mathewson committed
5
6
/**
 * \file rephist.c
7
8
9
 * \brief Basic history and "reputation" functionality to remember
 *    which servers have worked in the past, how much bandwidth we've
 *    been using, which ports we tend to want, and so on.
Nick Mathewson's avatar
Nick Mathewson committed
10
 **/
11

12
#include "or.h"
13
#include "ht.h"
14

15
static void bw_arrays_init(void);
16
static void predicted_ports_init(void);
17
static void hs_usage_init(void);
18

Roger Dingledine's avatar
Roger Dingledine committed
19
/** Total number of bytes currently allocated in fields used by rephist.c. */
20
uint64_t rephist_total_alloc=0;
Roger Dingledine's avatar
Roger Dingledine committed
21
/** Number of or_history_t objects currently allocated. */
22
uint32_t rephist_total_num=0;
23

24
25
/** If the total weighted run count of all runs for a router ever falls
 * below this amount, the router can be treated as having 0 MTBF. */
26
#define STABILITY_EPSILON   0.0001
Nick Mathewson's avatar
Nick Mathewson committed
27
/** Value by which to discount all old intervals for MTBF purposes.  This
28
29
30
 * is compounded every STABILITY_INTERVAL. */
#define STABILITY_ALPHA     0.95
/** Interval at which to discount all old intervals for MTBF purposes. */
31
#define STABILITY_INTERVAL  (12*60*60)
32
33
34
35
/* (This combination of ALPHA, INTERVAL, and EPSILON makes it so that an
 * interval that just ended counts twice as much as one that ended a week ago,
 * 20X as much as one that ended a month ago, and routers that have had no
 * uptime data for about half a year will get forgotten.) */
36

Nick Mathewson's avatar
Nick Mathewson committed
37
/** History of an OR-\>OR link. */
38
typedef struct link_history_t {
Nick Mathewson's avatar
Nick Mathewson committed
39
  /** When did we start tracking this list? */
40
  time_t since;
41
42
  /** When did we most recently note a change to this link */
  time_t changed;
43
  /** How many times did extending from OR1 to OR2 succeed? */
44
  unsigned long n_extend_ok;
Nick Mathewson's avatar
Nick Mathewson committed
45
  /** How many times did extending from OR1 to OR2 fail? */
46
47
48
  unsigned long n_extend_fail;
} link_history_t;

Nick Mathewson's avatar
Nick Mathewson committed
49
/** History of an OR. */
50
typedef struct or_history_t {
Nick Mathewson's avatar
Nick Mathewson committed
51
  /** When did we start tracking this OR? */
52
  time_t since;
53
54
  /** When did we most recently note a change to this OR? */
  time_t changed;
Nick Mathewson's avatar
Nick Mathewson committed
55
  /** How many times did we successfully connect? */
56
  unsigned long n_conn_ok;
Nick Mathewson's avatar
Nick Mathewson committed
57
  /** How many times did we try to connect and fail?*/
58
  unsigned long n_conn_fail;
Nick Mathewson's avatar
Nick Mathewson committed
59
  /** How many seconds have we been connected to this OR before
60
   * 'up_since'? */
61
  unsigned long uptime;
Nick Mathewson's avatar
Nick Mathewson committed
62
  /** How many seconds have we been unable to connect to this OR before
63
   * 'down_since'? */
64
  unsigned long downtime;
Nick Mathewson's avatar
Nick Mathewson committed
65
  /** If nonzero, we have been connected since this time. */
66
  time_t up_since;
Nick Mathewson's avatar
Nick Mathewson committed
67
  /** If nonzero, we have been unable to connect since this time. */
68
  time_t down_since;
69
70
71
72

  /* === For MTBF tracking: */
  /** Weighted sum total of all times that this router has been online.
   */
73
  unsigned long weighted_run_length;
74
75
  /** If the router is now online (according to stability-checking rules),
   * when did it come online? */
76
  time_t start_of_run;
77
  /** Sum of weights for runs in weighted_run_length. */
78
  double total_run_weights;
79
80
81
82
  /* === For fractional uptime tracking: */
  time_t start_of_downtime;
  unsigned long weighted_uptime;
  unsigned long total_weighted_time;
83

Nick Mathewson's avatar
Nick Mathewson committed
84
  /** Map from hex OR2 identity digest to a link_history_t for the link
85
   * from this OR to OR2. */
86
  digestmap_t *link_history_map;
87
88
} or_history_t;

89
90
/** When did we last multiply all routers' weighted_run_length and
 * total_run_weights by STABILITY_ALPHA? */
91
92
static time_t stability_last_downrated = 0;

93
94
95
/**  */
static time_t started_tracking_stability = 0;

Nick Mathewson's avatar
Nick Mathewson committed
96
/** Map from hex OR identity digest to or_history_t. */
97
static digestmap_t *history_map = NULL;
98

99
100
/** Return the or_history_t for the OR with identity digest <b>id</b>,
 * creating it if necessary. */
101
102
static or_history_t *
get_or_history(const char* id)
103
104
{
  or_history_t *hist;
105

106
  if (tor_mem_is_zero(id, DIGEST_LEN))
107
108
    return NULL;

109
  hist = digestmap_get(history_map, id);
110
111
  if (!hist) {
    hist = tor_malloc_zero(sizeof(or_history_t));
112
    rephist_total_alloc += sizeof(or_history_t);
113
    rephist_total_num++;
114
    hist->link_history_map = digestmap_new();
115
    hist->since = hist->changed = time(NULL);
116
    digestmap_set(history_map, id, hist);
117
118
119
120
  }
  return hist;
}

Nick Mathewson's avatar
Nick Mathewson committed
121
/** Return the link_history_t for the link from the first named OR to
Nick Mathewson's avatar
Nick Mathewson committed
122
 * the second, creating it if necessary. (ORs are identified by
Roger Dingledine's avatar
Roger Dingledine committed
123
 * identity digest.)
124
 */
125
126
static link_history_t *
get_link_history(const char *from_id, const char *to_id)
127
128
129
{
  or_history_t *orhist;
  link_history_t *lhist;
130
  orhist = get_or_history(from_id);
131
132
  if (!orhist)
    return NULL;
133
  if (tor_mem_is_zero(to_id, DIGEST_LEN))
134
    return NULL;
135
  lhist = (link_history_t*) digestmap_get(orhist->link_history_map, to_id);
136
137
  if (!lhist) {
    lhist = tor_malloc_zero(sizeof(link_history_t));
138
    rephist_total_alloc += sizeof(link_history_t);
139
    lhist->since = lhist->changed = time(NULL);
140
    digestmap_set(orhist->link_history_map, to_id, lhist);
141
142
143
144
  }
  return lhist;
}

Roger Dingledine's avatar
Roger Dingledine committed
145
/** Helper: free storage held by a single link history entry. */
146
147
148
static void
_free_link_history(void *val)
{
149
  rephist_total_alloc -= sizeof(link_history_t);
150
151
152
  tor_free(val);
}

Roger Dingledine's avatar
Roger Dingledine committed
153
/** Helper: free storage held by a single OR history entry. */
154
static void
155
free_or_history(void *_hist)
156
{
157
  or_history_t *hist = _hist;
158
  digestmap_free(hist->link_history_map, _free_link_history);
159
  rephist_total_alloc -= sizeof(or_history_t);
160
  rephist_total_num--;
161
162
163
  tor_free(hist);
}

Nick Mathewson's avatar
Nick Mathewson committed
164
165
/** Update an or_history_t object <b>hist</b> so that its uptime/downtime
 * count is up-to-date as of <b>when</b>.
166
 */
167
168
static void
update_or_history(or_history_t *hist, time_t when)
169
{
Roger Dingledine's avatar
Roger Dingledine committed
170
  tor_assert(hist);
171
  if (hist->up_since) {
Roger Dingledine's avatar
Roger Dingledine committed
172
    tor_assert(!hist->down_since);
173
174
175
176
177
178
179
180
    hist->uptime += (when - hist->up_since);
    hist->up_since = when;
  } else if (hist->down_since) {
    hist->downtime += (when - hist->down_since);
    hist->down_since = when;
  }
}

Roger Dingledine's avatar
Roger Dingledine committed
181
/** Initialize the static data structures for tracking history. */
182
183
void
rep_hist_init(void)
184
{
185
  history_map = digestmap_new();
186
  bw_arrays_init();
187
  predicted_ports_init();
188
  hs_usage_init();
189
190
}

191
192
193
/** Helper: note that we are no longer connected to the router with history
 * <b>hist</b>.  If <b>failed</b>, the connection failed; otherwise, it was
 * closed correctly. */
194
195
196
197
198
199
200
201
202
203
204
205
static void
mark_or_down(or_history_t *hist, time_t when, int failed)
{
  if (hist->up_since) {
    hist->uptime += (when - hist->up_since);
    hist->up_since = 0;
  }
  if (failed && !hist->down_since) {
    hist->down_since = when;
  }
}

206
207
/** Helper: note that we are connected to the router with history
 * <b>hist</b>. */
208
209
210
211
212
213
214
215
216
217
218
219
static void
mark_or_up(or_history_t *hist, time_t when)
{
  if (hist->down_since) {
    hist->downtime += (when - hist->down_since);
    hist->down_since = 0;
  }
  if (!hist->up_since) {
    hist->up_since = when;
  }
}

Nick Mathewson's avatar
Nick Mathewson committed
220
221
/** Remember that an attempt to connect to the OR with identity digest
 * <b>id</b> failed at <b>when</b>.
222
 */
223
224
void
rep_hist_note_connect_failed(const char* id, time_t when)
225
226
{
  or_history_t *hist;
227
  hist = get_or_history(id);
228
229
  if (!hist)
    return;
230
  ++hist->n_conn_fail;
231
  mark_or_down(hist, when, 1);
232
  hist->changed = when;
233
234
}

Nick Mathewson's avatar
Nick Mathewson committed
235
236
/** Remember that an attempt to connect to the OR with identity digest
 * <b>id</b> succeeded at <b>when</b>.
237
 */
238
239
void
rep_hist_note_connect_succeeded(const char* id, time_t when)
240
241
{
  or_history_t *hist;
242
  hist = get_or_history(id);
243
244
  if (!hist)
    return;
245
  ++hist->n_conn_ok;
246
  mark_or_up(hist, when);
247
  hist->changed = when;
248
}
249

Nick Mathewson's avatar
Nick Mathewson committed
250
/** Remember that we intentionally closed our connection to the OR
Nick Mathewson's avatar
Nick Mathewson committed
251
 * with identity digest <b>id</b> at <b>when</b>.
252
 */
253
254
void
rep_hist_note_disconnect(const char* id, time_t when)
255
256
{
  or_history_t *hist;
257
  hist = get_or_history(id);
258
259
  if (!hist)
    return;
260
  mark_or_down(hist, when, 0);
261
  hist->changed = when;
262
263
}

Nick Mathewson's avatar
Nick Mathewson committed
264
265
/** Remember that our connection to the OR with identity digest
 * <b>id</b> had an error and stopped working at <b>when</b>.
266
 */
267
268
void
rep_hist_note_connection_died(const char* id, time_t when)
269
270
{
  or_history_t *hist;
271
  if (!id) {
272
    /* If conn has no identity, it didn't complete its handshake, or something
273
     * went wrong.  Ignore it.
274
275
276
     */
    return;
  }
277
  hist = get_or_history(id);
278
279
  if (!hist)
    return;
280
  mark_or_down(hist, when, 1);
281
  hist->changed = when;
282
283
}

284
285
/** We have just decided that this router with identity digest <b>id</b> is
 * reachable, meaning we will give it a "Running" flag for the next while. */
286
287
288
void
rep_hist_note_router_reachable(const char *id, time_t when)
{
289
  or_history_t *hist = get_or_history(id);
290
291
292
  int was_in_run = 1;
  char tbuf[ISO_TIME_LEN+1];

293
294
  tor_assert(hist);

295
296
  if (!started_tracking_stability)
    started_tracking_stability = time(NULL);
297
  if (!hist->start_of_run) {
298
    hist->start_of_run = when;
299
    was_in_run = 0;
300
  }
301
  if (hist->start_of_downtime) {
302
303
304
305
306
307
308
309
310
    long down_length;

    format_local_iso_time(tbuf, hist->start_of_downtime);
    log_info(LD_HIST, "Router %s is now Running; it had been down since %s.",
             hex_str(id, DIGEST_LEN), tbuf);
    if (was_in_run)
      log_info(LD_HIST, "  (Paradoxically, it was already Running too.)");

    down_length = when - hist->start_of_downtime;
311
312
    hist->total_weighted_time += down_length;
    hist->start_of_downtime = 0;
313
314
315
316
317
318
  } else {
    format_local_iso_time(tbuf, hist->start_of_run);
    if (was_in_run)
      log_debug(LD_HIST, "Router %s is still Running; it has been Running "
                "since %s", hex_str(id, DIGEST_LEN), tbuf);
    else
Nick Mathewson's avatar
Nick Mathewson committed
319
      log_info(LD_HIST,"Router %s is now Running; it was previously untracked",
320
               hex_str(id, DIGEST_LEN));
321
  }
322
323
324
325
326
327
328
}

/** We have just decided that this router is unreachable, meaning
 * we are taking away its "Running" flag. */
void
rep_hist_note_router_unreachable(const char *id, time_t when)
{
329
  or_history_t *hist = get_or_history(id);
330
331
  char tbuf[ISO_TIME_LEN+1];
  int was_running = 0;
332
333
  if (!started_tracking_stability)
    started_tracking_stability = time(NULL);
334
335
336

  tor_assert(hist);
  if (hist->start_of_run) {
337
    /*XXXX We could treat failed connections differently from failed
338
     * connect attempts. */
339
    long run_length = when - hist->start_of_run;
340
341
    format_local_iso_time(tbuf, hist->start_of_run);

342
343
344
    hist->weighted_run_length += run_length;
    hist->total_run_weights += 1.0;
    hist->start_of_run = 0;
345
346
    hist->weighted_uptime += run_length;
    hist->total_weighted_time += run_length;
347
348
349
350
351
352

    was_running = 1;
    log_info(LD_HIST, "Router %s is now non-Running: it had previously been "
             "Running since %s.  Its total weighted uptime is %lu/%lu.",
             hex_str(id, DIGEST_LEN), tbuf, hist->weighted_uptime,
             hist->total_weighted_time);
353
  }
354
  if (!hist->start_of_downtime) {
355
    hist->start_of_downtime = when;
356
357
358
359
360
361
362
363
364
365
366

    if (!was_running)
      log_info(LD_HIST, "Router %s is now non-Running; it was previously "
               "untracked.", hex_str(id, DIGEST_LEN));
  } else {
    if (!was_running) {
      format_local_iso_time(tbuf, hist->start_of_downtime);

      log_info(LD_HIST, "Router %s is still non-Running; it has been "
               "non-Running since %s.", hex_str(id, DIGEST_LEN), tbuf);
    }
367
  }
368
369
}

370
371
/** Helper: Discount all old MTBF data, if it is time to do so.  Return
 * the time at which we should next discount MTBF data. */
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
time_t
rep_hist_downrate_old_runs(time_t now)
{
  digestmap_iter_t *orhist_it;
  const char *digest1;
  or_history_t *hist;
  void *hist_p;
  double alpha = 1.0;

  if (!history_map)
    history_map = digestmap_new();
  if (!stability_last_downrated)
    stability_last_downrated = now;
  if (stability_last_downrated + STABILITY_INTERVAL > now)
    return stability_last_downrated + STABILITY_INTERVAL;

388
  /* Okay, we should downrate the data.  By how much? */
389
390
391
392
393
  while (stability_last_downrated + STABILITY_INTERVAL < now) {
    stability_last_downrated += STABILITY_INTERVAL;
    alpha *= STABILITY_ALPHA;
  }

394
395
396
  log_info(LD_HIST, "Discounting all old stability info by a factor of %lf",
           alpha);

397
  /* Multiply every w_r_l, t_r_w pair by alpha. */
398
399
400
401
402
403
404
405
406
  for (orhist_it = digestmap_iter_init(history_map);
       !digestmap_iter_done(orhist_it);
       orhist_it = digestmap_iter_next(history_map,orhist_it)) {
    digestmap_iter_get(orhist_it, &digest1, &hist_p);
    hist = hist_p;

    hist->weighted_run_length =
      (unsigned long)(hist->weighted_run_length * alpha);
    hist->total_run_weights *= alpha;
407

408
409
410
    hist->weighted_uptime = (unsigned long)(hist->weighted_uptime * alpha);
    hist->total_weighted_time = (unsigned long)
      (hist->total_weighted_time * alpha);
411
412
413
414
415
  }

  return stability_last_downrated + STABILITY_INTERVAL;
}

416
/** Helper: Return the weighted MTBF of the router with history <b>hist</b>. */
417
418
static double
get_stability(or_history_t *hist, time_t when)
419
{
420
421
  unsigned long total = hist->weighted_run_length;
  double total_weights = hist->total_run_weights;
422
423

  if (hist->start_of_run) {
424
425
    /* We're currently in a run.  Let total and total_weights hold the values
     * they would hold if the current run were to end now. */
426
427
428
    total += (when-hist->start_of_run);
    total_weights += 1.0;
  }
429
430
  if (total_weights < STABILITY_EPSILON) {
    /* Round down to zero, and avoid divide-by-zero. */
431
    return 0.0;
432
  }
433
434
435
436

  return total / total_weights;
}

437
438
/** Return the total amount of time we've been observing, with each run of
 * time downrated by the appropriate factor. */
439
440
441
442
443
444
445
446
447
448
449
static long
get_total_weighted_time(or_history_t *hist, time_t when)
{
  long total = hist->total_weighted_time;
  if (hist->start_of_run) {
    total += (when - hist->start_of_run);
  } else if (hist->start_of_downtime) {
    total += (when - hist->start_of_downtime);
  }
  return total;
}
450

451
452
/** Helper: Return the weighted percent-of-time-online of the router with
 * history <b>hist</b>. */
453
454
455
456
457
458
459
460
461
462
463
464
465
static double
get_weighted_fractional_uptime(or_history_t *hist, time_t when)
{
  unsigned long total = hist->total_weighted_time;
  unsigned long up = hist->weighted_uptime;

  if (hist->start_of_run) {
    long run_length = (when - hist->start_of_run);
    up += run_length;
    total += run_length;
  } else if (hist->start_of_downtime) {
    total += (when - hist->start_of_downtime);
  }
466
467
468
469
470
471
472
473

  if (!total) {
    /* Avoid calling anybody's uptime infinity (which should be impossible if
     * the code is working), or NaN (which can happen for any router we haven't
     * observed up or down yet). */
    return 0.0;
  }

474
475
476
  return ((double) up) / total;
}

477
478
/** Return an estimated MTBF for the router whose identity digest is
 * <b>id</b>. Return 0 if the router is unknown. */
479
480
481
482
483
484
485
486
487
488
double
rep_hist_get_stability(const char *id, time_t when)
{
  or_history_t *hist = get_or_history(id);
  if (!hist)
    return 0.0;

  return get_stability(hist, when);
}

489
490
/** Return an estimated percent-of-time-online for the router whose identity
 * digest is <b>id</b>. Return 0 if the router is unknown. */
491
492
493
494
495
496
497
498
499
500
double
rep_hist_get_weighted_fractional_uptime(const char *id, time_t when)
{
  or_history_t *hist = get_or_history(id);
  if (!hist)
    return 0.0;

  return get_weighted_fractional_uptime(hist, when);
}

501
502
503
/** Return a number representing how long we've known about the router whose
 * digest is <b>id</b>. Return 0 if the router is unknown.
 *
Nick Mathewson's avatar
Nick Mathewson committed
504
 * Be careful: this measure increases monotonically as we know the router for
505
506
507
508
509
510
511
512
513
514
515
516
 * longer and longer, but it doesn't increase linearly.
 */
long
rep_hist_get_weighted_time_known(const char *id, time_t when)
{
  or_history_t *hist = get_or_history(id);
  if (!hist)
    return 0;

  return get_total_weighted_time(hist, when);
}

517
/** Return true if we've been measuring MTBFs for long enough to
Roger Dingledine's avatar
Roger Dingledine committed
518
 * pronounce on Stability. */
519
520
521
int
rep_hist_have_measured_enough_stability(void)
{
522
  /* XXXX021 This doesn't do so well when we change our opinion
523
524
525
526
   * as to whether we're tracking router stability. */
  return started_tracking_stability < time(NULL) - 4*60*60;
}

Nick Mathewson's avatar
Nick Mathewson committed
527
528
/** Remember that we successfully extended from the OR with identity
 * digest <b>from_id</b> to the OR with identity digest
Roger Dingledine's avatar
Roger Dingledine committed
529
 * <b>to_name</b>.
530
 */
531
532
void
rep_hist_note_extend_succeeded(const char *from_id, const char *to_id)
533
534
{
  link_history_t *hist;
535
  /* log_fn(LOG_WARN, "EXTEND SUCCEEDED: %s->%s",from_name,to_name); */
536
  hist = get_link_history(from_id, to_id);
537
538
  if (!hist)
    return;
539
  ++hist->n_extend_ok;
540
  hist->changed = time(NULL);
541
}
542

Nick Mathewson's avatar
Nick Mathewson committed
543
544
545
/** Remember that we tried to extend from the OR with identity digest
 * <b>from_id</b> to the OR with identity digest <b>to_name</b>, but
 * failed.
546
 */
547
548
void
rep_hist_note_extend_failed(const char *from_id, const char *to_id)
549
550
{
  link_history_t *hist;
551
  /* log_fn(LOG_WARN, "EXTEND FAILED: %s->%s",from_name,to_name); */
552
  hist = get_link_history(from_id, to_id);
553
554
  if (!hist)
    return;
555
  ++hist->n_extend_fail;
556
  hist->changed = time(NULL);
557
558
}

559
/** Log all the reliability data we have remembered, with the chosen
560
561
 * severity.
 */
562
563
void
rep_hist_dump_stats(time_t now, int severity)
564
{
565
566
567
568
  digestmap_iter_t *lhist_it;
  digestmap_iter_t *orhist_it;
  const char *name1, *name2, *digest1, *digest2;
  char hexdigest1[HEX_DIGEST_LEN+1];
569
570
  or_history_t *or_history;
  link_history_t *link_history;
571
  void *or_history_p, *link_history_p;
572
573
  double uptime;
  char buffer[2048];
574
  size_t len;
Nick Mathewson's avatar
Nick Mathewson committed
575
  int ret;
576
  unsigned long upt, downt;
577
  routerinfo_t *r;
578

579
  rep_history_clean(now - get_options()->RephistTrackTime);
580

581
  log(severity, LD_HIST, "--------------- Dumping history information:");
582

583
584
  for (orhist_it = digestmap_iter_init(history_map);
       !digestmap_iter_done(orhist_it);
585
       orhist_it = digestmap_iter_next(history_map,orhist_it)) {
586
587
    double s;
    long stability;
588
    digestmap_iter_get(orhist_it, &digest1, &or_history_p);
589
    or_history = (or_history_t*) or_history_p;
590

591
    if ((r = router_get_by_digest(digest1)))
592
593
594
      name1 = r->nickname;
    else
      name1 = "(unknown)";
595
    base16_encode(hexdigest1, sizeof(hexdigest1), digest1, DIGEST_LEN);
596
    update_or_history(or_history, now);
597
598
    upt = or_history->uptime;
    downt = or_history->downtime;
599
600
    s = get_stability(or_history, now);
    stability = (long)s;
601
602
603
604
605
    if (upt+downt) {
      uptime = ((double)upt) / (upt+downt);
    } else {
      uptime=1.0;
    }
606
    log(severity, LD_HIST,
607
        "OR %s [%s]: %ld/%ld good connections; uptime %ld/%ld sec (%.2f%%); "
608
        "wmtbf %lu:%02lu:%02lu",
609
        name1, hexdigest1,
Roger Dingledine's avatar
tabs    
Roger Dingledine committed
610
        or_history->n_conn_ok, or_history->n_conn_fail+or_history->n_conn_ok,
611
612
        upt, upt+downt, uptime*100.0,
        stability/3600, (stability/60)%60, stability%60);
613

614
    if (!digestmap_isempty(or_history->link_history_map)) {
615
      strlcpy(buffer, "    Extend attempts: ", sizeof(buffer));
616
      len = strlen(buffer);
617
618
      for (lhist_it = digestmap_iter_init(or_history->link_history_map);
           !digestmap_iter_done(lhist_it);
619
620
           lhist_it = digestmap_iter_next(or_history->link_history_map,
                                          lhist_it)) {
621
622
        digestmap_iter_get(lhist_it, &digest2, &link_history_p);
        if ((r = router_get_by_digest(digest2)))
623
624
625
          name2 = r->nickname;
        else
          name2 = "(unknown)";
626

627
        link_history = (link_history_t*) link_history_p;
628

Nick Mathewson's avatar
Nick Mathewson committed
629
        ret = tor_snprintf(buffer+len, 2048-len, "%s(%ld/%ld); ", name2,
630
631
                        link_history->n_extend_ok,
                        link_history->n_extend_ok+link_history->n_extend_fail);
Nick Mathewson's avatar
Nick Mathewson committed
632
        if (ret<0)
633
          break;
Nick Mathewson's avatar
Nick Mathewson committed
634
        else
Nick Mathewson's avatar
Nick Mathewson committed
635
          len += ret;
636
      }
637
      log(severity, LD_HIST, "%s", buffer);
638
639
640
641
    }
  }
}

642
/** Remove history info for routers/links that haven't changed since
Roger Dingledine's avatar
Roger Dingledine committed
643
644
 * <b>before</b>.
 */
645
646
void
rep_history_clean(time_t before)
647
{
648
  int authority = authdir_mode(get_options());
649
650
651
  or_history_t *or_history;
  link_history_t *link_history;
  void *or_history_p, *link_history_p;
652
653
  digestmap_iter_t *orhist_it, *lhist_it;
  const char *d1, *d2;
654

655
656
  orhist_it = digestmap_iter_init(history_map);
  while (!digestmap_iter_done(orhist_it)) {
657
    int remove;
658
    digestmap_iter_get(orhist_it, &d1, &or_history_p);
659
    or_history = or_history_p;
660
661
662
663
664

    remove = authority ? (or_history->total_run_weights < STABILITY_EPSILON &&
                          !or_history->start_of_run)
                       : (or_history->changed < before);
    if (remove) {
665
      orhist_it = digestmap_iter_next_rmv(history_map, orhist_it);
666
      free_or_history(or_history);
667
668
      continue;
    }
669
670
671
    for (lhist_it = digestmap_iter_init(or_history->link_history_map);
         !digestmap_iter_done(lhist_it); ) {
      digestmap_iter_get(lhist_it, &d2, &link_history_p);
672
673
      link_history = link_history_p;
      if (link_history->changed < before) {
674
675
        lhist_it = digestmap_iter_next_rmv(or_history->link_history_map,
                                           lhist_it);
676
        rephist_total_alloc -= sizeof(link_history_t);
677
678
679
        tor_free(link_history);
        continue;
      }
680
      lhist_it = digestmap_iter_next(or_history->link_history_map,lhist_it);
681
    }
682
    orhist_it = digestmap_iter_next(history_map, orhist_it);
683
684
685
  }
}

686
687
688
689
690
/** Write MTBF data to disk. Return 0 on success, negative on failure.
 *
 * If <b>missing_means_down</b>, then if we're about to write an entry
 * that is still considered up but isn't in our routerlist, consider it
 * to be down. */
691
int
692
rep_hist_record_mtbf_data(time_t now, int missing_means_down)
693
694
695
696
697
698
699
{
  char time_buf[ISO_TIME_LEN+1];

  digestmap_iter_t *orhist_it;
  const char *digest;
  void *or_history_p;
  or_history_t *hist;
700
701
702
703
  open_file_t *open_file = NULL;
  FILE *f;

  {
704
    char *filename = get_datadir_fname("router-stability");
705
706
707
708
709
710
    f = start_writing_to_stdio_file(filename, OPEN_FLAGS_REPLACE|O_TEXT, 0600,
                                    &open_file);
    tor_free(filename);
    if (!f)
      return -1;
  }
711

712
713
714
715
716
717
718
719
720
  /* File format is:
   *   FormatLine *KeywordLine Data
   *
   *   FormatLine = "format 1" NL
   *   KeywordLine = Keyword SP Arguments NL
   *   Data = "data" NL *RouterMTBFLine "." NL
   *   RouterMTBFLine = Fingerprint SP WeightedRunLen SP
   *           TotalRunWeights [SP S=StartRunTime] NL
   */
721
722
#define PUT(s) STMT_BEGIN if (fputs((s),f)<0) goto err; STMT_END
#define PRINTF(args) STMT_BEGIN if (fprintf args <0) goto err; STMT_END
723

724
  PUT("format 2\n");
725

726
  format_iso_time(time_buf, time(NULL));
727
  PRINTF((f, "stored-at %s\n", time_buf));
728

729
730
  if (started_tracking_stability) {
    format_iso_time(time_buf, started_tracking_stability);
731
    PRINTF((f, "tracked-since %s\n", time_buf));
732
  }
733
734
  if (stability_last_downrated) {
    format_iso_time(time_buf, stability_last_downrated);
735
    PRINTF((f, "last-downrated %s\n", time_buf));
736
  }
737

738
  PUT("data\n");
739

740
  /* XXX Nick: now bridge auths record this for all routers too.
741
742
   * Should we make them record it only for bridge routers? -RD
   * Not for 0.2.0. -NM */
743
744
745
746
747
748
749
750
751
  for (orhist_it = digestmap_iter_init(history_map);
       !digestmap_iter_done(orhist_it);
       orhist_it = digestmap_iter_next(history_map,orhist_it)) {
    char dbuf[HEX_DIGEST_LEN+1];
    const char *t = NULL;
    digestmap_iter_get(orhist_it, &digest, &or_history_p);
    hist = (or_history_t*) or_history_p;

    base16_encode(dbuf, sizeof(dbuf), digest, DIGEST_LEN);
752
753
754
755
756
757
758
759
760
761
762
763

    if (missing_means_down && hist->start_of_run &&
        !router_get_by_digest(digest)) {
      /* We think this relay is running, but it's not listed in our
       * routerlist. Somehow it fell out without telling us it went
       * down. Complain and also correct it. */
      log_info(LD_HIST,
               "Relay '%s' is listed as up in rephist, but it's not in "
               "our routerlist. Correcting.", dbuf);
      rep_hist_note_router_unreachable(digest, now);
    }

764
    PRINTF((f, "R %s\n", dbuf));
765
    if (hist->start_of_run > 0) {
766
767
768
      format_iso_time(time_buf, hist->start_of_run);
      t = time_buf;
    }
769
770
771
772
    PRINTF((f, "+MTBF %lu %.5lf%s%s\n",
            hist->weighted_run_length, hist->total_run_weights,
            t ? " S=" : "", t ? t : ""));
    t = NULL;
773
    if (hist->start_of_downtime > 0) {
774
775
776
777
778
      format_iso_time(time_buf, hist->start_of_downtime);
      t = time_buf;
    }
    PRINTF((f, "+WFU %lu %lu%s%s\n",
            hist->weighted_uptime, hist->total_weighted_time,
779
            t ? " S=" : "", t ? t : ""));
780
781
  }

782
  PUT(".\n");
783

784
785
#undef PUT
#undef PRINTF
786

787
788
789
790
  return finish_writing_to_file(open_file);
 err:
  abort_writing_to_file(open_file);
  return -1;
791
792
}

Nick Mathewson's avatar
Nick Mathewson committed
793
794
/** Format the current tracked status of the router in <b>hist</b> at time
 * <b>now</b> for analysis; return it in a newly allocated string. */
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
static char *
rep_hist_format_router_status(or_history_t *hist, time_t now)
{
  char buf[1024];
  char sor_buf[ISO_TIME_LEN+1];
  char sod_buf[ISO_TIME_LEN+1];
  double wfu;
  double mtbf;
  int up = 0, down = 0;

  if (hist->start_of_run) {
    format_iso_time(sor_buf, hist->start_of_run);
    up = 1;
  }
  if (hist->start_of_downtime) {
810
    format_iso_time(sod_buf, hist->start_of_downtime);
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
    down = 1;
  }

  wfu = get_weighted_fractional_uptime(hist, now);
  mtbf = get_stability(hist, now);
  tor_snprintf(buf, sizeof(buf),
               "%s%s%s"
               "%s%s%s"
               "wfu %0.3lf\n"
               " weighted-time %lu\n"
               " weighted-uptime %lu\n"
               "mtbf %0.1lf\n"
               " weighted-run-length %lu\n"
               " total-run-weights %lf\n",
               up?"uptime-started ":"", up?sor_buf:"", up?" UTC\n":"",
               down?"downtime-started ":"", down?sod_buf:"", down?" UTC\n":"",
               wfu,
               hist->total_weighted_time,
               hist->weighted_uptime,
               mtbf,
               hist->weighted_run_length,
               hist->total_run_weights
               );

  return tor_strdup(buf);
}

Nick Mathewson's avatar
Nick Mathewson committed
838
839
/** The last stability analysis document that we created, or NULL if we never
 * have created one. */
840
static char *last_stability_doc = NULL;
Nick Mathewson's avatar
Nick Mathewson committed
841
842
/** The last time we created a stability analysis document, or 0 if we never
 * have created one. */
843
static time_t built_last_stability_doc_at = 0;
Nick Mathewson's avatar
Nick Mathewson committed
844
/** Shortest allowable time between building two stability documents. */
845
846
#define MAX_STABILITY_DOC_BUILD_RATE (3*60)

Nick Mathewson's avatar
Nick Mathewson committed
847
848
/** Return a pointer to a NUL-terminated document describing our view of the
 * stability of the routers we've been tracking.  Return NULL on failure. */
849
850
851
852
853
854
855
856
857
858
859
860
861
862
const char *
rep_hist_get_router_stability_doc(time_t now)
{
  char *result;
  smartlist_t *chunks;
  if (built_last_stability_doc_at + MAX_STABILITY_DOC_BUILD_RATE > now)
    return last_stability_doc;

  if (!history_map)
    return NULL;

  tor_free(last_stability_doc);
  chunks = smartlist_create();

863
864
865
866
867
868
  if (rep_hist_have_measured_enough_stability()) {
    smartlist_add(chunks, tor_strdup("we-have-enough-measurements\n"));
  } else {
    smartlist_add(chunks, tor_strdup("we-do-not-have-enough-measurements\n"));
  }

869
870
871
  DIGESTMAP_FOREACH(history_map, id, or_history_t *, hist) {
    routerinfo_t *ri;
    char dbuf[BASE64_DIGEST_LEN+1];
872
    char header_buf[512];
873
874
875
876
877
    char *info;
    digest_to_base64(dbuf, id);
    ri = router_get_by_digest(id);
    if (ri) {
      char *ip = tor_dup_ip(ri->addr);
878
879
880
881
882
      char tbuf[ISO_TIME_LEN+1];
      format_iso_time(tbuf, ri->cache_info.published_on);
      tor_snprintf(header_buf, sizeof(header_buf),
                   "router %s %s %s\n"
                   "published %s\n"
Roger Dingledine's avatar
Roger Dingledine committed
883
                   "relevant-flags %s%s%s\n"
884
885
886
887
888
889
890
                   "declared-uptime %ld\n",
                   dbuf, ri->nickname, ip,
                   tbuf,
                   ri->is_running ? "Running " : "",
                   ri->is_valid ? "Valid " : "",
                   ri->is_hibernating ? "Hibernating " : "",
                   ri->uptime);
891
892
893
894
895
896
897
898
899
      tor_free(ip);
    } else {
      tor_snprintf(header_buf, sizeof(header_buf),
                   "router %s {no descriptor}\n", dbuf);
    }
    smartlist_add(chunks, tor_strdup(header_buf));
    info = rep_hist_format_router_status(hist, now);
    if (info)
      smartlist_add(chunks, info);
900

901
902
903
904
905
906
907
908
909
910
911
  } DIGESTMAP_FOREACH_END;

  result = smartlist_join_strings(chunks, "", 0, NULL);
  SMARTLIST_FOREACH(chunks, char *, cp, tor_free(cp));
  smartlist_free(chunks);

  last_stability_doc = result;
  built_last_stability_doc_at = time(NULL);
  return result;
}

912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
/** Helper: return the first j >= i such that !strcmpstart(sl[j], prefix) and
 * such that no line sl[k] with i <= k < j starts with "R ".  Return -1 if no
 * such line exists. */
static int
find_next_with(smartlist_t *sl, int i, const char *prefix)
{
  for ( ; i < smartlist_len(sl); ++i) {
    const char *line = smartlist_get(sl, i);
    if (!strcmpstart(line, prefix))
      return i;
    if (!strcmpstart(line, "R "))
      return -1;
  }
  return -1;
}

928
/** How many bad times has parse_possibly_bad_iso_time parsed? */
929
static int n_bogus_times = 0;
930
931
/** Parse the ISO-formatted time in <b>s</b> into *<b>time_out</b>, but
 * rounds any pre-1970 date to Jan 1, 1970. */
932
933
934
935
936
937
938
static int
parse_possibly_bad_iso_time(const char *s, time_t *time_out)
{
  int year;
  char b[5];
  strlcpy(b, s, sizeof(b));
  b[4] = '\0';
939
  year = (int)tor_parse_long(b, 10, 0, INT_MAX, NULL, NULL);
940
941
942
943
944
945
946
947
  if (year < 1970) {
    *time_out = 0;
    ++n_bogus_times;
    return 0;
  } else
    return parse_iso_time(s, time_out);
}

948
949
950
951
952
/** We've read a time <b>t</b> from a file stored at <b>stored_at</b>, which
 * says we started measuring at <b>started_measuring</b>.  Return a new number
 * that's about as much before <b>now</b> as <b>t</b> was before
 * <b>stored_at</b>.
 */
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
static INLINE time_t
correct_time(time_t t, time_t now, time_t stored_at, time_t started_measuring)
{
  if (t < started_measuring - 24*60*60*365)
    return 0;
  else if (t < started_measuring)
    return started_measuring;
  else if (t > stored_at)
    return 0;
  else {
    long run_length = stored_at - t;
    t = now - run_length;
    if (t < started_measuring)
      t = started_measuring;
    return t;
  }
}

971
972
/** Load MTBF data from disk.  Returns 0 on success or recoverable error, -1
 * on failure. */
973
int
974
rep_hist_load_mtbf_data(time_t now)
975
976
977
978
979
{
  /* XXXX won't handle being called while history is already populated. */
  smartlist_t *lines;
  const char *line = NULL;
  int r=0, i;
980
981
  time_t last_downrated = 0, stored_at = 0, tracked_since = 0;
  time_t latest_possible_start = now;
982
  long format = -1;
983
984

  {
985
    char *filename = get_datadir_fname("router-stability");
986
    char *d = read_file_to_str(filename, RFTS_IGNORE_MISSING, NULL);
987
    tor_free(filename);
988
989
990
991
992
993
994
    if (!d)
      return -1;
    lines = smartlist_create();
    smartlist_split_string(lines, d, "\n", SPLIT_SKIP_SPACE, 0);
    tor_free(d);
  }

995
996
997
998
999
1000
  {
    const char *firstline;
    if (smartlist_len(lines)>4) {
      firstline = smartlist_get(lines, 0);
      if (!strcmpstart(firstline, "format "))
        format = tor_parse_long(firstline+strlen("format "),
For faster browsing, not all history is shown. View entire blame