geoip.c 54.8 KB
Newer Older
1
/* Copyright (c) 2007-2013, The Tor Project, Inc. */
2
3
/* See LICENSE for licensing information */

4
5
/**
 * \file geoip.c
6
7
8
9
 * \brief Functions related to maintaining an IP-to-country database;
 * to summarizing client connections by country to entry guards, bridges,
 * and directory servers; and for statistics on answering network status
 * requests.
10
11
 */

12
13
14
#define GEOIP_PRIVATE
#include "or.h"
#include "ht.h"
Sebastian Hahn's avatar
Sebastian Hahn committed
15
#include "config.h"
Sebastian Hahn's avatar
Sebastian Hahn committed
16
#include "control.h"
17
#include "dnsserv.h"
Sebastian Hahn's avatar
Sebastian Hahn committed
18
#include "geoip.h"
Sebastian Hahn's avatar
Sebastian Hahn committed
19
#include "routerlist.h"
20

21
static void clear_geoip_db(void);
22
static void init_geoip_countries(void);
23

24
/** An entry from the GeoIP IPv4 file: maps an IPv4 range (both endpoints
 * inclusive) to a country. */
typedef struct geoip_ipv4_entry_t {
  uint32_t ip_low; /**< The lowest IP in the range, in host order */
  uint32_t ip_high; /**< The highest IP in the range, in host order */
  intptr_t country; /**< An index into geoip_countries */
} geoip_ipv4_entry_t;
30

nils's avatar
nils committed
31
32
33
34
35
36
/** An entry from the GeoIP IPv6 file: maps an IPv6 range to a country.
 *
 * The endpoints are kept as raw in6_addr values and are compared bytewise
 * over s6_addr by the IPv6 sorting and bsearch helpers in this file.
 * NOTE(review): the endpoints were previously described as "in host order",
 * which looks copied from the IPv4 struct; an in6_addr is a byte array and
 * is presumably in network order -- confirm. */
typedef struct geoip_ipv6_entry_t {
  struct in6_addr ip_low; /**< The lowest IP in the range */
  struct in6_addr ip_high; /**< The highest IP in the range */
  intptr_t country; /**< An index into geoip_countries */
} geoip_ipv6_entry_t;
37

38
/** A per-country record for GeoIP request history. */
typedef struct geoip_country_t {
  /** Lowercased two-letter country code, NUL-terminated ("??" for the
   * unknown country). */
  char countrycode[3];
  /** Number of v3 network status requests counted for this country. */
  uint32_t n_v3_ns_requests;
} geoip_country_t;

/** A list of geoip_country_t */
45
static smartlist_t *geoip_countries = NULL;
46
47
48
/** A map from lowercased country codes to their position in geoip_countries.
 * The index is encoded in the pointer, and 1 is added so that NULL can mean
 * not found. */
49
static strmap_t *country_idxplus1_by_lc_code = NULL;
50
51
/** Lists of all known geoip_ipv4_entry_t and geoip_ipv6_entry_t, sorted
 * by their respective ip_low. */
nils's avatar
nils committed
52
static smartlist_t *geoip_ipv4_entries = NULL, *geoip_ipv6_entries = NULL;
53

54
/** SHA1 digest of the GeoIP files to include in extra-info descriptors. */
55
static char geoip_digest[DIGEST_LEN];
56
static char geoip6_digest[DIGEST_LEN];
57

58
59
60
/** Return the index of the <b>country</b>'s entry in the GeoIP
 * country list if it is a valid 2-letter country code, otherwise
 * return -1. */
61
62
63
country_t
geoip_get_country(const char *country)
{
64
  void *idxplus1_;
65
66
  intptr_t idx;

67
68
  idxplus1_ = strmap_get_lc(country_idxplus1_by_lc_code, country);
  if (!idxplus1_)
69
70
    return -1;

71
  idx = ((uintptr_t)idxplus1_)-1;
72
73
74
  return (country_t)idx;
}

Nick Mathewson's avatar
Nick Mathewson committed
75
76
/** Add an entry to a GeoIP table, mapping all IP addresses between <b>low</b>
 * and <b>high</b>, inclusive, to the 2-letter country code <b>country</b>. */
77
static void
Linus Nordberg's avatar
Linus Nordberg committed
78
79
geoip_add_entry(const tor_addr_t *low, const tor_addr_t *high,
                const char *country)
80
{
81
  intptr_t idx;
82
  void *idxplus1_;
83

Linus Nordberg's avatar
Linus Nordberg committed
84
85
86
  if (tor_addr_family(low) != tor_addr_family(high))
    return;
  if (tor_addr_compare(high, low, CMP_EXACT) < 0)
87
88
    return;

89
  idxplus1_ = strmap_get_lc(country_idxplus1_by_lc_code, country);
90

91
  if (!idxplus1_) {
92
93
94
    geoip_country_t *c = tor_malloc_zero(sizeof(geoip_country_t));
    strlcpy(c->countrycode, country, sizeof(c->countrycode));
    tor_strlower(c->countrycode);
95
    smartlist_add(geoip_countries, c);
96
    idx = smartlist_len(geoip_countries) - 1;
97
98
    strmap_set_lc(country_idxplus1_by_lc_code, country, (void*)(idx+1));
  } else {
99
    idx = ((uintptr_t)idxplus1_)-1;
100
  }
101
102
103
104
  {
    geoip_country_t *c = smartlist_get(geoip_countries, idx);
    tor_assert(!strcasecmp(c->countrycode, country));
  }
Linus Nordberg's avatar
Linus Nordberg committed
105
106
107
108
109
110
111
112
113
114
115
116
117
118

  if (tor_addr_family(low) == AF_INET) {
    geoip_ipv4_entry_t *ent = tor_malloc_zero(sizeof(geoip_ipv4_entry_t));
    ent->ip_low = tor_addr_to_ipv4h(low);
    ent->ip_high = tor_addr_to_ipv4h(high);
    ent->country = idx;
    smartlist_add(geoip_ipv4_entries, ent);
  } else if (tor_addr_family(low) == AF_INET6) {
    geoip_ipv6_entry_t *ent = tor_malloc_zero(sizeof(geoip_ipv6_entry_t));
    ent->ip_low = *tor_addr_to_in6(low);
    ent->ip_high = *tor_addr_to_in6(high);
    ent->country = idx;
    smartlist_add(geoip_ipv6_entries, ent);
  }
119
120
}

Linus Nordberg's avatar
Linus Nordberg committed
121
122
/** Add an entry to the GeoIP table indicated by <b>family</b>,
 * parsing it from <b>line</b>. The format is as for geoip_load_file(). */
123
STATIC int
Linus Nordberg's avatar
Linus Nordberg committed
124
geoip_parse_entry(const char *line, sa_family_t family)
125
{
Linus Nordberg's avatar
Linus Nordberg committed
126
127
128
129
  tor_addr_t low_addr, high_addr;
  char c[3];
  char *country = NULL;

130
131
  if (!geoip_countries)
    init_geoip_countries();
Linus Nordberg's avatar
Linus Nordberg committed
132
133
134
135
136
137
138
139
140
141
  if (family == AF_INET) {
    if (!geoip_ipv4_entries)
      geoip_ipv4_entries = smartlist_new();
  } else if (family == AF_INET6) {
    if (!geoip_ipv6_entries)
      geoip_ipv6_entries = smartlist_new();
  } else {
    log_warn(LD_GENERAL, "Unsupported family: %d", family);
    return -1;
  }
142

143
144
145
146
  while (TOR_ISSPACE(*line))
    ++line;
  if (*line == '#')
    return 0;
Linus Nordberg's avatar
Linus Nordberg committed
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179

  if (family == AF_INET) {
    unsigned int low, high;
    if (tor_sscanf(line,"%u,%u,%2s", &low, &high, c) == 3 ||
        tor_sscanf(line,"\"%u\",\"%u\",\"%2s\",", &low, &high, c) == 3) {
      tor_addr_from_ipv4h(&low_addr, low);
      tor_addr_from_ipv4h(&high_addr, high);
    } else
      goto fail;
    country = c;
  } else {                      /* AF_INET6 */
    char buf[512];
    char *low_str, *high_str;
    struct in6_addr low, high;
    char *strtok_state;
    strlcpy(buf, line, sizeof(buf));
    low_str = tor_strtok_r(buf, ",", &strtok_state);
    if (!low_str)
      goto fail;
    high_str = tor_strtok_r(NULL, ",", &strtok_state);
    if (!high_str)
      goto fail;
    country = tor_strtok_r(NULL, "\n", &strtok_state);
    if (!country)
      goto fail;
    if (strlen(country) != 2)
      goto fail;
    if (tor_inet_pton(AF_INET6, low_str, &low) <= 0)
      goto fail;
    tor_addr_from_in6(&low_addr, &low);
    if (tor_inet_pton(AF_INET6, high_str, &high) <= 0)
      goto fail;
    tor_addr_from_in6(&high_addr, &high);
180
  }
Linus Nordberg's avatar
Linus Nordberg committed
181
182
183
184
185
186
187
  geoip_add_entry(&low_addr, &high_addr, country);
  return 0;

  fail:
  log_warn(LD_GENERAL, "Unable to parse line from GEOIP %s file: %s",
           family == AF_INET ? "IPv4" : "IPv6", escaped(line));
  return -1;
188
189
190
}

/** Sorting helper: return -1, 1, or 0 based on comparison of two
191
 * geoip_ipv4_entry_t */
192
static int
193
geoip_ipv4_compare_entries_(const void **_a, const void **_b)
194
{
195
  const geoip_ipv4_entry_t *a = *_a, *b = *_b;
196
197
198
199
200
201
202
203
  if (a->ip_low < b->ip_low)
    return -1;
  else if (a->ip_low > b->ip_low)
    return 1;
  else
    return 0;
}

204
/** bsearch helper: return -1, 1, or 0 based on comparison of an IP (a pointer
205
 * to a uint32_t in host order) to a geoip_ipv4_entry_t */
206
static int
207
geoip_ipv4_compare_key_to_entry_(const void *_key, const void **_member)
208
{
209
  /* No alignment issue here, since _key really is a pointer to uint32_t */
210
  const uint32_t addr = *(uint32_t *)_key;
211
  const geoip_ipv4_entry_t *entry = *_member;
212
213
214
215
216
217
218
219
  if (addr < entry->ip_low)
    return -1;
  else if (addr > entry->ip_high)
    return 1;
  else
    return 0;
}

nils's avatar
nils committed
220
221
222
/** Sorting helper: return -1, 1, or 0 based on comparison of two
 * geoip_ipv6_entry_t */
static int
223
geoip_ipv6_compare_entries_(const void **_a, const void **_b)
nils's avatar
nils committed
224
225
{
  const geoip_ipv6_entry_t *a = *_a, *b = *_b;
226
227
  return fast_memcmp(a->ip_low.s6_addr, b->ip_low.s6_addr,
                     sizeof(struct in6_addr));
nils's avatar
nils committed
228
229
}

Linus Nordberg's avatar
Linus Nordberg committed
230
/** bsearch helper: return -1, 1, or 0 based on comparison of an IPv6
231
 * (a pointer to a in6_addr) to a geoip_ipv6_entry_t */
nils's avatar
nils committed
232
static int
233
geoip_ipv6_compare_key_to_entry_(const void *_key, const void **_member)
nils's avatar
nils committed
234
235
236
237
{
  const struct in6_addr *addr = (struct in6_addr *)_key;
  const geoip_ipv6_entry_t *entry = *_member;

238
  if (fast_memcmp(addr->s6_addr, entry->ip_low.s6_addr,
239
             sizeof(struct in6_addr)) < 0)
nils's avatar
nils committed
240
    return -1;
241
  else if (fast_memcmp(addr->s6_addr, entry->ip_high.s6_addr,
242
                  sizeof(struct in6_addr)) > 0)
nils's avatar
nils committed
243
244
245
246
247
    return 1;
  else
    return 0;
}

Roger Dingledine's avatar
Roger Dingledine committed
248
249
250
/** Return 1 if we should collect geoip stats on bridge users, and
 * include them in our extrainfo descriptor. Else return 0. */
int
251
should_record_bridge_info(const or_options_t *options)
Roger Dingledine's avatar
Roger Dingledine committed
252
253
254
255
{
  return options->BridgeRelay && options->BridgeRecordUsageByCountry;
}

256
257
258
259
260
261
262
/** Set up a new list of geoip countries with no countries (yet) set in it,
 * except for the unknown country.
 */
static void
init_geoip_countries(void)
{
  geoip_country_t *geoip_unresolved;
263
  geoip_countries = smartlist_new();
264
265
266
267
268
269
270
271
272
273
  /* Add a geoip_country_t for requests that could not be resolved to a
   * country as first element (index 0) to geoip_countries. */
  geoip_unresolved = tor_malloc_zero(sizeof(geoip_country_t));
  strlcpy(geoip_unresolved->countrycode, "??",
          sizeof(geoip_unresolved->countrycode));
  smartlist_add(geoip_countries, geoip_unresolved);
  country_idxplus1_by_lc_code = strmap_new();
  strmap_set_lc(country_idxplus1_by_lc_code, "??", (void*)(1));
}

Linus Nordberg's avatar
Linus Nordberg committed
274
275
276
/** Clear appropriate GeoIP database, based on <b>family</b>, and
 * reload it from the file <b>filename</b>. Return 0 on success, -1 on
 * failure.
277
 *
Linus Nordberg's avatar
Linus Nordberg committed
278
 * Recognized line formats for IPv4 are:
279
280
281
282
283
 *   INTIPLOW,INTIPHIGH,CC
 * and
 *   "INTIPLOW","INTIPHIGH","CC","CC3","COUNTRY NAME"
 * where INTIPLOW and INTIPHIGH are IPv4 addresses encoded as 4-byte unsigned
 * integers, and CC is a country code.
284
 *
Linus Nordberg's avatar
Linus Nordberg committed
285
286
287
288
 * Recognized line format for IPv6 is:
 *   IPV6LOW,IPV6HIGH,CC
 * where IPV6LOW and IPV6HIGH are IPv6 addresses and CC is a country code.
 *
289
290
 * It also recognizes, and skips over, blank lines and lines that start
 * with '#' (comments).
291
 */
292
int
Linus Nordberg's avatar
Linus Nordberg committed
293
geoip_load_file(sa_family_t family, const char *filename)
294
295
{
  FILE *f;
296
  const char *msg = "";
Linus Nordberg's avatar
Linus Nordberg committed
297
  const or_options_t *options = get_options();
298
  int severity = options_need_geoip_info(options, &msg) ? LOG_WARN : LOG_INFO;
299
  crypto_digest_t *geoip_digest_env = NULL;
nils's avatar
nils committed
300
301
302

  tor_assert(family == AF_INET || family == AF_INET6);

303
  if (!(f = tor_fopen_cloexec(filename, "r"))) {
304
305
    log_fn(severity, LD_GENERAL, "Failed to open GEOIP file %s.  %s",
           filename, msg);
306
307
    return -1;
  }
308
309
  if (!geoip_countries)
    init_geoip_countries();
nils's avatar
nils committed
310
311
312

  if (family == AF_INET) {
    if (geoip_ipv4_entries) {
Linus Nordberg's avatar
Linus Nordberg committed
313
314
      SMARTLIST_FOREACH(geoip_ipv4_entries, geoip_ipv4_entry_t *, e,
                        tor_free(e));
nils's avatar
nils committed
315
316
317
318
319
      smartlist_free(geoip_ipv4_entries);
    }
    geoip_ipv4_entries = smartlist_new();
  } else { /* AF_INET6 */
    if (geoip_ipv6_entries) {
Linus Nordberg's avatar
Linus Nordberg committed
320
321
      SMARTLIST_FOREACH(geoip_ipv6_entries, geoip_ipv6_entry_t *, e,
                        tor_free(e));
nils's avatar
nils committed
322
323
324
      smartlist_free(geoip_ipv6_entries);
    }
    geoip_ipv6_entries = smartlist_new();
325
  }
326
  geoip_digest_env = crypto_digest_new();
Linus Nordberg's avatar
Linus Nordberg committed
327
328

  log_notice(LD_GENERAL, "Parsing GEOIP %s file %s.",
Linus Nordberg's avatar
Linus Nordberg committed
329
             (family == AF_INET) ? "IPv4" : "IPv6", filename);
330
  while (!feof(f)) {
331
    char buf[512];
332
    if (fgets(buf, (int)sizeof(buf), f) == NULL)
333
      break;
334
    crypto_digest_add_bytes(geoip_digest_env, buf, strlen(buf));
335
    /* FFFF track full country name. */
Linus Nordberg's avatar
Linus Nordberg committed
336
    geoip_parse_entry(buf, family);
337
  }
338
  /*XXXX abort and return -1 if no entries/illformed?*/
339
340
  fclose(f);

341
342
343
  /* Sort list and remember file digests so that we can include it in
   * our extra-info descriptors. */
  if (family == AF_INET) {
344
    smartlist_sort(geoip_ipv4_entries, geoip_ipv4_compare_entries_);
345
346
347
348
349
    /* Okay, now we need to maybe change our mind about what is in
     * which country. We do this for IPv4 only since that's what we
     * store in node->country. */
    refresh_all_country_info();
    crypto_digest_get_digest(geoip_digest_env, geoip_digest, DIGEST_LEN);
350
351
  } else {
    /* AF_INET6 */
352
    smartlist_sort(geoip_ipv6_entries, geoip_ipv6_compare_entries_);
353
354
    crypto_digest_get_digest(geoip_digest_env, geoip6_digest, DIGEST_LEN);
  }
355
  crypto_digest_free(geoip_digest_env);
356

357
358
359
  return 0;
}

360
/** Given an IP address in host order, return a number representing the
361
362
363
364
 * country to which that address belongs, -1 for "No geoip information
 * available", or 0 for the 'unknown country'.  The return value will always
 * be less than geoip_get_n_countries().  To decode it, call
 * geoip_get_country_name().
365
 */
366
STATIC int
367
geoip_get_country_by_ipv4(uint32_t ipaddr)
368
{
369
370
  geoip_ipv4_entry_t *ent;
  if (!geoip_ipv4_entries)
371
    return -1;
Linus Nordberg's avatar
Linus Nordberg committed
372
373
  ent = smartlist_bsearch(geoip_ipv4_entries, &ipaddr,
                          geoip_ipv4_compare_key_to_entry_);
374
  return ent ? (int)ent->country : 0;
375
376
}

377
378
379
380
381
/** Given an IPv6 address, return a number representing the country to
 * which that address belongs, -1 for "No geoip information available", or
 * 0 for the 'unknown country'.  The return value will always be less than
 * geoip_get_n_countries().  To decode it, call geoip_get_country_name().
 */
382
STATIC int
nils's avatar
nils committed
383
384
385
386
387
388
geoip_get_country_by_ipv6(const struct in6_addr *addr)
{
  geoip_ipv6_entry_t *ent;

  if (!geoip_ipv6_entries)
    return -1;
Linus Nordberg's avatar
Linus Nordberg committed
389
390
  ent = smartlist_bsearch(geoip_ipv6_entries, addr,
                          geoip_ipv6_compare_key_to_entry_);
391
  return ent ? (int)ent->country : 0;
392
393
}

394
395
396
397
398
399
400
401
/** Given an IP address, return a number representing the country to which
 * that address belongs, -1 for "No geoip information available", or 0 for
 * the 'unknown country'.  The return value will always be less than
 * geoip_get_n_countries().  To decode it, call geoip_get_country_name().
 */
int
geoip_get_country_by_addr(const tor_addr_t *addr)
{
nils's avatar
nils committed
402
403
404
405
406
  if (tor_addr_family(addr) == AF_INET) {
    return geoip_get_country_by_ipv4(tor_addr_to_ipv4h(addr));
  } else if (tor_addr_family(addr) == AF_INET6) {
    return geoip_get_country_by_ipv6(tor_addr_to_in6(addr));
  } else {
407
    return -1;
408
  }
409
410
}

411
/** Return the number of countries recognized by the GeoIP country list. */
412
413
414
int
geoip_get_n_countries(void)
{
Sebastian Hahn's avatar
Sebastian Hahn committed
415
416
  if (!geoip_countries)
    init_geoip_countries();
417
  return (int) smartlist_len(geoip_countries);
418
419
}

420
421
/** Return the two-letter country code associated with the number <b>num</b>,
 * or "??" for an unknown value. */
422
const char *
423
geoip_get_country_name(country_t num)
424
{
425
426
427
428
  if (geoip_countries && num >= 0 && num < smartlist_len(geoip_countries)) {
    geoip_country_t *c = smartlist_get(geoip_countries, num);
    return c->countrycode;
  } else
429
430
431
    return "??";
}

432
/** Return true iff we have loaded a GeoIP database.*/
433
int
434
geoip_is_loaded(sa_family_t family)
435
{
436
437
438
439
440
441
442
  tor_assert(family == AF_INET || family == AF_INET6);
  if (geoip_countries == NULL)
    return 0;
  if (family == AF_INET)
    return geoip_ipv4_entries != NULL;
  else                          /* AF_INET6 */
    return geoip_ipv6_entries != NULL;
443
444
}

445
446
447
/** Return the hex-encoded SHA1 digest of the loaded GeoIP file. The
 * result does not need to be deallocated, but will be overwritten by the
 * next call of hex_str(). */
448
const char *
449
geoip_db_digest(sa_family_t family)
450
{
451
452
453
454
455
  tor_assert(family == AF_INET || family == AF_INET6);
  if (family == AF_INET)
    return hex_str(geoip_digest, DIGEST_LEN);
  else                          /* AF_INET6 */
    return hex_str(geoip6_digest, DIGEST_LEN);
456
457
}

458
459
460
/** Entry in a map from IP address to the last time we've seen an incoming
 * connection from that IP address. Used by bridges only, to track which
 * countries have them blocked. */
461
462
typedef struct clientmap_entry_t {
  HT_ENTRY(clientmap_entry_t) node;
463
  tor_addr_t addr;
464
465
466
467
 /* Name of pluggable transport used by this client. NULL if no
    pluggable transport was used. */
  char *transport_name;

468
469
470
471
  /** Time when we last saw this IP address, in MINUTES since the epoch.
   *
   * (This will run out of space around 4011 CE.  If Tor is still in use around
   * 4000 CE, please remember to add more bits to last_seen_in_minutes.) */
Karsten Loesing's avatar
Karsten Loesing committed
472
473
  unsigned int last_seen_in_minutes:30;
  unsigned int action:2;
474
475
} clientmap_entry_t;

476
477
478
479
/** Largest allowable value for last_seen_in_minutes.  (It's a 30-bit field,
 * so it can hold up to (1u<<30)-1, or 0x3fffffffu.
 */
#define MAX_LAST_SEEN_IN_MINUTES 0X3FFFFFFFu
480

481
/** Map from client IP address to last time seen. */
482
483
static HT_HEAD(clientmap, clientmap_entry_t) client_history =
     HT_INITIALIZER();
484

485
/** Hashtable helper: compute a hash of a clientmap_entry_t. */
486
487
488
static INLINE unsigned
clientmap_entry_hash(const clientmap_entry_t *a)
{
489
490
  unsigned h = (unsigned) tor_addr_hash(&a->addr);

491
  if (a->transport_name)
492
493
494
    h += (unsigned) siphash24g(a->transport_name, strlen(a->transport_name));

  return h;
495
}
496
/** Hashtable helper: compare two clientmap_entry_t values for equality. */
497
498
499
static INLINE int
clientmap_entries_eq(const clientmap_entry_t *a, const clientmap_entry_t *b)
{
500
  if (strcmp_opt(a->transport_name, b->transport_name))
501
502
    return 0;

503
504
  return !tor_addr_compare(&a->addr, &b->addr, CMP_EXACT) &&
         a->action == b->action;
505
506
507
508
509
510
511
}

/* Declare and generate the "clientmap" hashtable functions for
 * clientmap_entry_t, linked through its <b>node</b> member and using
 * clientmap_entry_hash() and clientmap_entries_eq() for hashing and
 * equality. */
HT_PROTOTYPE(clientmap, clientmap_entry_t, node, clientmap_entry_hash,
             clientmap_entries_eq);
HT_GENERATE(clientmap, clientmap_entry_t, node, clientmap_entry_hash,
            clientmap_entries_eq, 0.6, malloc, realloc, free);

Nick Mathewson's avatar
Nick Mathewson committed
512
513
514
515
516
517
518
519
520
521
522
/** Release <b>ent</b> together with the transport name it owns.  Passing
 * NULL is allowed and does nothing. */
static void
clientmap_entry_free(clientmap_entry_t *ent)
{
  if (ent) {
    tor_free(ent->transport_name);
    tor_free(ent);
  }
}

523
524
525
526
527
528
529
530
531
532
/** Clear history of connecting clients used by entry and bridge stats.
 * Only entries whose action is GEOIP_CLIENT_CONNECT are removed and freed;
 * all other entries stay in the map. */
static void
client_history_clear(void)
{
  clientmap_entry_t **ent, **next, *this;
  for (ent = HT_START(clientmap, &client_history); ent != NULL;
       ent = next) {
    if ((*ent)->action == GEOIP_CLIENT_CONNECT) {
      /* Remember the entry before unlinking it: HT_NEXT_RMV advances the
       * iterator, after which *ent no longer refers to it. */
      this = *ent;
      next = HT_NEXT_RMV(clientmap, &client_history, ent);
      clientmap_entry_free(this);
    } else {
      next = HT_NEXT(clientmap, &client_history, ent);
    }
  }
}

540
/** Note that we've seen a client connect from the IP <b>addr</b>
Karsten Loesing's avatar
Karsten Loesing committed
541
542
 * at time <b>now</b>. Ignored by all but bridges and directories if
 * configured accordingly. */
543
void
544
geoip_note_client_seen(geoip_client_action_t action,
545
546
547
                       const tor_addr_t *addr,
                       const char *transport_name,
                       time_t now)
548
{
549
  const or_options_t *options = get_options();
550
  clientmap_entry_t lookup, *ent;
551
552
  memset(&lookup, 0, sizeof(clientmap_entry_t));

553
  if (action == GEOIP_CLIENT_CONNECT) {
554
    /* Only remember statistics as entry guard or as bridge. */
555
    if (!options->EntryStatistics &&
556
        (!(options->BridgeRelay && options->BridgeRecordUsageByCountry)))
557
      return;
558
  } else {
Karsten Loesing's avatar
Karsten Loesing committed
559
560
    if (options->BridgeRelay || options->BridgeAuthoritativeDir ||
        !options->DirReqStatistics)
561
562
563
      return;
  }

564
565
566
567
  log_debug(LD_GENERAL, "Seen client from '%s' with transport '%s'.",
            safe_str_client(fmt_addr((addr))),
            transport_name ? transport_name : "<no transport>");

568
  tor_addr_copy(&lookup.addr, addr);
Karsten Loesing's avatar
Karsten Loesing committed
569
  lookup.action = (int)action;
Nick Mathewson's avatar
Nick Mathewson committed
570
  lookup.transport_name = (char*) transport_name;
571
  ent = HT_FIND(clientmap, &client_history, &lookup);
572

573
  if (! ent) {
574
    ent = tor_malloc_zero(sizeof(clientmap_entry_t));
575
    tor_addr_copy(&ent->addr, addr);
576
577
    if (transport_name)
      ent->transport_name = tor_strdup(transport_name);
Karsten Loesing's avatar
Karsten Loesing committed
578
    ent->action = (int)action;
579
580
    HT_INSERT(clientmap, &client_history, ent);
  }
581
  if (now / 60 <= (int)MAX_LAST_SEEN_IN_MINUTES && now >= 0)
582
583
584
    ent->last_seen_in_minutes = (unsigned)(now/60);
  else
    ent->last_seen_in_minutes = 0;
585

586
  if (action == GEOIP_CLIENT_NETWORKSTATUS) {
587
    int country_idx = geoip_get_country_by_addr(addr);
588
589
    if (country_idx < 0)
      country_idx = 0; /** unresolved requests are stored at index 0. */
590
591
    if (country_idx >= 0 && country_idx < smartlist_len(geoip_countries)) {
      geoip_country_t *country = smartlist_get(geoip_countries, country_idx);
592
      ++country->n_v3_ns_requests;
593
594
    }
  }
595
596
}

597
598
/** HT_FOREACH helper: remove a clientmap_entry_t from the hashtable if it's
 * older than a certain time. */
599
static int
600
remove_old_client_helper_(struct clientmap_entry_t *ent, void *_cutoff)
601
{
Karsten Loesing's avatar
Karsten Loesing committed
602
603
  time_t cutoff = *(time_t*)_cutoff / 60;
  if (ent->last_seen_in_minutes < cutoff) {
Nick Mathewson's avatar
Nick Mathewson committed
604
    clientmap_entry_free(ent);
605
606
607
608
609
610
    return 1;
  } else {
    return 0;
  }
}

611
/** Forget about all clients that haven't connected since <b>cutoff</b>. */
612
613
614
615
void
geoip_remove_old_clients(time_t cutoff)
{
  clientmap_HT_FOREACH_FN(&client_history,
616
                          remove_old_client_helper_,
617
618
619
                          &cutoff);
}

620
621
622
623
/** How many responses are we giving to clients requesting v3 network
 * statuses?  Indexed by geoip_ns_response_t. */
static uint32_t ns_v3_responses[GEOIP_NS_RESPONSE_NUM];

624
625
/** Note that we've rejected a client's request for a v3 network status
 * for reason <b>reason</b> at time <b>now</b>. */
626
void
627
geoip_note_ns_response(geoip_ns_response_t response)
628
629
{
  static int arrays_initialized = 0;
Karsten Loesing's avatar
Karsten Loesing committed
630
631
  if (!get_options()->DirReqStatistics)
    return;
632
633
634
635
636
  if (!arrays_initialized) {
    memset(ns_v3_responses, 0, sizeof(ns_v3_responses));
    arrays_initialized = 1;
  }
  tor_assert(response < GEOIP_NS_RESPONSE_NUM);
637
  ns_v3_responses[response]++;
638
639
}

640
/** Do not mention any country from which fewer than this number of IPs have
 * connected.  This conceivably avoids reporting information that could
 * deanonymize users, though analysis is lacking. */
#define MIN_IPS_TO_NOTE_COUNTRY 1
/** Do not report any geoip data at all if we have fewer than this number of
 * IPs to report about. */
#define MIN_IPS_TO_NOTE_ANYTHING 1
/** When reporting geoip data about countries, round up to the nearest
 * multiple of this value. */
#define IP_GRANULARITY 8

651
/** Helper type: used to sort per-country totals by value. */
typedef struct c_hist_t {
  char country[3]; /**< Two-letter country code. */
  unsigned total; /**< Total IP addresses seen in this country. */
} c_hist_t;

/** Sorting helper: compare two c_hist_t.  Sort in descending order of
 * total, and then by country code.  Return -1 or 1 when the totals differ
 * (larger total first); otherwise return the strcmp() result of the two
 * country codes. */
static int
c_hist_compare_(const void **_a, const void **_b)
{
  const c_hist_t *a = *_a, *b = *_b;
  if (a->total > b->total)
    return -1;
  else if (a->total < b->total)
    return 1;
  else
    return strcmp(a->country, b->country);
}

672
673
674
675
676
/** When there are incomplete directory requests at the end of a 24-hour
 * period, consider those requests running for longer than this timeout as
 * failed, the others as still running.
 * NOTE(review): the value is presumably in seconds (i.e. 10 minutes) --
 * confirm against the code that uses it. */
#define DIRREQ_TIMEOUT (10*60)

677
/** Entry in a map from either chan->global_identifier for direct requests
678
679
680
681
 * or a unique circuit identifier for tunneled requests to request time,
 * response size, and completion time of a network status request. Used to
 * measure download times of requests to derive average client
 * bandwidths. */
682
typedef struct dirreq_map_entry_t {
683
  HT_ENTRY(dirreq_map_entry_t) node;
684
  /** Unique identifier for this network status request; this is either the
685
   * chan->global_identifier of the dir channel (direct request) or a new
686
687
   * locally unique identifier of a circuit (tunneled request). This ID is
   * only unique among other direct or tunneled requests, respectively. */
688
689
690
691
  uint64_t dirreq_id;
  unsigned int state:3; /**< State of this directory request. */
  unsigned int type:1; /**< Is this a direct or a tunneled request? */
  unsigned int completed:1; /**< Is this request complete? */
692
693
694
695
  /** When did we receive the request and started sending the response? */
  struct timeval request_time;
  size_t response_size; /**< What is the size of the response in bytes? */
  struct timeval completion_time; /**< When did the request succeed? */
696
} dirreq_map_entry_t;
697
698

/** Map of all directory requests asking for v2 or v3 network statuses in
699
 * the current geoip-stats interval. Values are
700
 * of type *<b>dirreq_map_entry_t</b>. */
701
702
703
704
705
706
707
708
709
710
static HT_HEAD(dirreqmap, dirreq_map_entry_t) dirreq_map =
     HT_INITIALIZER();

/** Hashtable helper: return true iff <b>a</b> and <b>b</b> have the same
 * request type and the same dirreq_id. */
static int
dirreq_map_ent_eq(const dirreq_map_entry_t *a,
                  const dirreq_map_entry_t *b)
{
  if (a->type != b->type)
    return 0;
  return a->dirreq_id == b->dirreq_id;
}

711
/* DOCDOC dirreq_map_ent_hash */
712
713
714
715
716
717
718
719
720
721
722
723
static unsigned
dirreq_map_ent_hash(const dirreq_map_entry_t *entry)
{
  unsigned u = (unsigned) entry->dirreq_id;
  u += entry->type << 20;
  return u;
}

/* Declare and generate the "dirreqmap" hashtable functions for
 * dirreq_map_entry_t, linked through its <b>node</b> member and using
 * dirreq_map_ent_hash() and dirreq_map_ent_eq() for hashing and
 * equality. */
HT_PROTOTYPE(dirreqmap, dirreq_map_entry_t, node, dirreq_map_ent_hash,
             dirreq_map_ent_eq);
HT_GENERATE(dirreqmap, dirreq_map_entry_t, node, dirreq_map_ent_hash,
            dirreq_map_ent_eq, 0.6, malloc, realloc, free);
724
725

/** Helper: Put <b>entry</b> into map of directory requests using
726
 * <b>type</b> and <b>dirreq_id</b> as key parts. If there is
727
728
 * already an entry for that key, print out a BUG warning and return. */
static void
729
dirreq_map_put_(dirreq_map_entry_t *entry, dirreq_type_t type,
730
               uint64_t dirreq_id)
731
{
732
733
734
735
  dirreq_map_entry_t *old_ent;
  tor_assert(entry->type == type);
  tor_assert(entry->dirreq_id == dirreq_id);

736
737
738
  /* XXXX we could switch this to HT_INSERT some time, since it seems that
   * this bug doesn't happen. But since this function doesn't seem to be
   * critical-path, it's sane to leave it alone. */
739
740
  old_ent = HT_REPLACE(dirreqmap, &dirreq_map, entry);
  if (old_ent && old_ent != entry) {
741
    log_warn(LD_BUG, "Error when putting directory request into local "
742
             "map. There was already an entry for the same identifier.");
743
744
745
746
747
    return;
  }
}

/** Helper: Look up and return an entry in the map of directory requests
748
 * using <b>type</b> and <b>dirreq_id</b> as key parts. If there
749
 * is no such entry, return NULL. */
750
static dirreq_map_entry_t *
751
dirreq_map_get_(dirreq_type_t type, uint64_t dirreq_id)
752
{
753
754
755
756
  dirreq_map_entry_t lookup;
  lookup.type = type;
  lookup.dirreq_id = dirreq_id;
  return HT_FIND(dirreqmap, &dirreq_map, &lookup);
757
758
759
}

/** Note that an either direct or tunneled (see <b>type</b>) directory
760
761
 * request for a v3 network status with unique ID <b>dirreq_id</b> of size
 * <b>response_size</b> has started. */
762
void
763
geoip_start_dirreq(uint64_t dirreq_id, size_t response_size,
764
                   dirreq_type_t type)
765
{
Karsten Loesing's avatar
Karsten Loesing committed
766
767
768
769
  dirreq_map_entry_t *ent;
  if (!get_options()->DirReqStatistics)
    return;
  ent = tor_malloc_zero(sizeof(dirreq_map_entry_t));
770
  ent->dirreq_id = dirreq_id;
771
772
773
  tor_gettimeofday(&ent->request_time);
  ent->response_size = response_size;
  ent->type = type;
774
  dirreq_map_put_(ent, type, dirreq_id);
775
776
777
}

/** Change the state of the either direct or tunneled (see <b>type</b>)
778
 * directory request with <b>dirreq_id</b> to <b>new_state</b> and
779
780
781
782
783
 * possibly mark it as completed. If no entry can be found for the given
 * key parts (e.g., if this is a directory request that we are not
 * measuring, or one that was started in the previous measurement period),
 * or if the state cannot be advanced to <b>new_state</b>, do nothing. */
void
784
785
geoip_change_dirreq_state(uint64_t dirreq_id, dirreq_type_t type,
                          dirreq_state_t new_state)
786
{
Karsten Loesing's avatar
Karsten Loesing committed
787
788
789
  dirreq_map_entry_t *ent;
  if (!get_options()->DirReqStatistics)
    return;
790
  ent = dirreq_map_get_(type, dirreq_id);
791
792
  if (!ent)
    return;
793
  if (new_state == DIRREQ_IS_FOR_NETWORK_STATUS)
794
795
796
797
    return;
  if (new_state - 1 != ent->state)
    return;
  ent->state = new_state;
798
799
800
  if ((type == DIRREQ_DIRECT &&
         new_state == DIRREQ_FLUSHING_DIR_CONN_FINISHED) ||
      (type == DIRREQ_TUNNELED &&
801
         new_state == DIRREQ_CHANNEL_BUFFER_FLUSHED)) {
802
803
804
805
806
    tor_gettimeofday(&ent->completion_time);
    ent->completed = 1;
  }
}

807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
/** Return the bridge-ip-transports string that should be inserted in
 *  our extra-info descriptor. Return NULL if the bridge-ip-transports
 *  line should be empty.  */
char *
geoip_get_transport_history(void)
{
  unsigned granularity = IP_GRANULARITY;
  /** String hash table <name of transport> -> <number of users>. */
  strmap_t *transport_counts = strmap_new();

  /** Smartlist that contains copies of the names of the transports
      that have been used. */
  smartlist_t *transports_used = smartlist_new();

  /* Special string to signify that no transport was used for this
     connection. Pluggable transport names can't have symbols in their
     names, so this string will never collide with a real transport. */
  static const char* no_transport_str = "<OR>";

  clientmap_entry_t **ent;
  const char *transport_name = NULL;
  smartlist_t *string_chunks = smartlist_new();
  char *the_string = NULL;

  /* If we haven't seen any clients yet, return NULL. */
  if (HT_EMPTY(&client_history))
    goto done;

  /** We do the following steps to form the transport history string:
   *  a) Foreach client that uses a pluggable transport, we increase the
   *  times that transport was used by one. If the client did not use
   *  a transport, we increase the number of times someone connected
   *  without obfuscation.
   *  b) Foreach transport we observed, we write its transport history
   *  string and push it to string_chunks. So, for example, if we've
   *  seen 665 obfs2 clients, we write "obfs2=665".
   *  c) We concatenate string_chunks to form the final string.
   */

  log_debug(LD_GENERAL,"Starting iteration for transport history. %d clients.",
            HT_SIZE(&client_history));

  /* Loop through all clients. */
  HT_FOREACH(ent, clientmap, &client_history) {
851
852
    uintptr_t val;
    void *ptr;
853
854
855
856
857
858
    transport_name = (*ent)->transport_name;
    if (!transport_name)
      transport_name = no_transport_str;

    /* Increase the count for this transport name. */
    ptr = strmap_get(transport_counts, transport_name);
859
    val = (uintptr_t)ptr;
860
861
862
863
864
    val++;
    ptr = (void*)val;
    strmap_set(transport_counts, transport_name, ptr);

    /* If it's the first time we see this transport, note it. */
865
    if (val == 1)
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
      smartlist_add(transports_used, tor_strdup(transport_name));

    log_debug(LD_GENERAL, "Client from '%s' with transport '%s'. "
              "I've now seen %d clients.",
              safe_str_client(fmt_addr(&(*ent)->addr)),
              transport_name ? transport_name : "<no transport>",
              (int)val);
  }

  /* Sort the transport names (helps with unit testing). */
  smartlist_sort_strings(transports_used);

  /* Loop through all seen transports. */
  SMARTLIST_FOREACH_BEGIN(transports_used, const char *, transport_name) {
    void *transport_count_ptr = strmap_get(transport_counts, transport_name);
881
    uintptr_t transport_count = (uintptr_t) transport_count_ptr;
882

883
884
    log_debug(LD_GENERAL, "We got "U64_FORMAT" clients with transport '%s'.",
              U64_PRINTF_ARG((uint64_t)transport_count), transport_name);
885

886
    smartlist_add_asprintf(string_chunks, "%s="U64_FORMAT,
887
                           transport_name,
888
889
890
                           U64_PRINTF_ARG(round_uint64_to_next_multiple_of(
                                               (uint64_t)transport_count,
                                               granularity)));
891
892
  } SMARTLIST_FOREACH_END(transport_name);

893
  the_string = smartlist_join_strings(string_chunks, ",", 0, NULL);
894
895
896
897
898
899
900
901
902
903
904
905
906

  log_debug(LD_GENERAL, "Final bridge-ip-transports string: '%s'", the_string);

 done:
  strmap_free(transport_counts, NULL);
  SMARTLIST_FOREACH(transports_used, char *, s, tor_free(s));
  smartlist_free(transports_used);
  SMARTLIST_FOREACH(string_chunks, char *, s, tor_free(s));
  smartlist_free(string_chunks);

  return the_string;
}

907
908
909
910
911
912
/** Return a newly allocated comma-separated string containing statistics
 * on network status downloads. The string contains the number of completed
 * requests, timeouts, and still running requests as well as the download
 * times by deciles and quartiles. Return NULL if we have not observed
 * requests for long enough. */
static char *
913
geoip_get_dirreq_history(dirreq_type_t type)
914
915
{
  char *result = NULL;
916
  smartlist_t *dirreq_completed = NULL;
917
  uint32_t complete = 0, timeouts = 0, running = 0;
918
  int bufsize = 1024, written;
919
  dirreq_map_entry_t **ptr, **next, *ent;
920
  struct timeval now;
921

922
  tor_gettimeofday(&now);
923
  dirreq_completed = smartlist_new();
924
925
  for (ptr = HT_START(dirreqmap, &dirreq_map); ptr; ptr = next) {
    ent = *ptr;
926
    if (ent->type != type) {
927
928
929
      next = HT_NEXT(dirreqmap, &dirreq_map, ptr);
      continue;
    } else {
930
      if (ent->completed) {
931
        smartlist_add(dirreq_completed, ent);
932
        complete++;
933
        next = HT_NEXT_RMV(dirreqmap, &dirreq_map, ptr);
934
      } else {
935
        if (tv_mdiff(&ent->request_time, &now) / 1000 > DIRREQ_TIMEOUT)
936
937
938
          timeouts++;
        else
          running++;
939
940
        next = HT_NEXT_RMV(dirreqmap, &dirreq_map, ptr);
        tor_free(ent);
941
942
      }
    }
943
  }
944
945
946
947
948
949
950
#define DIR_REQ_GRANULARITY 4
  complete = round_uint32_to_next_multiple_of(complete,
                                              DIR_REQ_GRANULARITY);
  timeouts = round_uint32_to_next_multiple_of(timeouts,
                                              DIR_REQ_GRANULARITY);
  running = round_uint32_to_next_multiple_of(running,
                                             DIR_REQ_GRANULARITY);
951
  result = tor_malloc_zero(bufsize);
952
953
  written = tor_snprintf(result, bufsize, "complete=%u,timeout=%u,"
                         "running=%u", complete, timeouts, running);
954
  if (written < 0) {
955
956
    tor_free(result);
    goto done;
957
  }
958

959
960
#define MIN_DIR_REQ_RESPONSES 16
  if (complete >= MIN_DIR_REQ_RESPONSES) {
961
962
963
964
965
966
967
968
969
970
971
972
973
    uint32_t *dltimes;
    /* We may have rounded 'completed' up.  Here we want to use the
     * real value. */
    complete = smartlist_len(dirreq_completed);
    dltimes = tor_malloc_zero(sizeof(uint32_t) * complete);
    SMARTLIST_FOREACH_BEGIN(dirreq_completed, dirreq_map_entry_t *, ent) {
      uint32_t bytes_per_second;
      uint32_t time_diff = (uint32_t) tv_mdiff(&ent->request_time,
                                               &ent->completion_time);
      if (time_diff == 0)
        time_diff = 1; /* Avoid DIV/0; "instant" answers are impossible
                        * by law of nature or something, but a milisecond
                        * is a bit greater than "instantly" */
974
      bytes_per_second = (uint32_t)(1000 * ent->response_size / time_diff);
975
976
977
      dltimes[ent_sl_idx] = bytes_per_second;
    } SMARTLIST_FOREACH_END(ent);
    median_uint32(dltimes, complete); /* sorts as a side effect. */
978
    written = tor_snprintf(result + written, bufsize - written,
979
980
                           ",min=%u,d1=%u,d2=%u,q1=%u,d3=%u,d4=%u,md=%u,"
                           "d6=%u,d7=%u,q3=%u,d8=%u,d9=%u,max=%u",
981
982
983
984
985
986
987
988
989
990
991
992
993
                           dltimes[0],
                           dltimes[1*complete/10-1],
                           dltimes[2*complete/10-1],
                           dltimes[1*complete/4-1],
                           dltimes[3*complete/10-1],
                           dltimes[4*complete/10-1],
                           dltimes[5*complete/10-1],
                           dltimes[6*complete/10-1],
                           dltimes[7*complete/10-1],
                           dltimes[3*complete/4-1],
                           dltimes[8*complete/10-1],
                           dltimes[9*complete/10-1],
                           dltimes[complete-1]);
994
995
    if (written<0)
      tor_free(result);
996
997
    tor_free(dltimes);
  }
998
999
1000
 done:
  SMARTLIST_FOREACH(dirreq_completed, dirreq_map_entry_t *, ent,
                    tor_free(ent));