Commit ac330d9b authored by Nick Mathewson's avatar Nick Mathewson 🌉
Browse files

New code to implement proposal for local geoip stats. Only enabled with...

New code to implement proposal for local geoip stats. Only enabled with --enable-geoip-stats passed to configure.

svn:r14802
parent 6e68c23d
......@@ -104,6 +104,9 @@ Changes in version 0.2.1.1-alpha - 2008-??-??
Robert Hogan. Fixes the first part of bug 681.
- Make bridge authorities never serve extrainfo docs.
- Allow comments in geoip file.
- New configure/torrc options (--enable-geoip-stats,
DirRecordUsageByCountry) to record how many IPs we've served directory
info to in each country code.
o Minor features (security):
- Reject requests for reverse-dns lookup of names in a private
......
......@@ -87,6 +87,13 @@ case $host in
;;
esac
AC_ARG_ENABLE(geoip-stats,
AS_HELP_STRING(--enable-geoip-stats, enable code for directories to collect per-country statistics))
if test "$enable_geoip_stats" = "yes"; then
AC_DEFINE(ENABLE_GEOIP_STATS, 1, [Defined if we try to collect per-country statistics])
fi
AC_ARG_ENABLE(gcc-warnings,
AS_HELP_STRING(--enable-gcc-warnings, enable verbose warnings))
......
......@@ -289,10 +289,10 @@ Bugs/issues for Tor 0.2.0.x:
too much.
o teach geoip_parse_entry() to skip over lines that start with #, so we
can put a little note at the top of the geoip file to say what it is.
N d we should have an off-by-default way for relays to dump geoip data to
. we should have an off-by-default way for relays to dump geoip data to
a file in their data directory, for measurement purposes.
- Basic implementation
- Include probability-of-selection
o Basic implementation
N - Include probability-of-selection
R d let bridges set relaybandwidthrate as low as 5kb
R - bug: if we launch using bridges, and then stop using bridges, we
still have our bridges in our entryguards section, and may use them.
......
......@@ -1559,7 +1559,6 @@ start_writing_to_file(const char *fname, int open_flags, int mode,
tor_assert((open_flags & (O_BINARY|O_TEXT)) != 0);
#endif
new_file->fd = -1;
tempname_len = strlen(fname)+16;
tor_assert(tempname_len > strlen(fname)); /*check for overflow*/
new_file->filename = tor_strdup(fname);
if (open_flags & O_APPEND) {
......@@ -1577,8 +1576,7 @@ start_writing_to_file(const char *fname, int open_flags, int mode,
new_file->rename_on_close = 1;
}
if ((new_file->fd = open(open_name, open_flags, mode))
< 0) {
if ((new_file->fd = open(open_name, open_flags, mode)) < 0) {
log(LOG_WARN, LD_FS, "Couldn't open \"%s\" (%s) for writing: %s",
open_name, fname, strerror(errno));
goto err;
......
......@@ -179,6 +179,9 @@ static config_var_t _option_vars[] = {
V(DirPolicy, LINELIST, NULL),
V(DirPort, UINT, "0"),
OBSOLETE("DirPostPeriod"),
#ifdef ENABLE_GEOIP_STATS
V(DirRecordUsageByCountry, BOOL, "0"),
#endif
VAR("DirServer", LINELIST, DirServers, NULL),
V(DNSPort, UINT, "0"),
V(DNSListenAddress, LINELIST, NULL),
......
......@@ -901,7 +901,7 @@ connection_or_set_state_open(or_connection_t *conn)
} else {
/* only report it to the geoip module if it's not a known router */
if (!router_get_by_digest(conn->identity_digest))
geoip_note_client_seen(TO_CONN(conn)->addr, now);
geoip_note_client_seen(GEOIP_CLIENT_CONNECT, TO_CONN(conn)->addr, now);
}
if (conn->handshake_state) {
or_handshake_state_free(conn->handshake_state);
......
......@@ -2484,6 +2484,26 @@ directory_handle_command_get(dir_connection_t *conn, const char *headers,
goto done;
}
#ifdef ENABLE_GEOIP_STATS
  {
    /* Tell the geoip module that we are serving a networkstatus, so it can
     * count the request by country. */
    geoip_client_action_t act =
      is_v3 ? GEOIP_CLIENT_NETWORKSTATUS : GEOIP_CLIENT_NETWORKSTATUS_V2;
    /* Default to the address of the directory connection itself. */
    uint32_t addr = conn->_base.addr;
    if (conn->_base.linked_conn) {
      /* The request arrived over a linked connection.  If that is an exit
       * stream on a non-origin circuit, use the address of the circuit's
       * p_conn instead (presumably the OR connection toward the client --
       * confirm against or.h). */
      connection_t *c = conn->_base.linked_conn;
      if (c->type == CONN_TYPE_EXIT) {
        circuit_t *circ = TO_EDGE_CONN(c)->on_circuit;
        /* Either pointer may already have been cleared during teardown;
         * keep the default address rather than dereference NULL. */
        if (circ && ! CIRCUIT_IS_ORIGIN(circ)) {
          or_connection_t *orconn = TO_OR_CIRCUIT(circ)->p_conn;
          if (orconn)
            addr = orconn->_base.addr;
        }
      }
    }
    geoip_note_client_seen(act, addr, time(NULL));
  }
#endif
// note_request(request_type,dlen);
(void) request_type;
write_http_response_header(conn, -1, compressed,
......
......@@ -131,7 +131,7 @@ _geoip_compare_key_to_entry(const void *_key, const void **_member)
* "INTIPLOW","INTIPHIGH","CC","CC3","COUNTRY NAME"
* where INTIPLOW and INTIPHIGH are IPv4 addresses encoded as 4-byte unsigned
* integers, and CC is a country code.
*
*
* It also recognizes, and skips over, blank lines and lines that start
* with '#' (comments).
*/
......@@ -208,9 +208,12 @@ geoip_is_loaded(void)
typedef struct clientmap_entry_t {
HT_ENTRY(clientmap_entry_t) node;
uint32_t ipaddr;
time_t last_seen;
time_t last_seen; /* The last 2 bits of this value hold the client
* operation. */
} clientmap_entry_t;
#define ACTION_MASK 3
/** Map from client IP address to last time seen. */
static HT_HEAD(clientmap, clientmap_entry_t) client_history =
HT_INITIALIZER();
......@@ -238,12 +241,28 @@ HT_GENERATE(clientmap, clientmap_entry_t, node, clientmap_entry_hash,
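/** Note that we've seen a client at the IP <b>addr</b> (host order) perform
 * <b>action</b> at time <b>now</b>.  GEOIP_CLIENT_CONNECT is recorded only
 * by bridges that have BridgeRecordUsageByCountry set.  Every other action is
 * recorded only when built with ENABLE_GEOIP_STATS, and only by servers that
 * have DirRecordUsageByCountry set and are neither bridges nor bridge
 * authorities. */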
void
geoip_note_client_seen(uint32_t addr, time_t now)
geoip_note_client_seen(geoip_client_action_t action,
uint32_t addr, time_t now)
{
or_options_t *options = get_options();
clientmap_entry_t lookup, *ent;
if (!(options->BridgeRelay && options->BridgeRecordUsageByCountry))
if (action == GEOIP_CLIENT_CONNECT) {
if (!(options->BridgeRelay && options->BridgeRecordUsageByCountry))
return;
} else {
#ifndef ENABLE_GEOIP_STATS
return;
#else
if (options->BridgeRelay || options->BridgeAuthoritativeDir ||
!options->DirRecordUsageByCountry)
return;
#endif
}
/* We use the low 2 bits of the time (see ACTION_MASK) to encode the action.
 * Since we're potentially remembering times for many clients, we don't want
 * to make clientmap_entry_t larger than it has to be. */
now = (now & ~ACTION_MASK) | (((int)action) & ACTION_MASK);
lookup.ipaddr = addr;
ent = HT_FIND(clientmap, &client_history, &lookup);
if (ent) {
......@@ -328,7 +347,7 @@ _c_hist_compare(const void **_a, const void **_b)
* that country, and cc is a lowercased country code. Returns NULL if we don't
* want to export geoip data yet. */
char *
geoip_get_client_history(time_t now)
geoip_get_client_history(time_t now, geoip_client_action_t action)
{
char *result = NULL;
if (!geoip_is_loaded())
......@@ -343,7 +362,10 @@ geoip_get_client_history(time_t now)
unsigned *counts = tor_malloc_zero(sizeof(unsigned)*n_countries);
unsigned total = 0;
HT_FOREACH(ent, clientmap, &client_history) {
int country = geoip_get_country_by_ip((*ent)->ipaddr);
int country;
if (((*ent)->last_seen & ACTION_MASK) != action)
continue;
country = geoip_get_country_by_ip((*ent)->ipaddr);
if (country < 0)
continue;
tor_assert(0 <= country && country < n_countries);
......@@ -404,6 +426,41 @@ geoip_get_client_history(time_t now)
return result;
}
/** Write the per-country directory-request statistics to the file
 * "geoip-stats" in the data directory.
 *
 * The file holds four keyword lines: "written" (the current time),
 * "started-at" (the value of geoip_get_history_start()), "ns" (the client
 * history for GEOIP_CLIENT_NETWORKSTATUS) and "ns-v2" (the client history
 * for GEOIP_CLIENT_NETWORKSTATUS_V2).
 *
 * Nothing is written if either history is unavailable, or if the file cannot
 * be opened or written.  This function is a no-op unless ENABLE_GEOIP_STATS
 * is defined. */
void
dump_geoip_stats(void)
{
#ifdef ENABLE_GEOIP_STATS
  time_t now = time(NULL);
  char *filename = get_datadir_fname("geoip-stats");
  char *data_v2 = NULL, *data_v3 = NULL;
  char since[ISO_TIME_LEN+1], written[ISO_TIME_LEN+1];
  open_file_t *open_file = NULL;
  FILE *out;

  data_v2 = geoip_get_client_history(now, GEOIP_CLIENT_NETWORKSTATUS_V2);
  data_v3 = geoip_get_client_history(now, GEOIP_CLIENT_NETWORKSTATUS);
  format_iso_time(since, geoip_get_history_start());
  format_iso_time(written, now);
  /* geoip_get_client_history() returns NULL when it does not want to export
   * data yet; in that case leave any existing file alone. */
  if (!data_v2 || !data_v3)
    goto done;
  /* NOTE(review): open_flags is 0 here.  start_writing_to_file() has a
   * conditionally-compiled assertion that O_BINARY or O_TEXT is set, and no
   * write-access flag is passed either -- confirm this is what is wanted. */
  out = start_writing_to_stdio_file(filename, 0, 0600, &open_file);
  if (!out)
    goto done;
  /* Each keyword is separated from its value by a single space. */
  if (fprintf(out, "written %s\nstarted-at %s\nns %s\nns-v2 %s\n",
              written, since, data_v3, data_v2) < 0)
    goto done;

  finish_writing_to_file(open_file);
  /* Mark the handle as consumed so the cleanup below does not abort it. */
  open_file = NULL;
 done:
  if (open_file)
    abort_writing_to_file(open_file);
  tor_free(filename);
  tor_free(data_v2);
  tor_free(data_v3);
#endif
}
/** Helper used to implement GETINFO ip-to-country/... controller command. */
int
getinfo_helper_geoip(control_connection_t *control_conn,
......
......@@ -832,6 +832,7 @@ run_scheduled_events(time_t now)
static time_t time_to_clean_caches = 0;
static time_t time_to_recheck_bandwidth = 0;
static time_t time_to_check_for_expired_networkstatus = 0;
static time_t time_to_dump_geoip_stats = 0;
or_options_t *options = get_options();
int i;
int have_dir_info;
......@@ -958,6 +959,12 @@ run_scheduled_events(time_t now)
time_to_check_for_expired_networkstatus = now + CHECK_EXPIRED_NS_INTERVAL;
}
if (time_to_dump_geoip_stats < now) {
  /* The previous deadline has passed: schedule the next dump one interval
   * from now, then write the stats file.  The macro body must not end in a
   * semicolon, so that it can be used inside any expression. */
#define DUMP_GEOIP_STATS_INTERVAL (60*60)
  time_to_dump_geoip_stats = now + DUMP_GEOIP_STATS_INTERVAL;
  dump_geoip_stats();
}
/** 2. Periodically, we consider getting a new directory, getting a
* new running-routers list, and/or force-uploading our descriptor
* (if we've passed our internal checks). */
......
......@@ -2358,6 +2358,10 @@ typedef struct {
* count of how many client addresses have contacted us so that we can help
* the bridge authority guess which countries have blocked access to us. */
int BridgeRecordUsageByCountry;
#ifdef ENABLE_GEOIP_STATS
int DirRecordUsageByCountry;
#endif
/** Optionally, a file with GeoIP data. */
char *GeoIPFile;
......@@ -3294,13 +3298,27 @@ int geoip_get_country_by_ip(uint32_t ipaddr);
int geoip_get_n_countries(void);
const char *geoip_get_country_name(int num);
int geoip_is_loaded(void);
void geoip_note_client_seen(uint32_t addr, time_t now);
/** The kinds of client activity for which we may keep geoip statistics.
 *
 * GEOIP_CLIENT_CONNECT is only noted when we are a bridge; the remaining
 * actions are only noted when we are not one.
 */
typedef enum {
  GEOIP_CLIENT_CONNECT = 0,          /**< A connection seen as a bridge
                                      * relay. */
  GEOIP_CLIENT_NETWORKSTATUS = 1,    /**< A networkstatus consensus served
                                      * as a directory server. */
  GEOIP_CLIENT_NETWORKSTATUS_V2 = 2  /**< A v2 networkstatus consensus
                                      * served as a directory server. */
} geoip_client_action_t;
void geoip_note_client_seen(geoip_client_action_t action,
uint32_t addr, time_t now);
void geoip_remove_old_clients(time_t cutoff);
time_t geoip_get_history_start(void);
char *geoip_get_client_history(time_t now);
char *geoip_get_client_history(time_t now, geoip_client_action_t action);
int getinfo_helper_geoip(control_connection_t *control_conn,
const char *question, char **answer);
void geoip_free_all(void);
void dump_geoip_stats(void);
/********************************* hibernate.c **********************/
......
......@@ -1830,7 +1830,7 @@ extrainfo_dump_to_string(char *s, size_t maxlen, extrainfo_t *extrainfo,
geoip_remove_old_clients(now-48*60*60);
last_purged_at = now;
}
geoip_summary = geoip_get_client_history(time(NULL));
geoip_summary = geoip_get_client_history(time(NULL), GEOIP_CLIENT_CONNECT);
if (geoip_summary) {
char geoip_start[ISO_TIME_LEN+1];
format_iso_time(geoip_start, geoip_get_history_start());
......
......@@ -3908,28 +3908,28 @@ test_geoip(void)
get_options()->BridgeRecordUsageByCountry = 1;
/* Put 9 observations in AB... */
for (i=32; i < 40; ++i)
geoip_note_client_seen(i, now);
geoip_note_client_seen(225, now);
geoip_note_client_seen(GEOIP_CLIENT_CONNECT, i, now);
geoip_note_client_seen(GEOIP_CLIENT_CONNECT, 225, now);
/* and 3 observations in XY, several times. */
for (j=0; j < 10; ++j)
for (i=52; i < 55; ++i)
geoip_note_client_seen(i, now-3600);
geoip_note_client_seen(GEOIP_CLIENT_CONNECT, i, now-3600);
/* and 17 observations in ZZ... */
for (i=110; i < 127; ++i)
geoip_note_client_seen(i, now-7200);
s = geoip_get_client_history(now+5*24*60*60);
geoip_note_client_seen(GEOIP_CLIENT_CONNECT, i, now-7200);
s = geoip_get_client_history(now+5*24*60*60, GEOIP_CLIENT_CONNECT);
test_assert(s);
test_streq("zz=24,ab=16", s);
tor_free(s);
/* Now clear out all the zz observations. */
geoip_remove_old_clients(now-6000);
s = geoip_get_client_history(now+5*24*60*60);
s = geoip_get_client_history(now+5*24*60*60, GEOIP_CLIENT_CONNECT);
test_assert(! s); /* There are only 12 observations left. Not enough to
build an answer. Add 4 more in XY... */
for (i=55; i < 59; ++i)
geoip_note_client_seen(i, now-3600);
s = geoip_get_client_history(now+5*24*60*60);
geoip_note_client_seen(GEOIP_CLIENT_CONNECT, i, now-3600);
s = geoip_get_client_history(now+5*24*60*60, GEOIP_CLIENT_CONNECT);
test_assert(s);
test_streq("ab=16", s);
tor_free(s);
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment