Commit ac330d9b authored by Nick Mathewson's avatar Nick Mathewson
Browse files

New code to implement proposal for local geoip stats. Only enabled with...

New code to implement proposal for local geoip stats. Only enabled with --enable-geoip-stats passed to configure.

svn:r14802
parent 6e68c23d
......@@ -104,6 +104,9 @@ Changes in version 0.2.1.1-alpha - 2008-??-??
Robert Hogan. Fixes the first part of bug 681.
- Make bridge authorities never serve extrainfo docs.
- Allow comments in geoip file.
- New configure/torrc options (--enable-geoip-stats,
DirRecordUsageByCountry) to record how many IPs we've served directory
info to in each country code.
o Minor features (security):
- Reject requests for reverse-dns lookup of names in a private
......
......@@ -87,6 +87,13 @@ case $host in
;;
esac
AC_ARG_ENABLE(geoip-stats,
AS_HELP_STRING(--enable-geoip-stats, enable code for directories to collect per-country statistics))
if test "$enable_geoip_stats" = "yes"; then
AC_DEFINE(ENABLE_GEOIP_STATS, 1, [Defined if we try to collect per-country statistics])
fi
AC_ARG_ENABLE(gcc-warnings,
AS_HELP_STRING(--enable-gcc-warnings, enable verbose warnings))
......
......@@ -289,10 +289,10 @@ Bugs/issues for Tor 0.2.0.x:
too much.
o teach geoip_parse_entry() to skip over lines that start with #, so we
can put a little note at the top of the geoip file to say what it is.
N d we should have an off-by-default way for relays to dump geoip data to
. we should have an off-by-default way for relays to dump geoip data to
a file in their data directory, for measurement purposes.
- Basic implementation
- Include probability-of-selection
o Basic implementation
N - Include probability-of-selection
R d let bridges set relaybandwidthrate as low as 5kb
R - bug: if we launch using bridges, and then stop using bridges, we
still have our bridges in our entryguards section, and may use them.
......
......@@ -1559,7 +1559,6 @@ start_writing_to_file(const char *fname, int open_flags, int mode,
tor_assert((open_flags & (O_BINARY|O_TEXT)) != 0);
#endif
new_file->fd = -1;
tempname_len = strlen(fname)+16;
tor_assert(tempname_len > strlen(fname)); /*check for overflow*/
new_file->filename = tor_strdup(fname);
if (open_flags & O_APPEND) {
......@@ -1577,8 +1576,7 @@ start_writing_to_file(const char *fname, int open_flags, int mode,
new_file->rename_on_close = 1;
}
if ((new_file->fd = open(open_name, open_flags, mode))
< 0) {
if ((new_file->fd = open(open_name, open_flags, mode)) < 0) {
log(LOG_WARN, LD_FS, "Couldn't open \"%s\" (%s) for writing: %s",
open_name, fname, strerror(errno));
goto err;
......
......@@ -179,6 +179,9 @@ static config_var_t _option_vars[] = {
V(DirPolicy, LINELIST, NULL),
V(DirPort, UINT, "0"),
OBSOLETE("DirPostPeriod"),
#ifdef ENABLE_GEOIP_STATS
V(DirRecordUsageByCountry, BOOL, "0"),
#endif
VAR("DirServer", LINELIST, DirServers, NULL),
V(DNSPort, UINT, "0"),
V(DNSListenAddress, LINELIST, NULL),
......
......@@ -901,7 +901,7 @@ connection_or_set_state_open(or_connection_t *conn)
} else {
/* only report it to the geoip module if it's not a known router */
if (!router_get_by_digest(conn->identity_digest))
geoip_note_client_seen(TO_CONN(conn)->addr, now);
geoip_note_client_seen(GEOIP_CLIENT_CONNECT, TO_CONN(conn)->addr, now);
}
if (conn->handshake_state) {
or_handshake_state_free(conn->handshake_state);
......
......@@ -2484,6 +2484,26 @@ directory_handle_command_get(dir_connection_t *conn, const char *headers,
goto done;
}
#ifdef ENABLE_GEOIP_STATS
{
  /* Tell the geoip module that we are serving a networkstatus, tagged as
   * v3 or v2 according to is_v3. */
  geoip_client_action_t act =
    is_v3 ? GEOIP_CLIENT_NETWORKSTATUS : GEOIP_CLIENT_NETWORKSTATUS_V2;
  /* Default to the address recorded on the directory connection itself. */
  uint32_t addr = conn->_base.addr;
  connection_t *c = conn->_base.linked_conn;
  if (c && c->type == CONN_TYPE_EXIT) {
    /* The request arrived over a linked exit connection: prefer the address
     * of the OR connection on the p_conn side of its (non-origin) circuit. */
    circuit_t *circ = TO_EDGE_CONN(c)->on_circuit;
    /* Guard against a missing circuit or missing p_conn instead of
     * dereferencing NULL; in that case we keep the default address. */
    if (circ && ! CIRCUIT_IS_ORIGIN(circ)) {
      or_connection_t *orconn = TO_OR_CIRCUIT(circ)->p_conn;
      if (orconn)
        addr = orconn->_base.addr;
    }
  }
  geoip_note_client_seen(act, addr, time(NULL));
}
#endif
// note_request(request_type,dlen);
(void) request_type;
write_http_response_header(conn, -1, compressed,
......
......@@ -131,7 +131,7 @@ _geoip_compare_key_to_entry(const void *_key, const void **_member)
* "INTIPLOW","INTIPHIGH","CC","CC3","COUNTRY NAME"
* where INTIPLOW and INTIPHIGH are IPv4 addresses encoded as 4-byte unsigned
* integers, and CC is a country code.
*
*
* It also recognizes, and skips over, blank lines and lines that start
* with '#' (comments).
*/
......@@ -208,9 +208,12 @@ geoip_is_loaded(void)
typedef struct clientmap_entry_t {
HT_ENTRY(clientmap_entry_t) node;
uint32_t ipaddr;
time_t last_seen;
time_t last_seen; /* The last 2 bits of this value hold the client
* operation. */
} clientmap_entry_t;
#define ACTION_MASK 3
/** Map from client IP address to last time seen. */
static HT_HEAD(clientmap, clientmap_entry_t) client_history =
HT_INITIALIZER();
......@@ -238,12 +241,28 @@ HT_GENERATE(clientmap, clientmap_entry_t, node, clientmap_entry_hash,
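/** Note that we've seen a client perform <b>action</b> from the IP
 * <b>addr</b> (host order) at time <b>now</b>. GEOIP_CLIENT_CONNECT is
 * ignored unless we are a bridge with BridgeRecordUsageByCountry set; every
 * other action is ignored unless we were built with ENABLE_GEOIP_STATS, have
 * DirRecordUsageByCountry set, and are neither a bridge nor a bridge
 * authority. */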
void
geoip_note_client_seen(uint32_t addr, time_t now)
geoip_note_client_seen(geoip_client_action_t action,
uint32_t addr, time_t now)
{
or_options_t *options = get_options();
clientmap_entry_t lookup, *ent;
if (!(options->BridgeRelay && options->BridgeRecordUsageByCountry))
if (action == GEOIP_CLIENT_CONNECT) {
if (!(options->BridgeRelay && options->BridgeRecordUsageByCountry))
return;
} else {
#ifndef ENABLE_GEOIP_STATS
return;
#else
if (options->BridgeRelay || options->BridgeAuthoritativeDir ||
!options->DirRecordUsageByCountry)
return;
#endif
}
/* We use the low 2 bits of the time (ACTION_MASK) to encode the action.
 * Since we're potentially remembering the times of many clients, we don't
 * want to make clientmap_entry_t larger than it has to be. */
now = (now & ~ACTION_MASK) | (((int)action) & ACTION_MASK);
lookup.ipaddr = addr;
ent = HT_FIND(clientmap, &client_history, &lookup);
if (ent) {
......@@ -328,7 +347,7 @@ _c_hist_compare(const void **_a, const void **_b)
* that country, and cc is a lowercased country code. Returns NULL if we don't
* want to export geoip data yet. */
char *
geoip_get_client_history(time_t now)
geoip_get_client_history(time_t now, geoip_client_action_t action)
{
char *result = NULL;
if (!geoip_is_loaded())
......@@ -343,7 +362,10 @@ geoip_get_client_history(time_t now)
unsigned *counts = tor_malloc_zero(sizeof(unsigned)*n_countries);
unsigned total = 0;
HT_FOREACH(ent, clientmap, &client_history) {
int country = geoip_get_country_by_ip((*ent)->ipaddr);
int country;
if (((*ent)->last_seen & ACTION_MASK) != action)
continue;
country = geoip_get_country_by_ip((*ent)->ipaddr);
if (country < 0)
continue;
tor_assert(0 <= country && country < n_countries);
......@@ -404,6 +426,41 @@ geoip_get_client_history(time_t now)
return result;
}
/** Write the per-country client histories for v3 and v2 networkstatus
 * requests to the file named by get_datadir_fname("geoip-stats").
 *
 * The output has four lines: "written" (the current time), "started-at"
 * (the value of geoip_get_history_start()), "ns" (the
 * GEOIP_CLIENT_NETWORKSTATUS history) and "ns-v2" (the
 * GEOIP_CLIENT_NETWORKSTATUS_V2 history).
 *
 * Nothing is written if either history is NULL, if the file cannot be
 * opened, or if the write fails.  Compiles to a no-op unless
 * ENABLE_GEOIP_STATS is defined. */
void
dump_geoip_stats(void)
{
#ifdef ENABLE_GEOIP_STATS
  time_t now = time(NULL);
  char *filename = get_datadir_fname("geoip-stats");
  char *data_v2 = NULL, *data_v3 = NULL;
  char since[ISO_TIME_LEN+1], written[ISO_TIME_LEN+1];
  open_file_t *open_file = NULL;
  FILE *out;

  data_v2 = geoip_get_client_history(now, GEOIP_CLIENT_NETWORKSTATUS_V2);
  data_v3 = geoip_get_client_history(now, GEOIP_CLIENT_NETWORKSTATUS);
  format_iso_time(since, geoip_get_history_start());
  format_iso_time(written, now);
  /* Only dump when both histories are available. */
  if (!data_v2 || !data_v3)
    goto done;
  /* NOTE(review): open_flags is 0 here.  start_writing_to_file() asserts
   * that O_BINARY or O_TEXT is set on some builds, and 0 is O_RDONLY on
   * POSIX -- confirm that this really yields a writable stream. */
  out = start_writing_to_stdio_file(filename, 0, 0600, &open_file);
  if (!out)
    goto done;
  /* Each keyword is separated from its value by a single space. */
  if (fprintf(out, "written %s\nstarted-at %s\nns %s\nns-v2 %s\n",
              written, since, data_v3, data_v2) < 0)
    goto done;

  finish_writing_to_file(open_file);
  open_file = NULL; /* Ownership handed off; don't abort it below. */
 done:
  if (open_file)
    abort_writing_to_file(open_file);
  tor_free(filename);
  tor_free(data_v2);
  tor_free(data_v3);
#endif
}
/** Helper used to implement GETINFO ip-to-country/... controller command. */
int
getinfo_helper_geoip(control_connection_t *control_conn,
......
......@@ -832,6 +832,7 @@ run_scheduled_events(time_t now)
static time_t time_to_clean_caches = 0;
static time_t time_to_recheck_bandwidth = 0;
static time_t time_to_check_for_expired_networkstatus = 0;
static time_t time_to_dump_geoip_stats = 0;
or_options_t *options = get_options();
int i;
int have_dir_info;
......@@ -958,6 +959,12 @@ run_scheduled_events(time_t now)
time_to_check_for_expired_networkstatus = now + CHECK_EXPIRED_NS_INTERVAL;
}
/* Dump geoip statistics whenever the scheduled time has passed, then
 * schedule the next dump one interval from now. */
if (time_to_dump_geoip_stats < now) {
/* Seconds between dumps.  No trailing semicolon: it would become part of
 * the expansion and break any use of the macro inside an expression. */
#define DUMP_GEOIP_STATS_INTERVAL (60*60)
  time_to_dump_geoip_stats = now + DUMP_GEOIP_STATS_INTERVAL;
  dump_geoip_stats();
}
/** 2. Periodically, we consider getting a new directory, getting a
* new running-routers list, and/or force-uploading our descriptor
* (if we've passed our internal checks). */
......
......@@ -2358,6 +2358,10 @@ typedef struct {
* count of how many client addresses have contacted us so that we can help
* the bridge authority guess which countries have blocked access to us. */
int BridgeRecordUsageByCountry;
#ifdef ENABLE_GEOIP_STATS
int DirRecordUsageByCountry;
#endif
/** Optionally, a file with GeoIP data. */
char *GeoIPFile;
......@@ -3294,13 +3298,27 @@ int geoip_get_country_by_ip(uint32_t ipaddr);
int geoip_get_n_countries(void);
const char *geoip_get_country_name(int num);
int geoip_is_loaded(void);
void geoip_note_client_seen(uint32_t addr, time_t now);
/** The kind of client activity being recorded for geoip statistics.
 *
 * GEOIP_CLIENT_CONNECT is only noticed when we are a bridge; the
 * networkstatus actions are only noticed when we are not.  The numeric
 * values are spelled out explicitly and kept within the range 0..3.
 */
typedef enum {
  /** A client connected to us while we were acting as a bridge relay. */
  GEOIP_CLIENT_CONNECT = 0,
  /** As a directory server, we served a networkstatus consensus. */
  GEOIP_CLIENT_NETWORKSTATUS = 1,
  /** As a directory server, we served a v2 networkstatus. */
  GEOIP_CLIENT_NETWORKSTATUS_V2 = 2
} geoip_client_action_t;
void geoip_note_client_seen(geoip_client_action_t action,
uint32_t addr, time_t now);
void geoip_remove_old_clients(time_t cutoff);
time_t geoip_get_history_start(void);
char *geoip_get_client_history(time_t now);
char *geoip_get_client_history(time_t now, geoip_client_action_t action);
int getinfo_helper_geoip(control_connection_t *control_conn,
const char *question, char **answer);
void geoip_free_all(void);
void dump_geoip_stats(void);
/********************************* hibernate.c **********************/
......
......@@ -1830,7 +1830,7 @@ extrainfo_dump_to_string(char *s, size_t maxlen, extrainfo_t *extrainfo,
geoip_remove_old_clients(now-48*60*60);
last_purged_at = now;
}
geoip_summary = geoip_get_client_history(time(NULL));
geoip_summary = geoip_get_client_history(time(NULL), GEOIP_CLIENT_CONNECT);
if (geoip_summary) {
char geoip_start[ISO_TIME_LEN+1];
format_iso_time(geoip_start, geoip_get_history_start());
......
......@@ -3908,28 +3908,28 @@ test_geoip(void)
get_options()->BridgeRecordUsageByCountry = 1;
/* Put 9 observations in AB... */
for (i=32; i < 40; ++i)
geoip_note_client_seen(i, now);
geoip_note_client_seen(225, now);
geoip_note_client_seen(GEOIP_CLIENT_CONNECT, i, now);
geoip_note_client_seen(GEOIP_CLIENT_CONNECT, 225, now);
/* and 3 observations in XY, several times. */
for (j=0; j < 10; ++j)
for (i=52; i < 55; ++i)
geoip_note_client_seen(i, now-3600);
geoip_note_client_seen(GEOIP_CLIENT_CONNECT, i, now-3600);
/* and 17 observations in ZZ... */
for (i=110; i < 127; ++i)
geoip_note_client_seen(i, now-7200);
s = geoip_get_client_history(now+5*24*60*60);
geoip_note_client_seen(GEOIP_CLIENT_CONNECT, i, now-7200);
s = geoip_get_client_history(now+5*24*60*60, GEOIP_CLIENT_CONNECT);
test_assert(s);
test_streq("zz=24,ab=16", s);
tor_free(s);
/* Now clear out all the zz observations. */
geoip_remove_old_clients(now-6000);
s = geoip_get_client_history(now+5*24*60*60);
s = geoip_get_client_history(now+5*24*60*60, GEOIP_CLIENT_CONNECT);
test_assert(! s); /* There are only 12 observations left. Not enough to
build an answer. Add 4 more in XY... */
for (i=55; i < 59; ++i)
geoip_note_client_seen(i, now-3600);
s = geoip_get_client_history(now+5*24*60*60);
geoip_note_client_seen(GEOIP_CLIENT_CONNECT, i, now-3600);
s = geoip_get_client_history(now+5*24*60*60, GEOIP_CLIENT_CONNECT);
test_assert(s);
test_streq("ab=16", s);
tor_free(s);
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment