diff --git a/docs/source/config.example.ini b/docs/source/config.example.ini index f3ca1dab9fa7a1db08ec3954a775255304e7179e..9a0d1cad10f3ea6aae3469aa891b028ec6dcc2af 100644 --- a/docs/source/config.example.ini +++ b/docs/source/config.example.ini @@ -2,9 +2,16 @@ [scanner] # A human-readable string with chars in a-zA-Z0-9 to identify your scanner nickname = sbws_default +# ISO 3166-1 alpha-2 country code. To be edited. +# Default to a non existing country to detect it was not edited. +country = AA [destinations] foo = off [destinations.foo] url = https://example.com/does/not/exist.bin +# ISO 3166-1 alpha-2 country code. To be edited. +# Use ZZ if the destination URL is a domain name and it is in a CDN. +# Default to a non existing country to detect it was not edited. +country = AA diff --git a/docs/source/man_sbws.ini.rst b/docs/source/man_sbws.ini.rst index 8886d625068e7d243697eb4e324078153822c17f..8f64b0da56dece102211cefd68c1761a0dffbea9 100644 --- a/docs/source/man_sbws.ini.rst +++ b/docs/source/man_sbws.ini.rst @@ -8,9 +8,9 @@ Tor bandwidth scanner configuration file. **sbws** (1) ``scanner`` command requires a configuration file with a "[destinations]" section. +"[destinations]" is the only section that does not have a default value. -It is the only section that does not have a default value. - +It is also required to configure "country" in the "[scanner]" section. It is recommended, but not required to configure "nickname" in the "[scanner]" section. @@ -77,6 +77,9 @@ destinations.STR verify = BOOL Whether or not to verify the destination certificate. (Default: True) + country = STR + ISO 3166-1 alpha-2 country code. + Use ZZ if the destination URL is a domain name and it is in a CDN. tor @@ -99,6 +102,9 @@ scanner nickname = STR A human-readable string with chars in a-zA-Z0-9 to identify the scanner. (Default: IDidntEditTheSBWSConfig) + country = STR + ISO 3166-1 alpha-2 country code. 
+ (Default: AA, a non existing country to detect it was not edited) download_toofast = INT Limits on what download times are too fast/slow/etc. (Default: 1) download_min = INT diff --git a/sbws/config.default.ini b/sbws/config.default.ini index f666640751ae4896ae10808ad4722fd7e81042e0..81b4b07fef66c2d991586190c46d01437dbe0192 100644 --- a/sbws/config.default.ini +++ b/sbws/config.default.ini @@ -36,6 +36,9 @@ reset_bw_ipv6_changes = off [scanner] # A human-readable string with chars in a-zA-Z0-9 to identify your scanner nickname = IDidntEditTheSBWSConfig +# ISO 3166-1 alpha-2 country code. To be edited. +# Default to a non existing country to detect it was not edited. +country = AA # Limits on what download times are too fast/slow/etc. download_toofast = 1 download_min = 5 diff --git a/sbws/core/generate.py b/sbws/core/generate.py index df860294f54ac5591dc1fb87eb23fff211f9b8a7..7283c250f19c182cc226b11545beea0c118e8241 100644 --- a/sbws/core/generate.py +++ b/sbws/core/generate.py @@ -9,6 +9,7 @@ from argparse import ArgumentDefaultsHelpFormatter import os import logging from sbws.util.timestamp import now_fname +from sbws.lib import destination log = logging.getLogger(__name__) @@ -104,8 +105,12 @@ def main(args, conf): state_fpath = conf.getpath('paths', 'state_fname') consensus_path = os.path.join(conf.getpath('tor', 'datadir'), "cached-consensus") - bw_file = V3BWFile.from_results(results, state_fpath, args.scale_constant, - scaling_method, + # Accept None as scanner_country to be compatible with older versions. 
+ scanner_country = conf['scanner'].get('country') + destinations_countries = destination.parse_destinations_countries(conf) + bw_file = V3BWFile.from_results(results, scanner_country, + destinations_countries, state_fpath, + args.scale_constant, scaling_method, torflow_cap=args.torflow_bw_margin, round_digs=args.round_digs, secs_recent=args.secs_recent, diff --git a/sbws/core/scanner.py b/sbws/core/scanner.py index 00071bedc4e3bd2c37a0d8fda7b360f6024c8b4b..3fbd3785c0c23540d0680ff0b10b2a959d8500f1 100644 --- a/sbws/core/scanner.py +++ b/sbws/core/scanner.py @@ -3,6 +3,7 @@ import signal import sys import threading +import traceback import uuid from ..lib.circuitbuilder import GapsCircuitBuilder as CB @@ -36,6 +37,9 @@ pool = None rd = None controller = None +FILLUP_TICKET_MSG = """Something went wrong. +Please create a ticket in https://trac.torproject.org with this traceback.""" + def stop_threads(signal, frame, exit_code=0): global rd, pool @@ -56,10 +60,7 @@ signal.signal(signal.SIGTERM, stop_threads) def dumpstacks(): - import traceback - log.critical("sbws stop measuring relays, probably because of a bug." - "Please, open a ticket in trac.torproject.org with this" - "backtrace.") + log.critical(FILLUP_TICKET_MSG) thread_id2name = dict([(t.ident, t.name) for t in threading.enumerate()]) for thread_id, stack in sys._current_frames().items(): log.critical("Thread: %s(%d)", @@ -83,8 +84,6 @@ def timed_recv_from_server(session, dest, byte_range): start_time = time.time() HTTP_GET_HEADERS['Range'] = byte_range - # TODO: - # - What other exceptions can this throw? # - response.elapsed "measures the time taken between sending the first # byte of the request and finishing parsing the headers. # It is therefore unaffected by consuming the response content" @@ -93,10 +92,8 @@ def timed_recv_from_server(session, dest, byte_range): try: # headers are merged with the session ones, not overwritten. 
session.get(dest.url, headers=HTTP_GET_HEADERS, verify=dest.verify) - # NewConnectionError will be raised when shutting down. - except (requests.exceptions.ConnectionError, - requests.exceptions.ReadTimeout, - requests.exceptions.NewConnectionError) as e: + # Catch any `requests` exception, so that it can be stored in the Result + except requests.exceptions.RequestException as e: log.debug(e) return False, e end_time = time.time() @@ -392,7 +389,11 @@ def result_putter_error(target): def closure(object): # The only object that can be here if there is not any uncatched # exception is stem.SocketClosed when stopping sbws - log.debug(type(object)) + # An exception here means that the worker thread finished. + log.warning(FILLUP_TICKET_MSG) + # To print the traceback that happened in the thread, not here in the + # main process + traceback.print_exception(type(object), object, object.__traceback__) return closure @@ -540,14 +541,18 @@ def run_speedtest(args, conf): fail_hard(error_msg) max_pending_results = conf.getint('scanner', 'measurement_threads') pool = Pool(max_pending_results) - try: main_loop(args, conf, controller, rl, cb, rd, rp, destinations, max_pending_results, pool) except KeyboardInterrupt: log.info("Interrupted by the user.") - finally: stop_threads(signal.SIGINT, None) + # Any exception not caught at this point would make the scanner stall. + # Log it and exit gracefully. + except Exception as e: + log.critical(FILLUP_TICKET_MSG) + log.exception(e) + stop_threads(signal.SIGTERM, None, 1) def gen_parser(sub): diff --git a/sbws/lib/destination.py b/sbws/lib/destination.py index e1be0bbd29b56925021bc755ab3f1039d628e963..a92df61e7ae5f04e2a89c7123e5bc7333a70897a 100644 --- a/sbws/lib/destination.py +++ b/sbws/lib/destination.py @@ -12,6 +12,26 @@ from ..globals import MAXIMUM_NUMBER_DESTINATION_FAILURES log = logging.getLogger(__name__) +# Duplicate some code from DestinationList.from_config, +# it should be refactored.
+def parse_destinations_countries(conf): + """Return the enabled destinations' countries separated by comma. + + Destinations without country are skipped; None if there is none. + """ + destinations_countries = [] + for key in conf['destinations'].keys(): + # Not a destination key + if key in ['usability_test_interval']: + continue + # The destination is not enabled + if not conf['destinations'].getboolean(key): + continue + section = conf['destinations.{}'.format(key)] + destinations_countries.append(section.get('country', None)) + return ','.join(c for c in destinations_countries if c) or None + + def _parse_verify_option(conf_section): if 'verify' not in conf_section: return DESTINATION_VERIFY_CERTIFICATE @@ -76,11 +96,8 @@ def connect_to_destination_over_circuit(dest, circ_id, session, cont, max_dl): listener = stem_utils.attach_stream_to_circuit_listener(cont, circ_id) stem_utils.add_event_listener(cont, listener, EventType.STREAM) try: - # TODO: - # - What other exceptions can this throw? head = session.head(dest.url, verify=dest.verify) - except (requests.exceptions.ConnectionError, - requests.exceptions.ReadTimeout) as e: + except requests.exceptions.RequestException as e: dest.set_failure() return False, 'Could not connect to {} over circ {} {}: {}'.format( dest.url, circ_id, stem_utils.circuit_str(cont, circ_id), e) diff --git a/sbws/lib/v3bwfile.py b/sbws/lib/v3bwfile.py index 8cb9226f9f534fbdf48af7b06cdc09e6f5185e5f..a7ce57d9a8cf7e3992d6279708592669ed88d042 100644 --- a/sbws/lib/v3bwfile.py +++ b/sbws/lib/v3bwfile.py @@ -28,7 +28,8 @@ KEYVALUE_SEP_V1 = '=' KEYVALUE_SEP_V2 = ' ' # List of the extra KeyValues accepted by the class EXTRA_ARG_KEYVALUES = ['software', 'software_version', 'file_created', - 'earliest_bandwidth', 'generator_started'] + 'earliest_bandwidth', 'generator_started', + 'scanner_country', 'destinations_countries'] STATS_KEYVALUES = ['number_eligible_relays', 'minimum_number_eligible_relays', 'number_consensus_relays', 'percent_eligible_relays',
'minimum_percent_eligible_relays'] @@ -140,7 +141,8 @@ class V3BWHeader(object): return self.strv2 @classmethod - def from_results(cls, results, state_fpath=''): + def from_results(cls, results, scanner_country=None, + destinations_countries=None, state_fpath=''): kwargs = dict() latest_bandwidth = cls.latest_bandwidth_from_results(results) earliest_bandwidth = cls.earliest_bandwidth_from_results(results) @@ -150,6 +152,11 @@ class V3BWHeader(object): kwargs['earliest_bandwidth'] = unixts_to_isodt_str(earliest_bandwidth) if generator_started is not None: kwargs['generator_started'] = generator_started + # To be compatible with older bandwidth files, do not require it. + if scanner_country is not None: + kwargs['scanner_country'] = scanner_country + if destinations_countries is not None: + kwargs['destinations_countries'] = destinations_countries h = cls(timestamp, **kwargs) return h @@ -524,7 +531,8 @@ class V3BWFile(object): for bw_line in self.bw_lines]) @classmethod - def from_results(cls, results, state_fpath='', + def from_results(cls, results, scanner_country=None, + destinations_countries=None, state_fpath='', scale_constant=SBWS_SCALE_CONSTANT, scaling_method=TORFLOW_SCALING, torflow_obs=TORFLOW_OBS_LAST, @@ -551,7 +559,8 @@ class V3BWFile(object): """ log.info('Processing results to generate a bandwidth list file.') - header = V3BWHeader.from_results(results, state_fpath) + header = V3BWHeader.from_results(results, scanner_country, + destinations_countries, state_fpath) bw_lines_raw = [] number_consensus_relays = cls.read_number_consensus_relays( consensus_path) diff --git a/sbws/util/config.py b/sbws/util/config.py index b1ecc4fa38f99a1ef218267d8edcbd46687aad6e..6622ba3024b65dc49b1e98011d6c46b7d9da57dc 100644 --- a/sbws/util/config.py +++ b/sbws/util/config.py @@ -12,6 +12,8 @@ from sbws.globals import (DEFAULT_CONFIG_PATH, DEFAULT_LOG_CONFIG_PATH, USER_CONFIG_PATH, SUPERVISED_RUN_DPATH, SUPERVISED_USER_CONFIG_PATH) +from sbws.util.iso3166 import 
ISO_3166_ALPHA_2 + _ALPHANUM = 'abcdefghijklmnopqrstuvwxyz' _ALPHANUM += _ALPHANUM.upper() _ALPHANUM += '0123456789' @@ -269,6 +271,21 @@ def _validate_paths(conf): return errors +def _validate_country(conf, sec, key, err_tmpl): + """Return the list of errors found validating the country in sec/key.""" + errors = [] + country = conf[sec].get(key, None) + if country is None: + errors.append(err_tmpl.substitute( + sec=sec, key=key, val=None, + e="Missing country in configuration file.")) + elif country not in ISO_3166_ALPHA_2: + errors.append(err_tmpl.substitute( + sec=sec, key=key, val=country, + e="Not a valid ISO 3166 alpha-2 country code.")) + return errors + + def _validate_scanner(conf): errors = [] sec = 'scanner' @@ -288,7 +305,7 @@ def _validate_scanner(conf): 'download_max': {'minimum': 0.001, 'maximum': None}, } all_valid_keys = list(ints.keys()) + list(floats.keys()) + \ - ['nickname'] + ['nickname', 'country'] errors.extend(_validate_section_keys(conf, sec, all_valid_keys, err_tmpl)) errors.extend(_validate_section_ints(conf, sec, ints, err_tmpl)) errors.extend(_validate_section_floats(conf, sec, floats, err_tmpl)) @@ -296,6 +313,7 @@ def _validate_scanner(conf): if not valid: errors.append(err_tmpl.substitute( sec=sec, key='nickname', val=conf[sec]['nickname'], e=error_msg)) + errors.extend(_validate_country(conf, sec, 'country', err_tmpl)) return errors @@ -388,7 +406,7 @@ def _validate_destinations(conf): urls = { 'url': {}, } - all_valid_keys = list(urls.keys()) + ['verify'] + all_valid_keys = list(urls.keys()) + ['verify', 'country'] for sec in dest_sections: if sec not in conf: errors.append('{} is an enabled destination but is not a ' @@ -397,6 +415,7 @@ def _validate_destinations(conf): errors.extend(_validate_section_keys( conf, sec, all_valid_keys, err_tmpl, allow_missing=['verify'])) errors.extend(_validate_section_urls(conf, sec, urls, err_tmpl)) + errors.extend(_validate_country(conf, sec, 'country', err_tmpl)) return errors diff --git a/sbws/util/iso3166.py b/sbws/util/iso3166.py new
file mode 100644 index 0000000000000000000000000000000000000000..d537c18e6605e4bbdd8efb3ce95f367bec94368b --- /dev/null +++ b/sbws/util/iso3166.py @@ -0,0 +1,37 @@ +""" +ISO 3166 alpha-2 countries' codes. +Obtained from https://en.wikipedia.org/wiki/List_of_ISO_3166_country_codes. +Last updated 2019/02/05. +ZZ is not the code of any country and it is used to denote any country, +when the destination Web Server is in a CDN. +""" +# When the destination Web Server is in a CDN, the IP could be resolved by +# the exit relay and obtain the country from the IP. + +# It would be better to use some standard location system for geographic areas +# that doesn't depend on political borders. +# It should be possible to obtain IP address location in that system too. + +ISO_3166_ALPHA_2 = [ + 'AF', 'AX', 'AL', 'DZ', 'AS', 'AD', 'AO', 'AI', 'AQ', 'AG', 'AR', 'AM', + 'AW', 'AU', 'AT', 'AZ', 'BS', 'BH', 'BD', 'BB', 'BY', 'BE', 'BZ', 'BJ', + 'BM', 'BT', 'BO', 'BQ', 'BA', 'BW', 'BV', 'BR', 'IO', 'BN', 'BG', 'BF', + 'BI', 'CV', 'KH', 'CM', 'CA', 'KY', 'CF', 'TD', 'CL', 'CN', 'CX', 'CC', + 'CO', 'KM', 'CD', 'CG', 'CK', 'CR', 'CI', 'HR', 'CU', 'CW', 'CY', 'CZ', + 'DK', 'DJ', 'DM', 'DO', 'EC', 'EG', 'SV', 'GQ', 'ER', 'EE', 'SZ', 'ET', + 'FK', 'FO', 'FJ', 'FI', 'FR', 'GF', 'PF', 'TF', 'GA', 'GM', 'GE', 'DE', + 'GH', 'GI', 'GR', 'GL', 'GD', 'GP', 'GU', 'GT', 'GG', 'GN', 'GW', 'GY', + 'HT', 'HM', 'VA', 'HN', 'HK', 'HU', 'IS', 'IN', 'ID', 'IR', 'IQ', 'IE', + 'IM', 'IL', 'IT', 'JM', 'JP', 'JE', 'JO', 'KZ', 'KE', 'KI', 'KP', 'KR', + 'KW', 'KG', 'LA', 'LV', 'LB', 'LS', 'LR', 'LY', 'LI', 'LT', 'LU', 'MO', + 'MK', 'MG', 'MW', 'MY', 'MV', 'ML', 'MT', 'MH', 'MQ', 'MR', 'MU', 'YT', + 'MX', 'FM', 'MD', 'MC', 'MN', 'ME', 'MS', 'MA', 'MZ', 'MM', 'NA', 'NR', + 'NP', 'NL', 'NC', 'NZ', 'NI', 'NE', 'NG', 'NU', 'NF', 'MP', 'NO', 'OM', + 'PK', 'PW', 'PS', 'PA', 'PG', 'PY', 'PE', 'PH', 'PN', 'PL', 'PT', 'PR', + 'QA', 'RE', 'RO', 'RU', 'RW', 'BL', 'SH', 'KN', 'LC', 'MF', 'PM', 'VC', + 'WS', 'SM', 'ST', 'SA',
'SN', 'RS', 'SC', 'SL', 'SG', 'SX', 'SK', 'SI', + 'SB', 'SO', 'ZA', 'GS', 'SS', 'ES', 'LK', 'SD', 'SR', 'SJ', 'SE', 'CH', + 'SY', 'TW', 'TJ', 'TZ', 'TH', 'TL', 'TG', 'TK', 'TO', 'TT', 'TN', 'TR', + 'TM', 'TC', 'TV', 'UG', 'UA', 'AE', 'GB', 'UM', 'US', 'UY', 'UZ', 'VU', + 'VE', 'VN', 'VG', 'VI', 'WF', 'EH', 'YE', 'ZM', 'ZW', 'ZZ' + ] diff --git a/tests/integration/sbws_testnet.ini b/tests/integration/sbws_testnet.ini index 99b7f56a56b99c5abdc2aee19341ad28d378b848..32fa1af09b2354d2ca2be105a95cab9c23604b84 100644 --- a/tests/integration/sbws_testnet.ini +++ b/tests/integration/sbws_testnet.ini @@ -5,6 +5,9 @@ data_period = 1 [paths] sbws_home = /tmp/.sbws +[scanner] +country = ZZ + [destinations] local = on @@ -12,6 +15,7 @@ local = on ; url = https://localhost:28888/sbws.bin url = http://127.0.0.1:28888/sbws.bin verify = False +country = ZZ [tor] extra_lines = diff --git a/tests/unit/lib/test_v3bwfile.py b/tests/unit/lib/test_v3bwfile.py index 2d38f482e42fe18577951e55f6f16f7c787198e7..0448c6972fe9bef19fd9dcbebbb681a0e1c3eee6 100644 --- a/tests/unit/lib/test_v3bwfile.py +++ b/tests/unit/lib/test_v3bwfile.py @@ -16,6 +16,11 @@ from sbws.util.timestamp import now_fname, now_isodt_str, now_unixts timestamp = 1523974147 timestamp_l = str(timestamp) version_l = KEYVALUE_SEP_V1.join(['version', SPEC_VERSION]) +scanner_country = 'US' +scanner_country_l = KEYVALUE_SEP_V1.join(['scanner_country', scanner_country]) +destinations_countries = '00,DE' +destinations_countries_l = KEYVALUE_SEP_V1.join(['destinations_countries', + destinations_countries]) software_l = KEYVALUE_SEP_V1.join(['software', 'sbws']) software_version_l = KEYVALUE_SEP_V1.join(['software_version', version]) file_created = '2018-04-25T13:10:57' @@ -23,8 +28,9 @@ file_created_l = KEYVALUE_SEP_V1.join(['file_created', file_created]) latest_bandwidth = '2018-04-17T14:09:07' latest_bandwidth_l = KEYVALUE_SEP_V1.join(['latest_bandwidth', latest_bandwidth]) -header_ls = [timestamp_l, version_l, file_created_l, 
latest_bandwidth_l, - software_l, software_version_l, TERMINATOR] +header_ls = [timestamp_l, version_l, destinations_countries_l, file_created_l, + latest_bandwidth_l, + scanner_country_l, software_l, software_version_l, TERMINATOR] header_str = LINE_SEP.join(header_ls) + LINE_SEP earliest_bandwidth = '2018-04-16T14:09:07' earliest_bandwidth_l = KEYVALUE_SEP_V1.join(['earliest_bandwidth', @@ -53,7 +59,9 @@ v3bw_str = header_extra_str + raw_bwl_str def test_v3bwheader_str(): """Test header str""" - header = V3BWHeader(timestamp_l, file_created=file_created) + header = V3BWHeader(timestamp_l, scanner_country=scanner_country, + destinations_countries=destinations_countries, + file_created=file_created) assert header_str == str(header) diff --git a/tests/unit/util/test_config.py b/tests/unit/util/test_config.py index 95dde6fb178672654a91e1f574d44b12e583a124..4ebc70371f6e9c8bb3f176fccf319ca56591aa0b 100644 --- a/tests/unit/util/test_config.py +++ b/tests/unit/util/test_config.py @@ -235,6 +235,37 @@ def test_nickname(): assert not valid, reason +def test_country(conf): + from string import Template + err_tmpl = Template('$sec/$key ($val): $e') + + # Invalid default country code in scanner section + errors = con._validate_country(conf, 'scanner', 'country', err_tmpl) + assert errors[0] == \ + 'scanner/country (AA): Not a valid ISO 3166 alpha-2 country code.' + + # Valid country code in scanner section + conf['scanner']['country'] = 'US' + errors = con._validate_country(conf, 'scanner', 'country', err_tmpl) + assert not errors + + # No country in destinations.foo section + conf['destinations']['foo'] = 'on' + conf['destinations.foo'] = {} + conf['destinations.foo']['url'] = 'https://foo.bar' + errors = con._validate_country( + conf, 'destinations.foo', 'country', err_tmpl) + assert errors[0] == \ + 'destinations.foo/country (None): ' \ + 'Missing country in configuration file.'
+ + # Valid country in destinations.foo section + conf['destinations.foo']['country'] = 'US' + errors = con._validate_country( + conf, 'destinations.foo', 'country', err_tmpl) + assert not errors + + def test_config_arg_provided_but_no_found(args, conf): args.config = 'non_existing_file' user_conf = con._get_user_config(args, conf)