#!/usr/bin/env python3

'''
  OnionPerf
  Authored by Rob Jansen, 2015
  Copyright 2015-2020 The Tor Project
  See LICENSE for licensing information
'''

import sys, os, argparse, logging, re, datetime
from itertools import cycle
from socket import gethostname

import onionperf.util as util
from onionperf.monitor import get_supported_torctl_events

# User-facing text for the command line interface. Each DESC_* string is
# passed as the `description=` of a parser and each HELP_* string as the
# matching `help=` text when the parsers are built in main().

# top-level parser
DESC_MAIN = """
OnionPerf is a utility to monitor, measure, analyze, and visualize the
performance of Tor and Onion Services.

OnionPerf must be run with a subcommand to specify a mode of operation.
The primary mode is measure, but tools to monitor a running Tor process and
to analyze and visualize the Tor and TGen output that is collected in the
measure mode are also available.

For more information, see https://github.com/robgjansen/onionperf and
https://www.torproject.org.
"""
HELP_MAIN = """
Use 'onionperf <subcommand> --help' for more info
"""

# `monitor` subcommand
DESC_MONITOR = """
This utility connects to a running Tor process on the Tor control port,
registers for several asynchronous events, and logs the events to disk
as they occur over time.
"""
HELP_MONITOR = """
Connect to Tor and log controller events to file
"""

# `measure` subcommand
DESC_MEASURE = """
OnionPerf uses multiple processes and threads to download random data through
Tor while tracking the performance of those downloads. The data is served and
fetched on localhost using two TGen (traffic generator) processes, but is
transferred through Tor using an ephemeral Tor Onion Service and a Tor
client process. Tor control information and TGen performance statistics are
logged to disk and can be later analyzed (using `onionperf analyze`) to
visualize changes in Tor performance over time.
"""
HELP_MEASURE = """
Measure Tor and Onion Service Performance using TGen
"""

# `analyze` subcommand
DESC_ANALYZE = """
Parse results from the TGen traffic generator and Tor.

This subcommand processes TGen and Tor log files and stores the processed
data in json format for plotting. It was written so that the log files
need never be stored on disk decompressed, which is useful when log file
sizes reach tens of gigabytes.

The standard way to run this subcommand is to give the path to a TGen and/or
a TorCtl file (e.g., those produced with the `measure` subcommand) using the
`--tgen` and `--torctl` options, and the statistics file resulting from the
analysis will be dumped to `onionperf.analysis.json.xz`.
Another way to run this subcommand is to give two paths to directories
containing TGen and TorCtl files to have files matched by filename and analysis
files dumped to `%Y-%m-%d.onionperf.analysis.json.xz`.
(See https://collector.torproject.org/#type-onionperf.)
Stats files in the default Torperf format can also be exported.
"""

HELP_ANALYZE = """
Analyze Tor and TGen output
"""

# `filter` subcommand
DESC_FILTER = """
Takes an OnionPerf analysis results file or directory as input, applies filters,
and produces new OnionPerf analysis results file(s) as output.

The `filter` subcommand is typically used in combination with the `visualize`
subcommand. The work flow is to filter out any TGen transfers/streams or Tor
streams/circuits that are not supposed to be visualized and then visualize only
those measurements with an existing mapping between TGen transfers/streams and
Tor streams/circuits.

This subcommand only filters individual objects and leaves summaries unchanged.
"""
HELP_FILTER = """
Filter OnionPerf analysis results
"""

# `visualize` subcommand
DESC_VISUALIZE = """
Loads an OnionPerf json file, e.g., one produced with the `analyze` subcommand,
and plots various interesting performance metrics to PDF files.
"""
HELP_VISUALIZE = """
Visualize OnionPerf analysis results
"""

# Configure the root logger at import time: INFO level, with every record
# carrying both a formatted timestamp (%(asctime)s, shaped by datefmt) and the
# raw epoch time (%(created)f).
logging.basicConfig(format='%(asctime)s %(created)f [onionperf] [%(levelname)s] %(message)s', level=logging.INFO, datefmt='%Y-%m-%d %H:%M:%S')
# Raise the threshold of the "stem" logger so only WARN and above get through.
logging.getLogger("stem").setLevel(logging.WARN)


class CustomHelpFormatter(argparse.ArgumentDefaultsHelpFormatter):
    """Help formatter that shows defaults and leaves description text unwrapped.

    Extends ``ArgumentDefaultsHelpFormatter`` with the raw-description
    behaviour (descriptions keep their own line breaks) and removes the
    ' [LABEL ...]' fragment from help entries that mention '--data'.
    """

    def _fill_text(self, text, width, indent):
        # keep the original line breaks; only prefix every line with the indent
        indented_lines = (indent + line for line in text.splitlines(True))
        return ''.join(indented_lines)

    def _format_action(self, action):
        rendered = super()._format_action(action)
        if '--data' not in rendered:
            return rendered
        # drop the repeated-LABEL hint from the '--data' entry
        return rendered.replace(' [LABEL ...]', '')

def main():
    """Build the OnionPerf command line parser and run the chosen subcommand.

    One sub-parser is registered per mode of operation (monitor, measure,
    analyze, filter, visualize). Each sub-parser stores its handler function
    as the `func` default, which is then called with the parsed namespace.

    When the program is started without any arguments, the top-level help is
    printed and the process exits with status 1.
    """
    hostname = gethostname().split('.')[0]

    # argparse.RawDescriptionHelpFormatter, RawTextHelpFormatter, RawDescriptionHelpFormatter
    my_formatter_class = CustomHelpFormatter

    # construct the options
    main_parser = argparse.ArgumentParser(description=DESC_MAIN, formatter_class=my_formatter_class)

    sub_parser = main_parser.add_subparsers(help=HELP_MAIN)

    # monitor
    monitor_parser = sub_parser.add_parser('monitor', description=DESC_MONITOR, help=HELP_MONITOR,
        formatter_class=my_formatter_class)
    monitor_parser.set_defaults(func=monitor, formatter_class=my_formatter_class)

    monitor_parser.add_argument('-p', '--port',
        help="""the Tor control port number N""",
        metavar="N", type=type_nonnegative_integer,
        action="store", dest="ctlport",
        default=9051)

    monitor_parser.add_argument('-l', '--log',
        help="""a file PATH to log Tor controller output, may be '-' for STDOUT""",
        metavar="PATH", type=type_str_file_path_out,
        action="store", dest="logpath",
        default="-")

    # query the supported events once; they serve as both default and choices
    supported_events = get_supported_torctl_events()

    monitor_parser.add_argument('-e', '--events',
        help="""the Tor control EVENT(s) recognized by stem that should be monitored and logged""",
        metavar="EVENT", nargs='+',
        action="store", dest="events",
        default=supported_events,
        choices=supported_events)

    monitor_parser.add_argument('-c', '--custom-events',
        help="""custom Tor control EVENT(s) not recognized by stem that should be monitored and logged""",
        metavar="CUSTOM_EVENT", nargs='+',
        action="store", dest="custom_events",
        default=[])

    # measure
    measure_parser = sub_parser.add_parser('measure', description=DESC_MEASURE, help=HELP_MEASURE,
        formatter_class=my_formatter_class)
    measure_parser.set_defaults(func=measure, formatter_class=my_formatter_class)

    measure_parser.add_argument('--tor',
        help="""a file PATH to a Tor binary""",
        metavar="PATH", type=type_str_path_in,
        action="store", dest="torpath",
        default=util.which("tor"))

    measure_parser.add_argument('--tgen',
        help="""a file PATH to a TGen binary""",
        metavar="PATH", type=type_str_path_in,
        action="store", dest="tgenpath",
        default=util.which("tgen"))

    measure_parser.add_argument('--oneshot',
        help="""Enables oneshot mode, onionperf closes on successfully downloading a file""",
        action="store_true", dest="oneshot",
        default=False)

    measure_parser.add_argument('--additional-client-conf',
        help="""Additional configuration lines for the Tor client, for example bridge lines""",
        metavar="CONFIG", type=str,
        action="store", dest="additional_client_conf",
        default="")

    measure_parser.add_argument('--torclient-conf-file',
        help="""Configuration file for the Tor client""",
        metavar="CONFIG", type=str,
        action="store", dest="torclient_conf_file",
        default="")

    measure_parser.add_argument('--torserver-conf-file',
        help="""Configuration file for the Tor server""",
        metavar="CONFIG", type=str,
        action="store", dest="torserver_conf_file",
        default="")

    measure_parser.add_argument('--tgen-connect-ip',
        help="""the TGen client connect IP address ADDR, or 0.0.0.0 to do an external IP lookup; must be Internet-accessible for non-onion downloads to work""",
        metavar="ADDR", type=type_str_ip_in,
        action="store", dest="tgenconnectip",
        default="0.0.0.0")

    measure_parser.add_argument('--tgen-listen-port',
        help="""the TGen server listen port number N, must either be equal to '--tgen-connect-port' or a firewall rule should be in place to forward '--tgen-connect-port' to this port""",
        metavar="N", type=type_nonnegative_integer,
        action="store", dest="tgenlistenport",
        default=8080)

    measure_parser.add_argument('--tgen-connect-port',
        help="""the TGen client connect port number N, must be Internet-accessible for non-onion downloads to work""",
        metavar="N", type=type_nonnegative_integer,
        action="store", dest="tgenconnectport",
        default=8080)

    onion_or_inet_only_group = measure_parser.add_mutually_exclusive_group()

    onion_or_inet_only_group.add_argument('-o', '--onion-only',
        help="""only measure download times over Tor to an ephemeral onion service""",
        action="store_true", dest="onion_only",
        default=False)

    onion_or_inet_only_group.add_argument('-i', '--inet-only',
        help="""only measure download times over Tor exiting to a public webserver""",
        action="store_true", dest="inet_only",
        default=False)

    measure_parser.add_argument('-s', '--single-onion',
        help="""use a single onion service""",
        action="store_true", dest="single_onion",
        default=False)

    measure_parser.add_argument('-n', '--nickname',
        help="""the 'SOURCE' STRING to use in measurement result files produced by OnionPerf""",
        metavar="STRING", type=str,
        action="store", dest="nickname",
        default=hostname)

    measure_parser.add_argument('-p', '--prefix',
        help="""a directory PATH prefix where OnionPerf will run""",
        metavar="PATH", type=type_str_dir_path_out,
        action="store", dest="prefix",
        default=os.path.join(os.getcwd(), "onionperf-data"))

    measure_parser.add_argument('-k', '--key-prefix',
        help="""a directory PATH prefix where OnionPerf will store its private key files""",
        metavar="PATH", type=type_str_dir_path_out,
        action="store", dest="private_prefix",
        default=os.path.join(os.getcwd(), "onionperf-private"))


    # analyze
    analyze_parser = sub_parser.add_parser('analyze', description=DESC_ANALYZE, help=HELP_ANALYZE,
        formatter_class=my_formatter_class)
    analyze_parser.set_defaults(func=analyze, formatter_class=my_formatter_class)

    analyze_parser.add_argument('--tgen',
        help="""a file or directory PATH to a TGen logfile or logfile directory""",
        metavar="PATH", type=type_str_path_in,
        action="store", dest="tgen_logpath",
        default=None)

    analyze_parser.add_argument('--torctl',
        help="""a file or directory PATH to a TorCtl logfile or logfile directory (in the format output by the monitor subcommand)""",
        metavar="PATH", type=type_str_path_in,
        action="store", dest="torctl_logpath",
        default=None)

    analyze_parser.add_argument('-p', '--prefix',
        help="""A directory PATH prefix where the processed data
files generated by this script will be written""",
        metavar="PATH", type=type_str_dir_path_out,
        action="store", dest="prefix",
        default=os.getcwd())

    analyze_parser.add_argument('-a', '--address',
        help="""an IP address STRING that identifies the machine where the input logfiles were produced""",
        metavar="STRING", type=type_str_ip_in,
        action="store", dest="ip_address",
        default=None)

    analyze_parser.add_argument('-n', '--nickname',
        help="""a nickname STRING that identifies the machine where the input logfiles were produced""",
        metavar="STRING", type=str,
        action="store", dest="nickname",
        default=None)

    date_group = analyze_parser.add_mutually_exclusive_group()
    date_group.add_argument('-d', '--date-filter',
        help="""a DATE string in the form YYYY-MM-DD, all log messages that did not occur on this date will be filtered out of the analysis""",
        metavar="DATE", type=type_str_date_in,
        action="store", dest="date_filter",
        default=None)

    date_group.add_argument('-x', '--date-prefix',
        help="""a DATE string in the form YYYY-MM-DD to add as prefix to the output file""",
        metavar="DATE", type=type_str_date_in,
        action="store", dest="date_prefix",
        default=None)

    # filter
    filter_parser = sub_parser.add_parser('filter', description=DESC_FILTER, help=HELP_FILTER,
        formatter_class=my_formatter_class)
    filter_parser.set_defaults(func=filter, formatter_class=my_formatter_class)

    # `required` takes a boolean; a string such as "True" only works because
    # every non-empty string is truthy
    filter_parser.add_argument('-i', '--input',
        help="""a file or directory PATH from which OnionPerf analysis results
                files are read""",
        metavar="PATH", required=True,
        action="store", dest="input")

    filter_parser.add_argument('--include-fingerprints',
        help="""include only Tor circuits with known circuit path and with all
                relays being contained in the fingerprints file located at
                PATH""",
        metavar="PATH", action="store", dest="include_fingerprints",
        default=None)

    filter_parser.add_argument('--exclude-fingerprints',
        help="""exclude Tor circuits without known circuit path or with any
                relay being contained in the fingerprints file located at
                PATH""",
        metavar="PATH", action="store", dest="exclude_fingerprints",
        default=None)

    filter_parser.add_argument('-o', '--output',
        help="""a file or directory PATH where filtered output OnionPerf
                analysis results files are written""",
        metavar="PATH", required=True,
        action="store", dest="output")

    # visualize
    visualize_parser = sub_parser.add_parser('visualize', description=DESC_VISUALIZE, help=HELP_VISUALIZE,
        formatter_class=my_formatter_class)
    visualize_parser.set_defaults(func=visualize, formatter_class=my_formatter_class)

    visualize_parser.add_argument('-d', '--data',
        help="""Appends one or more PATHS to OnionPerf
                analysis results files or directories of such files, and a LABEL
                to be used for the graph legend for this dataset""",
        metavar=("PATH [PATH...] LABEL", "LABEL"),
        nargs='+',
        required=True,
        action=PathStringArgsAction, dest="datasets")

    visualize_parser.add_argument('--outer-join',
        help="""Include measurements without an existing mapping between TGen
                transfers/streams and Tor streams/circuits, which is the
                equivalent of an outer join in the database sense""",
        action="store_true", dest="outer_join",
        default=False)

    visualize_parser.add_argument('-p', '--prefix',
        help="a STRING filename prefix for graphs we generate",
        metavar="STRING", type=str,
        action="store", dest="prefix",
        default=None)

    # without any argument there is no subcommand to run: show help and bail out
    if len(sys.argv) == 1:
        main_parser.print_help()
        sys.exit(1)

    # get args and call the command handler for the chosen mode
    args = main_parser.parse_args()
    args.func(args)

def monitor(args):
    """Handler of the `monitor` subcommand: log Tor controller events.

    Reads `events`, `custom_events`, `logpath` and `ctlport` from the parsed
    argument namespace `args`, runs a TorMonitor on the control port and
    writes its output through a FileWritable opened on `logpath`.

    A KeyboardInterrupt (ctrl+c) ends the monitoring quietly. The writer is
    closed in every case, including when the monitor raises another error.
    """
    from onionperf.monitor import TorMonitor

    events = args.events + args.custom_events
    eventstr = ','.join(events)

    writer = util.FileWritable(args.logpath)
    try:
        mon = TorMonitor(args.ctlport, writer, events=events)
        fname = 'STDOUT' if args.logpath == '-' else args.logpath
        startup_msg = "tor-ctl-logger started logging Tor events {0} from port {1} to {2}".format(eventstr, args.ctlport, fname)
        logging.info(startup_msg)
        mon.run()
        logging.info("tor-ctl-logger is done logging Tor events {0} from port {1}".format(eventstr, args.ctlport))
    except KeyboardInterrupt:
        pass  # the user hit ctrl+c
    finally:
        # release the log writer even if the monitor failed unexpectedly
        writer.close()

def measure(args):
    """Handler of the `measure` subcommand: set up and run a Measurement.

    Resolves the tor and tgen binaries (updating `args.torpath` and
    `args.tgenpath`), changes into `args.prefix`, picks random free ports for
    everything except the TGen server listen port and the TGen client connect
    port, and starts the measurement. If a binary cannot be found, a message
    is logged and nothing is run.
    """
    from onionperf.measurement import Measurement

    # check paths; tgen is only looked up once tor has been located
    args.torpath = util.find_path(args.torpath, "tor")
    if args.torpath is not None:
        args.tgenpath = util.find_path(args.tgenpath, "tgen")

    if args.torpath is None or args.tgenpath is None:
        logging.info("Please fix path errors to continue")
        return

    os.chdir(args.prefix)

    # client side: connect address comes from the command line, the rest is random
    connect_ip = args.tgenconnectip
    connect_port = args.tgenconnectport
    tgen_client_port = util.get_random_free_port()
    tor_client_ctl_port = util.get_random_free_port()
    tor_client_socks_port = util.get_random_free_port()

    # server side: listen port comes from the command line, the rest is random
    tgen_server_port = args.tgenlistenport
    tor_server_ctl_port = util.get_random_free_port()
    tor_server_socks_port = util.get_random_free_port()

    measurement = Measurement(
        args.torpath,
        args.tgenpath,
        args.prefix,
        args.private_prefix,
        args.nickname,
        args.oneshot,
        args.additional_client_conf,
        args.torclient_conf_file,
        args.torserver_conf_file,
        args.single_onion,
    )

    measurement.run(
        do_onion=not args.inet_only,
        do_inet=not args.onion_only,
        client_tgen_listen_port=tgen_client_port,
        client_tgen_connect_ip=connect_ip,
        client_tgen_connect_port=connect_port,
        client_tor_ctl_port=tor_client_ctl_port,
        client_tor_socks_port=tor_client_socks_port,
        server_tgen_listen_port=tgen_server_port,
        server_tor_ctl_port=tor_server_ctl_port,
        server_tor_socks_port=tor_server_socks_port,
    )

def analyze(args):
    """Handler of the `analyze` subcommand.

    Three input shapes are recognized:
      * no path at all: a warning is logged and nothing happens;
      * every given path is a regular file: a single OPAnalysis is built
        from the file(s), analyzed and saved;
      * both paths are directories: log files are collected, matched into
        pairs and reprocessed.
    Anything else is reported as an error and nothing is analyzed.
    """
    tgen_path = args.tgen_logpath
    torctl_path = args.torctl_logpath
    given = (tgen_path, torctl_path)

    if tgen_path is None and torctl_path is None:
        logging.warning("No logfile paths were given, nothing will be analyzed")
        return

    # single-file mode: each path is either absent or an existing file
    if all(path is None or os.path.isfile(path) for path in given):
        from onionperf.analysis import OPAnalysis
        analysis = OPAnalysis(nickname=args.nickname, ip_address=args.ip_address)
        if tgen_path is not None:
            analysis.add_tgen_file(tgen_path)
        if torctl_path is not None:
            analysis.add_torctl_file(torctl_path)
        analysis.analyze(date_filter=args.date_filter)
        analysis.save(output_prefix=args.prefix, date_prefix=args.date_prefix)
        return

    # directory mode: both paths must be given and both must be directories
    if all(path is not None and os.path.isdir(path) for path in given):
        from onionperf import reprocessing
        tgen_logs = reprocessing.collect_logs(tgen_path, '*tgen.log*')
        torctl_logs = reprocessing.collect_logs(torctl_path, '*torctl.log*')
        log_pairs = reprocessing.match(tgen_logs, torctl_logs, args.date_filter)
        logging.info("Found {0} matching log pairs to be reprocessed".format(len(log_pairs)))
        reprocessing.multiprocess_logs(log_pairs, args.prefix, args.nickname)
        return

    logging.error("Given paths were an unrecognized mix of file and directory paths, nothing will be analyzed")

def filter(args):
    """Handler of the `filter` subcommand.

    Applies the requested fingerprint filters to a single analysis file, or
    to every '*onionperf.analysis.*' file collected from an input directory,
    mirroring the relative layout below the output path.

    Raises:
        argparse.ArgumentTypeError: if the input path does not exist or the
            output path already exists.
    """
    from onionperf.filtering import Filtering

    source = os.path.abspath(os.path.expanduser(args.input))
    if not os.path.exists(source):
        raise argparse.ArgumentTypeError("input path '%s' does not exist" % args.input)

    target = os.path.abspath(os.path.expanduser(args.output))
    if os.path.exists(target):
        raise argparse.ArgumentTypeError("output path '%s' already exists" % args.output)

    filtering = Filtering()
    if args.include_fingerprints is not None:
        filtering.include_fingerprints(args.include_fingerprints)
    if args.exclude_fingerprints is not None:
        filtering.exclude_fingerprints(args.exclude_fingerprints)

    # pair every input file with the output location it should be written to
    if os.path.isfile(source):
        jobs = [(source, target)]
    else:
        from onionperf import reprocessing
        jobs = (
            (found, os.path.join(target, os.path.relpath(found, source)))
            for found in reprocessing.collect_logs(source, '*onionperf.analysis.*')
        )

    for in_path, out_path in jobs:
        out_dir, out_file = os.path.split(out_path)
        filtering.apply_filters(input_path=in_path, output_dir=out_dir, output_file=out_file)

def visualize(args):
    """Handler of the `visualize` subcommand.

    Loads the analysis files of every (paths, label) dataset in
    `args.datasets`, skips files for which loading yields None, and plots
    all datasets using `args.prefix` and `args.outer_join`.
    """
    from onionperf.visualization import TGenVisualization
    from onionperf.analysis import OPAnalysis

    plotter = TGenVisualization()
    for dataset_paths, legend_label in args.datasets:
        candidates = (OPAnalysis.load(filename=p) for p in dataset_paths)
        loaded = [result for result in candidates if result is not None]
        plotter.add_dataset(loaded, legend_label)
    plotter.plot_all(args.prefix, outer_join=args.outer_join)

def type_nonnegative_integer(value):
    """argparse type: convert `value` to an int and require it to be >= 0.

    Raises:
        argparse.ArgumentTypeError: if the converted number is negative.
    """
    number = int(value)
    if number >= 0:
        return number
    raise argparse.ArgumentTypeError("'%s' is an invalid non-negative int value" % value)

def type_supported_analysis(value):
    """argparse type: accept 'all', 'tgen' or 'tor' in any letter case.

    Returns the lower-cased value.

    Raises:
        argparse.ArgumentTypeError: for any other value.
    """
    kind = value.lower()
    if kind not in ("all", "tgen", "tor"):
        raise argparse.ArgumentTypeError("'%s' is an invalid Analysis type" % value)
    return kind

def type_str_file_path_out(value):
    """argparse type: resolve an output file path.

    '-' is passed through unchanged. Any other value is expanded to an
    absolute path, and its parent directory is handed to
    util.make_dir_path before the absolute path is returned.
    """
    text = str(value)
    if text != "-":
        text = os.path.abspath(os.path.expanduser(text))
        util.make_dir_path(os.path.dirname(text))
    return text

def type_str_dir_path_out(value):
    """argparse type: resolve an output directory path.

    The value is expanded to an absolute path, handed to util.make_dir_path
    and then returned.
    """
    target = os.path.abspath(os.path.expanduser(str(value)))
    util.make_dir_path(target)
    return target

def type_str_path_in(value):
    """argparse type: resolve an input path that has to exist.

    '-' is passed through unchanged; every other value is expanded to an
    absolute path.

    Raises:
        argparse.ArgumentTypeError: if the expanded path does not exist.
    """
    text = str(value)
    if text == "-":
        return text
    resolved = os.path.abspath(os.path.expanduser(text))
    if os.path.exists(resolved):
        return resolved
    raise argparse.ArgumentTypeError("path '%s' does not exist" % text)

def type_str_ip_in(value):
    """argparse type: validate a dotted-quad IPv4 address.

    The whole string has to be an address: four groups of one to three
    ASCII digits separated by dots, each group not larger than 255.
    Trailing characters are rejected instead of being silently cut off.

    Returns:
        The address string.

    Raises:
        argparse.ArgumentTypeError: if the value is not a valid address.
    """
    s = str(value)
    # fullmatch anchors both ends, so input like '1.2.3.4junk' is refused
    ip = re.fullmatch(r'(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})', s, flags=re.ASCII)
    if ip is None or any(int(octet) > 255 for octet in ip.groups()):
        raise argparse.ArgumentTypeError("IP address '%s' is not a valid address" % s)
    return ip.group(0)

def type_str_date_in(value):
    """argparse type: parse a date given as YYYY-MM-DD.

    Returns:
        A datetime.date for the given year, month and day.

    Raises:
        argparse.ArgumentTypeError: if the value does not consist of three
            '-'-separated integers, if a component is out of range, or if
            the combination is not a real calendar date (e.g. 2020-02-31).
    """
    s = str(value)
    parts = s.split('-')
    try:
        # int() raises ValueError for non-numeric parts, the unpacking raises
        # ValueError when there are not exactly three parts
        y, m, d = [int(part) for part in parts]
    except ValueError:
        raise argparse.ArgumentTypeError("date '%s' is not in the valid YYYY-MM-DD format" % s) from None
    if y < datetime.MINYEAR or y > datetime.MAXYEAR:
        # all three values must be passed as one tuple to the % operator
        raise argparse.ArgumentTypeError("the year portion of date '%s' must be in the range [%d, %d]" % (s, datetime.MINYEAR, datetime.MAXYEAR))
    if m < 1 or m > 12:
        raise argparse.ArgumentTypeError("the month portion of date '%s' must be in the range [1, 12]" % s)
    if d < 1 or d > 31:
        raise argparse.ArgumentTypeError("the day portion of date '%s' must be in the range [1, 31]" % s)
    try:
        return datetime.date(y, m, d)
    except ValueError as err:
        # the day does not exist in that month, e.g. February 31st
        raise argparse.ArgumentTypeError("date '%s' is not a valid calendar date: %s" % (s, err)) from None

# a custom action for passing in experimental data directories when plotting
class PathStringArgsAction(argparse.Action):
    """Collect `PATH [PATH ...] LABEL` datasets into a list of (paths, label).

    Each use of the option appends one tuple to the destination attribute.
    Paths are expanded to absolute paths; a directory is replaced by the
    '*onionperf.analysis.json*' files collected from it. Duplicates are
    dropped while the order given on the command line is kept.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        # at least two arguments are required
        if len(values) < 2:
            raise argparse.ArgumentError(self, "Must specify at least one path and one data label")

        # extract the paths to our data, and the label for the legend
        *raw_paths, label = values

        # warn (but do not fail) if the label itself looks like a path
        check_label = os.path.abspath(os.path.expanduser(label))
        if os.path.exists(check_label) and label not in raw_paths:
            logging.warning("The label supplied looks like a path: {}".format(label))

        # Work on the expanded path throughout, so that the existence check,
        # the file/directory decision and the stored path all refer to the
        # same location (e.g. for an unexpanded '~').
        resolved = []
        for item in raw_paths:
            p = os.path.abspath(os.path.expanduser(item))
            if not os.path.exists(p):
                raise argparse.ArgumentError(self, "The supplied path does not exist: '{0}'".format(p))
            if os.path.isdir(p):
                # directories contribute the analysis files found inside them
                from onionperf import reprocessing
                resolved.extend(reprocessing.collect_logs(p, "*onionperf.analysis.json*"))
            else:
                resolved.append(p)

        # remove any duplicates, keeping the first occurrence of each path
        paths = list(dict.fromkeys(resolved))

        # remove the default
        if "_didremovedefault" not in namespace:
            setattr(namespace, self.dest, [])
            setattr(namespace, "_didremovedefault", True)
        # append our new experiment path
        dest = getattr(namespace, self.dest)
        dest.append((paths, label))

# Run the command line interface when this file is executed as a script.
if __name__ == '__main__': sys.exit(main())
