Commit ebd0acbd authored by Philipp Winter
Browse files

Merge branch 'py3' into 'master'

Initial Python3 support

Closes #1

See merge request !1
parents 13bdbbb3 4d678bc0
#!/usr/bin/env python2
#!/usr/bin/env python3
# Copyright 2013, 2014, 2016 Philipp Winter <>
......@@ -28,7 +28,7 @@ import argparse
import datetime
import random
import logging
import ConfigParser
from configparser import ConfigParser
import functools
import pwd
......@@ -114,7 +114,7 @@ def parse_cmd_args():
home_dir = os.path.expanduser("~")
config_file = os.path.join(home_dir, ".exitmaprc")
config_parser = ConfigParser.SafeConfigParser()
config_parser = ConfigParser()
file_parsed =[config_file])
if file_parsed:
#!/usr/bin/env python2
#!/usr/bin/env python3
# Copyright 2013-2017 Philipp Winter <>
......@@ -24,10 +24,9 @@ Module to detect false negatives for <>.
import sys
import json
import logging
import urllib2
except ImportError:
import urllib.request as urllib2
import urllib.request
import socks
import socket
from util import exiturl
......@@ -50,10 +49,10 @@ def fetch_page(exit_desc):
url = exiturl(exit_desc.fingerprint)
data = urllib2.urlopen("",
data = urllib.request.urlopen("",
except Exception as err:
log.debug("urllib2.urlopen says: %s" % err)
log.debug("urllib.request.urlopen says: %s" % err)
if not data:
#!/usr/bin/env python2
#!/usr/bin/env python3
# Copyright 2016 Philipp Winter <>
......@@ -22,9 +22,9 @@ Check if a web site returns a CloudFlare CAPTCHA.
import sys
import StringIO
import io
import gzip
import httplib
import http.client
import collections
import logging
......@@ -35,7 +35,7 @@ log = logging.getLogger(__name__)
destinations = [("", 443)]
DOMAIN, PORT = destinations[0]
CAPTCHA_SIGN = "Attention Required! | Cloudflare"
CAPTCHA_SIGN = b"Attention Required! | Cloudflare"
# Mimic Tor Browser's request headers, so CloudFlare won't return a 403 because
# it thinks we are a bot.
......@@ -57,7 +57,7 @@ def decompress(data):
buf = StringIO.StringIO(data)
buf = io.StringIO(data)
fileobj = gzip.GzipFile(fileobj=buf)
data =
except Exception:
......@@ -74,7 +74,7 @@ def is_cloudflared(exit_fpr):
exit_url = util.exiturl(exit_fpr)
log.debug("Probing exit relay \"%s\"." % exit_url)
conn = httplib.HTTPSConnection(DOMAIN, PORT, strict=False)
conn = http.client.HTTPSConnection(DOMAIN, PORT)
conn.request("GET", "/", headers=collections.OrderedDict(HTTP_HEADERS))
response = conn.getresponse()
#!/usr/bin/env python2
#!/usr/bin/env python3
# Copyright 2013-2017 Philipp Winter <>
......@@ -50,7 +50,7 @@ def setup():
log.debug("Populating domain dictionary.")
for domain in domains.iterkeys():
for domain in list(domains.keys()):
response = dns.resolver.query(domain)
for record in response:
log.debug("Domain %s maps to %s." % (domain, record.address))
......@@ -98,7 +98,7 @@ def probe(exit_desc, run_python_over_tor, run_cmd_over_tor, **kwargs):
Probe the given exit relay and check if all domains resolve as expected.
for domain in domains.iterkeys():
for domain in list(domains.keys()):
run_python_over_tor(resolve, exit_desc, domain, domains[domain])
#!/usr/bin/env python2
#!/usr/bin/env python3
# Copyright 2016 Philipp Winter <>
#!/usr/bin/env python2
#!/usr/bin/env python3
# Copyright 2014-2016 Philipp Winter <>
# Copyright 2014 Josh Pitts <>
......@@ -36,7 +36,7 @@ Then run:
import sys
import os
import urllib2
import urllib.request, urllib.error, urllib.parse
except ImportError:
import urllib.request as urllib2
import tempfile
......@@ -81,15 +81,15 @@ def setup():"Creating temporary reference files.")
for url, _ in check_files.iteritems():
for url, _ in check_files.items():
log.debug("Attempting to download <%s>." % url)
request = urllib2.Request(url)
request = urllib.request.Request(url)
request.add_header('User-Agent', test_agent)
data = urllib2.urlopen(request).read()
data = urllib.request.urlopen(request).read()
except Exception as err:
log.warning("urlopen() failed: %s" % err)
......@@ -111,7 +111,7 @@ def teardown():"Removing reference files.")
for _, file_info in check_files.iteritems():
for _, file_info in check_files.items():
orig_file, _ = file_info"Removing file \"%s\"." % orig_file)
......@@ -161,7 +161,7 @@ def run_check(exit_desc):
exiturl = util.exiturl(exit_desc.fingerprint)
for url, file_info in check_files.iteritems():
for url, file_info in check_files.items():
orig_file, orig_digest = file_info
......@@ -169,11 +169,11 @@ def run_check(exit_desc):
data = None
request = urllib2.Request(url)
request = urllib.request.Request(url)
request.add_header('User-Agent', test_agent)
data = urllib2.urlopen(request, timeout=20).read()
data = urllib.request.urlopen(request, timeout=20).read()
except Exception as err:
log.warning("urlopen() failed for %s: %s" % (exiturl, err))
#!/usr/bin/env python2
#!/usr/bin/env python3
# Copyright 2013-2016 Philipp Winter <>
# Copyright 2016 Zack Weinberg <>
......@@ -231,7 +231,7 @@ def choose_probe_order(dests):
remaining = {}
last_appearance = {}
full_address = {}
for host, usable_ports in hosts.iteritems():
for host, usable_ports in hosts.items():
if p in usable_ports:
full_address[host] = (host, p)
......@@ -241,7 +241,7 @@ def choose_probe_order(dests):
rv = []
deadcycles = 0
while remaining:
ks = remaining.keys()
ks = list(remaining.keys())
x = random.choice(ks)
last = last_appearance[x]
if last == -1 or (len(rv) - last) >= (len(ks) // 4):
......@@ -25,14 +25,12 @@ that the relay (probably) has enough file descriptors.
import sys
import re
import logging
import urllib2
except ImportError:
import urllib.request as urllib2
import urllib.request, urllib.error, urllib.parse
from util import exiturl
import stem.descriptor.server_descriptor as descriptor
import socks
log = logging.getLogger(__name__)
......@@ -50,10 +48,10 @@ def fetch_page(exit_desc):
data = None
data = urllib2.urlopen("",
data = urllib.request.urlopen("",
except Exception as err:
log.warning("urllib2.urlopen for %s says: %s." %
log.warning("urllib.request.urlopen for %s says: %s." %
(exit_desc.fingerprint, err))
......@@ -178,7 +178,7 @@ def get_exits(data_dir,
exit_candidates = [
for fpr, desc in have_exit_policy.iteritems()
for fpr, desc in have_exit_policy.items()
if stem.Flag.EXIT in cached_consensus.get(fpr, stub_desc).flags
......@@ -25,8 +25,9 @@ import socket
import select
import errno
import logging
import _socket
import error
import socks
log = logging.getLogger(__name__)
......@@ -35,6 +36,7 @@ proxy_port = None
queue = None
circ_id = None
_orig_getaddrinfo = socket.getaddrinfo
orig_socket = socket.socket
_ERRNO_RETRY = frozenset((errno.EAGAIN, errno.EWOULDBLOCK,
......@@ -74,279 +76,66 @@ def send_queue(sock_name):
global queue, circ_id
assert (queue is not None) and (circ_id is not None)
queue.put([circ_id, sock_name])
class _Torsocket(orig_socket):
Provides a minimal, Tor-specific SOCKSv5 interface.
# Implementation note: socket.socket is (at least in Python 2) a
# wrapper object around _socket.socket. Most superclass methods
# cannot be invoked via the usual super().method(self, args...)
# construct. One must use self._sock.method(args...) instead.
def __init__(self, family=socket.AF_INET, type=socket.SOCK_STREAM,
proto=0, _sock=None):
self._sockfamily = family
self._socktype = type
self._connecting = False
self._connected = False
self._peer_addr = None
self._conn_err = None
super(_Torsocket, self).__init__(family, type, proto, _sock)
# FIXME: Arguably this should happen only on connect() so that
# attempts to connect to can bypass the proxy server.
# However, that would make nonblocking mode significantly more
# complicated. We'd need an actual state machine instead of
# just a pair of booleans, and callers would need to be
# prepared to 'turn the crank' on the state machine.
def _recv_all(self, num_bytes):
Try to read the given number of bytes, blocking indefinitely
if necessary (even if the socket is in nonblocking mode).
If we are unable to read all of it, an EOFError is raised.
data = ""
while len(data) < num_bytes:
class _Torsocket(socks.socksocket):
def __init__(self, *args, **kwargs):
super(_Torsocket, self).__init__(*args, **kwargs)
orig_neg = self._proxy_negotiators[2] # This is the original function
def ourneg(*args, **kwargs):
"Our modified function to add data to the queue"
more = self._sock.recv(num_bytes - len(data))
except socket.error as e:
if e.errno not in _ERRNO_RETRY:
raise[self], [], [])
if not more:
raise EOFError("Could read only %d of expected %d bytes." %
(len(data), num_bytes))
data += more
return data
def _send_all(self, msg):
Try to send all of 'msg', blocking indefinitely if necessary
(even if the socket is in nonblocking mode).
sent = 0
while sent < len(msg):
n = self._sock.send(msg[sent:])
except socket.error as e:
if e.errno not in _ERRNO_RETRY:
raise[], [self], [])
if not n:
raise EOFError("Could send only %d of expected %d bytes." %
(sent, len(msg)))
sent += n
def _authenticate(self):
Authenticate to our SOCKSv5 server.
assert (proxy_addr is not None) and (proxy_port is not None)
# Connect to SOCKSv5 server. We use version 5 and one authentication
# method, which is "no authentication".
self._sock.connect((proxy_addr, proxy_port))
resp = self._recv_all(2)
if resp != "\x05\x00":
raise error.SOCKSv5Error("Invalid server response: 0x%s" %
def resolve(self, domain):
Resolve the given domain using Tor's SOCKS resolution extension.
domain_len = len(domain)
if domain_len > 255:
raise error.SOCKSv5Error("Domain must not be longer than 255 "
"characters, but %d given." % domain_len)
# Tor defines a new command value, \xf0, that is used for domain
# resolution.
self._send_all("\x05\xf0\x00\x03%s%s%s" %
(chr(domain_len), domain, "\x00\x00"))
resp = self._recv_all(10)
if resp[:2] != "\x05\x00":
raise error.SOCKSv5Error("Invalid server response: 0x%s" %
return socket.inet_ntoa(resp[4:8])
def connect(self, addr_tuple):
err = self.connect_ex(addr_tuple)
if err:
raise socket.error(err, os.strerror(err))
def connect_ex(self, addr_tuple):
Tell SOCKS server to connect to our destination.
dst_addr, dst_port = addr_tuple[0], int(addr_tuple[1])
self._connecting = True
self._peer_addr = (dst_addr, dst_port)
log.debug("Requesting connection to %s:%d.", dst_addr, dst_port)
self._send_all("\x05\x01\x00\x01%s%s" %
(socket.inet_aton(dst_addr), struct.pack(">H", dst_port)))
return self._attempt_finish_socks_handshake()
def _attempt_finish_socks_handshake(self):
# Receive the first byte of the server reply using the
# underlying recv() primitive, and suspend this operation if
# it comes back with EAGAIN, or fail it if it gives an error.
# Callers of connect_ex expect to get EINPROGRESS, not EAGAIN.
log.debug("Attempting to read SOCKS reply.")
resp0 = self._sock.recv(1)
except socket.error as e:
if e.errno in _ERRNO_RETRY:
log.debug("SOCKS reply not yet available.")
return errno.EINPROGRESS
log.debug("Connection failure: %s", e)
self._connecting = False
self._conn_err = e.errno
return e.errno
if resp0 != "\x05":
self._connecting = False
raise error.SOCKSv5Error(
"Protocol error: server reply begins with 0x%02x, not 0x05"
% ord(resp0))
# We are now committed to receiving and processing the server
# response.
resp = self._recv_all(3)
if resp[0] != "\x00":
self._connecting = False
val = ord(resp[0])
if val in socks5_errors:
self._conn_err = socks5_errors[val]
log.debug("Connection failure at protocol level: %s",
return self._conn_err
# Add to the queue before calling orig_neg, because orig_neg also performs
# the actual connection to the destination.
# args[0] is the original socket to the proxy address
orig_neg(*args, **kwargs)
except Exception as e:
log.debug("Error in custom negotiation function: {}".format(e))
self._proxy_negotiators[2] = ourneg
def negotiate(self):
proxy_type, addr, port, rdns, username, password = self.proxy
socks._BaseSocket.connect(self, (addr, port))
socks._BaseSocket.sendall(self, struct.pack('BBB', 0x05, 0x01, 0x00))
socks._BaseSocket.recv(self, 2)
def resolve(self, hostname):
"Resolves the given domain name over the proxy"
host = hostname.encode("utf-8")
# First connect to the local proxy
req = struct.pack('BBB', 0x05, 0xF0, 0x00)
req += chr(0x03).encode() + chr(len(host)).encode() + host
req = req + struct.pack(">H", 8444)
socks._BaseSocket.sendall(self, req)
# Get the response
ip = ""
resp = socks._BaseSocket.recv(self, 4)
if resp[0:1] != chr(0x05).encode():
raise error.SOCKSv5Error("SOCKS Server error")
elif resp[1:2] != chr(0x00).encode():
# Connection failed
if ord(resp[1:2])<=8:
raise error.SOCKSv5Error("SOCKS Server error {}".format(ord(resp[1:2])))
raise error.SOCKSv5Error("Unrecognized SOCKSv5 error: %d" % val)
# Read and discard the rest of the reply, which consists of an
# address type (1 byte), variable-length address (depending on the
# address type), and port number (2 bytes).
if resp[2] == "\x01":
elif resp[2] == "\x03":
length = self._recv_all(1)
raise error.SOCKSv5Error("SOCKS Server error 9")
# Get the bound address/port
elif resp[3:4] == chr(0x01).encode():
ip = socket.inet_ntoa(socks._BaseSocket.recv(self, 4))
elif resp[3:4] == chr(0x03).encode():
resp = resp + socks._BaseSocket.recv(self, 1)
ip = socks._BaseSocket.recv(self, ord(resp[4:5]))
# We are now officially connected.
log.debug("Now connected to %s:%d.", *self._peer_addr)
self._connected = True
return 0
def _maybe_finish_socks_handshake(self):
if self._connected:
if not self._connecting:
raise socket.error(errno.ENOTCONN, os.strerror(errno.ENOTCONN))
err = self._attempt_finish_socks_handshake()
if err:
# Callers of _this_ function expect EAGAIN, not EINPROGRESS.
if err in _ERRNO_RETRY:
raise socket.error(errno.EAGAIN, os.strerror(errno.EAGAIN))
raise socket.error(err, os.strerror(err))
# All of these functions must be prepared to process the final
# message of the SOCKS handshake.
def send(self, *args):
return self._sock.send(*args)
def sendall(self, *args):
return self._sock.sendall(*args)
def recv(self, *args):
return self._sock.recv(*args)
def recv_into(self, *args):
return self._sock.recv_into(*args)
def makefile(self, *args):
# This one is a normal method on socket.socket.
return super(_Torsocket, self).makefile(*args)
# These sockets can only be used as client sockets.
def accept(self): raise NotImplementedError
def bind(self): raise NotImplementedError
def listen(self): raise NotImplementedError
# These sockets can only be used as connected sockets.
def sendto(self, *a): raise NotImplementedError
def recvfrom(self, *a): raise NotImplementedError
def recvfrom_into(self, *a): raise NotImplementedError
# Provide information about the ultimate destination, not the
# proxy server. On normal sockets, getpeername() works immediately
# after connect(), even if it returned EINPROGRESS.
def getpeername(self):
if not self._connecting:
raise socket.error(errno.ENOTCONN, os.strerror(errno.ENOTCONN))
return self._peer_addr
# Provide the pending connection error if appropriate.
def getsockopt(self, level, opt, *args):
if level == socket.SOL_SOCKET and opt == socket.SO_ERROR:
if self._connecting:
err = self._attempt_finish_socks_handshake()
if err == errno.EINPROGRESS:
return 0 # there's no pending connection error yet
if self._conn_err is not None:
err = self._conn_err
self._conn_err = None
return err
raise error.SOCKSv5Error("SOCKS Server error.")
boundport = struct.unpack(">H", socks._BaseSocket.recv(self, 2))[0]
return ip
return self._sock.getsockopt(level, opt, *args)
def torsocket(family=socket.AF_INET, type=socket.SOCK_STREAM,
......@@ -375,6 +164,8 @@ def torsocket(family=socket.AF_INET, type=socket.SOCK_STREAM,
return _Torsocket(family, type, proto, _sock)