Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Hiro
GetTor
Commits
4e6e5805
Commit
4e6e5805
authored
Feb 08, 2016
by
Israel Leiva
Browse files
Merge pull request #11 from aagbsn/prune_stale_uploads
Prune stale uploads
parents
21816d16
bfbc97fd
Changes
49
Hide whitespace changes
Inline
Side-by-side
AUTHORS
View file @
4e6e5805
Current maintainer/core developers:
Israel Leiva <ilv@riseup.net> 4096R/540BFC0E
Sukhbir Sing <sukhbir@torproject.org> 4096R/B297B391
Past core developers:
Jacob Appelbaum <jacob@appelbaum.net>
...
...
@@ -11,3 +10,4 @@ Contributors:
Runa A. Sandvik <runa@torproject.org>
Nima Fatemi <nima@torproject.org>
Poly <poly@darkdepths.net>
Sukhbir Singh <sukhbir@torproject.org> 4096R/B297B391
get_mirrors.py
0 → 100644
View file @
4e6e5805
# -*- coding: utf-8 -*-
#
# This file is part of GetTor
#
# :authors: Israel Leiva <ilv@torproject.org>
# see also AUTHORS file
#
# :license: This is Free Software. See LICENSE for license information.
"""get_mirrors -- Download the list of tpo's mirrors for GetTor."""
import
os
import
json
import
codecs
import
logging
import
argparse
from
OpenSSL
import
SSL
from
OpenSSL
import
crypto
from
twisted.web
import
client
from
twisted.python
import
log
from
twisted.internet
import
ssl
from
twisted.internet
import
defer
from
twisted.internet
import
protocol
from
twisted.internet
import
reactor
from
twisted.internet.error
import
TimeoutError
from
twisted.internet.error
import
DNSLookupError
from
twisted.internet.error
import
ConnectionRefusedError
from
gettor
import
utils
# Associate each protocol with its column name in tor-mirrors.csv.
# Keys are the values accepted by the --proto command line option; values
# are the csv column names used to look the mirror URL up in each entry.
PROTOS = {
    'https': 'httpsWebsiteMirror',
    'http': 'httpWebsiteMirror',
    'rsync': 'rsyncWebsiteMirror',
    'https-dist': 'httpsDistMirror',
    'http-dist': 'httpDistMirror',
    'rsync-dist': 'rsyncDistMirror',
    'ftp': 'ftpWebsiteMirror',
    'onion': 'hiddenServiceMirror',
}
# Tor Project's website certificate (PEM). main() reads this file and uses
# the common name of the certificate it contains to check the server.
# It can be obtained with:
# $ openssl s_client -showcerts -connect tpo:443 < /dev/null > tpo.pem
CERT_TPO = '/path/to/gettor/tpo.pem'
# Taken from get-tor-exits (BridgeDB)
class FileWriter(protocol.Protocol):
    """Stream a downloaded response body into an already-open file."""

    def __init__(self, finished, file):
        """Set up the writer.

        .. warning:: Only the first 2MB of the body are kept; anything past
            that limit is silently dropped. *note*: this should be enough
            for the mirrors file.

        :param finished: A :class:`~twisted.internet.defer.Deferred` that is
            fired with ``None`` when the connection is lost.
        :param file: An open, writable file-like object that receives the
            downloaded data.
        """
        self.fh = file
        self.remaining = 2 * 1024 * 1024
        self.finished = finished

    def dataReceived(self, bytes):
        """Write the received chunk to disk, up to the remaining quota."""
        if not self.remaining:
            # Quota exhausted: drop the rest of the body.
            return
        chunk = bytes[:self.remaining]
        self.fh.write(chunk)
        self.fh.flush()
        self.remaining -= len(chunk)

    def connectionLost(self, reason):
        """Log why the transfer ended and fire the ``finished`` deferred."""
        logging.info(
            'Finished receiving mirrors list: %s' % reason.getErrorMessage()
        )
        self.finished.callback(None)
# Based on tor2web.utils.ssl (Tor2web)
class HTTPSVerifyingContextFactory(ssl.ClientContextFactory):
    """Client context factory that pins the server certificate's common name.

    SSLv2 and SSLv3 are disabled and the common name of the certificate
    presented by the server is compared with the expected one.
    """

    def __init__(self, cn):
        """Remember the expected common name.

        :param cn: common name the server certificate must carry.
        """
        self.cn = cn
        #
        # From https://docs.python.org/2/library/ssl.html#ssl-security
        #
        # "SSL versions 2 and 3 are considered insecure and are therefore
        # dangerous to use. If you want maximum compatibility between clients
        # and servers, it is recommended to use PROTOCOL_SSLv23 as the protocol
        # version and then disable SSLv2 and SSLv3 explicitly"
        #
        self.method = SSL.SSLv23_METHOD

    def getContext(self, hostname, port):
        """Build the OpenSSL context used for this connection.

        SSLv2 and SSLv3 are switched off and ``verifyCN`` is installed as
        the certificate verification callback.
        """
        context = self._contextFactory(self.method)
        for flag in (SSL.OP_NO_SSLv2, SSL.OP_NO_SSLv3):
            context.set_options(flag)
        verify_mode = SSL.VERIFY_PEER | SSL.VERIFY_FAIL_IF_NO_PEER_CERT
        context.set_verify(verify_mode, self.verifyCN)
        return context

    def verifyCN(self, connection, x509, errno, depth, preverifyOK):
        """Verification callback: accept the chain, pin the leaf's CN.

        Certificates above the leaf (``depth`` != 0) are always accepted;
        the leaf is accepted only when its common name equals ``self.cn``.
        """
        # NOTE(review): ``preverifyOK`` is ignored, so OpenSSL's own chain
        # validation result never influences the outcome -- a certificate
        # that merely carries the right common name is accepted. Confirm
        # whether this is intended; honouring ``preverifyOK`` would require
        # loading trust roots into the context first.
        if depth != 0:
            # Intermediate/root certificates: let the walk continue down
            # to the leaf.
            return True
        return self.cn == x509.get_subject().commonName
# Based on get-tor-exits (BridgeDB)
def handle(failure):
    """Log the reason of a download **failure** and terminate the process.

    :param failure: the failure passed to the errback; its ``type``
        attribute selects the message that gets logged.
    """
    known_reasons = (
        (ConnectionRefusedError,
         "Couldn't download file; connection was refused."),
        (DNSLookupError,
         "Couldn't download file; domain resolution failed."),
        (TimeoutError,
         "Couldn't download file; connection timed out."),
    )
    message = "Couldn't download file."
    for error_type, text in known_reasons:
        if failure.type == error_type:
            message = text
            break
    logging.error(message)

    print("Couldn't download file. Check the log.")
    # Hard exit: nothing else can be done without the mirrors file.
    os._exit(-1)
# Taken from get-tor-exits (BridgeDB)
def writeToFile(response, filename):
    """Stream the body of ``response`` into ``filename``.

    :param response: object whose ``deliverBody`` method accepts a protocol.
    :param filename: despite its name, the object handed to
        :class:`FileWriter` as the destination (main() passes an open file).
    :return: a deferred that :class:`FileWriter` fires once the connection
        is lost.
    """
    done = defer.Deferred()
    writer = FileWriter(done, filename)
    response.deliverBody(writer)
    return done
def is_json(my_json):
    """Tell whether ``my_json`` is a parseable JSON document.

    :param my_json: the serialized text to check.
    :return: ``True`` when ``json.loads`` accepts the text, ``False``
        otherwise (including when the argument is not text at all).
    """
    try:
        json.loads(my_json)
    except (ValueError, TypeError):
        # ValueError: malformed JSON; TypeError: not a string (e.g. None).
        return False
    return True
def add_tpo_link(url):
    """Return ``url`` extended with the Tor Browser download section.

    Exactly one slash is inserted between ``url`` and the download path
    when ``url`` does not already end with one.
    """
    uri = 'projects/torbrowser.html.en#downloads'
    separator = '' if url.endswith('/') else '/'
    return "%s%s%s" % (url, separator, uri)
def add_entry(mirrors, columns, elements):
    """Append one mirror record to ``mirrors``.

    :param mirrors: list the new record (a dict) is appended to.
    :param columns: column names; ``columns[i]`` is the key for
        ``elements[i]``.
    :param elements: the values of one csv row; newline characters are
        removed from each of them.
    """
    record = {}
    for position, value in enumerate(elements):
        record[columns[position]] = value.replace("\n", '')
    mirrors.append(record)
def add_mirror(file, entry, proto):
    """Write one mirror line to the mirrors list.

    :param file: open, writable file-like object for the mirrors list.
    :param entry: dict describing one mirror; it must contain the key
        ``proto`` as well as 'orgName' and 'subRegion'.
    :param proto: name of the csv column holding the mirror URL, i.e. one
        of the *values* of ``PROTOS`` (e.g. 'httpsWebsiteMirror').
    """
    # ``proto`` is a column name, not a key of PROTOS, so it has to be
    # compared with the http(s) website columns; for those we point the
    # user straight at the download section.
    if proto in (PROTOS['http'], PROTOS['https']):
        uri = add_tpo_link(entry[proto])
    else:
        uri = entry[proto]

    file.write(
        "%s - by %s (%s)\n" % (uri, entry['orgName'], entry['subRegion'])
    )
def main():
    """Script to get the list of tpo's mirrors from tpo and adapt it to
    be used by GetTor.

    Usage: python2.7 get_mirrors.py [-h] [--proto protocol]

    By default, the protocol is 'https'. Possible values of protocol are:
    http, https, rsync, ftp, onion, http-dist, https-dist, rsync-dist, all.

    Steps: load the pinned certificate, download tor-mirrors.csv over
    https, convert it to json and finally write the plain-text list of
    mirrors for the requested protocol(s). Errors are logged and abort the
    run by returning early.
    """
    parser = argparse.ArgumentParser(
        description="Utility to download tpo's mirrors and make it usable "
                    "by GetTor."
    )
    parser.add_argument(
        '-p', '--proto', default='https',
        # Reject unknown protocols up front instead of failing with a
        # KeyError after the download.
        choices=sorted(PROTOS) + ['all'],
        help='Protocol filter. Possible values: http, https, rsync, ftp, '
             'onion, http-dist, https-dist, rsync-dist, all. '
             'Default to https.'
    )
    args = parser.parse_args()
    p = args.proto

    gettor_path = '/path/to/gettor/'
    csv_path = os.path.join(gettor_path, 'tor-mirrors.csv')
    json_path = os.path.join(gettor_path, 'tor-mirrors')
    mirrors_list = os.path.join(gettor_path, 'mirrors-list.txt')

    # Load tpo certificate and extract common name, we'll later compare this
    # with the certificate sent by tpo to check we're really talking to it
    try:
        with open(CERT_TPO) as cert_file:
            data = cert_file.read()
        x509 = crypto.load_certificate(crypto.FILETYPE_PEM, data)
        cn_tpo = x509.get_subject().commonName
    except Exception as e:
        logging.error("Error with certificate: %s" % str(e))
        return

    # While we wait the json of mirrors to be implemented in tpo, we need
    # to download the csv file and transform it to json
    # The code below is based on get-tor-exits script from BridgeDB and
    # the tor2web.utils.ssl module from Tor2web
    url = 'https://www.torproject.org/include/tor-mirrors.csv'

    try:
        # Binary mode: the response body is written as raw bytes.
        fh = open(csv_path, 'wb')
    except IOError:
        logging.error("Could not open %s" % csv_path)
        return

    logging.info("Requesting %s..." % url)

    # If the certificate doesn't match, the request fails and the errback
    # (handle) terminates the process.
    contextFactory = HTTPSVerifyingContextFactory(cn_tpo)
    agent = client.Agent(reactor, contextFactory)
    d = agent.request("GET", url)
    d.addCallback(writeToFile, fh)
    d.addErrback(handle)
    d.addCallbacks(log.msg, log.err)

    if not reactor.running:
        d.addCallback(lambda ignored: reactor.stop())
        reactor.run()
        # The download is over once the reactor stops; release the handle
        # before the file is read back.
        fh.close()

    logging.info("File downloaded!")

    # Now transform it to json -- I couldn't find out how to use a
    # two-character delimiter with the csv package, so I decided to handle
    # the csv data by hand. We are doing this until #16601 gets deployed.
    # https://trac.torproject.org/projects/tor/ticket/16601
    # Code below is based on update-mirrors-json.py script from tpo

    # These are the names of each column e.g. adminContact
    columns = []

    # List of mirrors to be built
    mirrors = []

    logging.info("Transforming csv data into json...")
    logging.info("Getting data from csv")
    try:
        with codecs.open(csv_path, "rb", "utf-8") as csvfile:
            for line in csvfile:
                if not line.strip():
                    # Blank lines carry no record.
                    continue
                elements = line.split(", ")
                # first entry has the names of the columns
                if not columns:
                    # Drop the line terminator, otherwise the last column
                    # name would never match the names in PROTOS.
                    columns = [name.replace("\n", '') for name in elements]
                else:
                    add_entry(mirrors, columns, elements)
    except IOError as e:
        logging.error("Couldn't read csv file: %s" % str(e))
        return

    logging.info("Creating json")
    if not is_json(json.dumps(mirrors)):
        logging.error("Invalid json file")
        return

    try:
        with codecs.open(json_path, "w", "utf-8") as jsonfile:
            # Make pretty json
            json.dump(
                mirrors,
                jsonfile,
                sort_keys=True,
                indent=4,
                separators=(',', ': '),
            )
    except IOError as e:
        logging.error("Couldn't write json: %s" % str(e))
        return

    # Now make the mirrors list to be used by GetTor
    logging.info("Reading json")
    try:
        with codecs.open(json_path, "rb", "utf-8") as mirrors_json:
            mirrors = json.load(mirrors_json)
    except IOError:
        logging.error("Couldn't open %s" % json_path)
        return

    logging.info("Creating new list with protocol: %s" % p)

    # Columns to export: every known one for 'all', a single one otherwise.
    if p == 'all':
        wanted_columns = list(PROTOS.values())
    else:
        wanted_columns = [PROTOS[p]]

    try:
        with codecs.open(mirrors_list, "w", "utf-8") as list_file:
            for entry in mirrors:
                for column in wanted_columns:
                    # Skip mirrors that don't offer this protocol (missing
                    # or empty column).
                    if entry.get(column):
                        add_mirror(list_file, entry, column)
        logging.info("List created: %s" % mirrors_list)
    except IOError:
        logging.error("Couldn't open %s" % mirrors_list)
        return
if __name__ == "__main__":
    # Send every log record to the get-mirrors log file, using the
    # logging format shared through gettor.utils.
    logging.basicConfig(
        filename='/path/to/gettor/log/get-mirrors.log',
        format=utils.get_logging_format(),
        datefmt="%Y-%m-%d %H:%M:%S",
        level=logging.INFO,
    )

    logging.info("Started")
    main()
    logging.info("Finished")
gettor/blacklist.py
View file @
4e6e5805
...
...
@@ -27,6 +27,14 @@ class BlacklistError(Exception):
pass
class ConfigError(Exception):
    """Raised when the configuration file is missing or can't be used."""
    pass
class InternalError(Exception):
    """Raised when something goes wrong internally (e.g. a database error)."""
    pass
class
Blacklist
(
object
):
"""Manage blacklisting of users.
...
...
@@ -38,6 +46,7 @@ class Blacklist(object):
ConfigurationError: Bad configuration.
BlacklistError: User is blacklisted.
InternalError: Something went wrong internally.
"""
...
...
@@ -47,44 +56,46 @@ class Blacklist(object):
:param: cfg (string) path of the configuration file.
"""
# define a set of default values
DEFAULT_CONFIG_FILE
=
'blacklist.cfg'
logging
.
basicConfig
(
format
=
'[%(levelname)s] %(asctime)s - %(message)s'
,
datefmt
=
"%Y-%m-%d %H:%M:%S"
)
log
=
logging
.
getLogger
(
__name__
)
default_cfg
=
'blacklist.cfg'
config
=
ConfigParser
.
ConfigParser
()
if
cfg
is
None
or
not
os
.
path
.
isfile
(
cfg
):
cfg
=
DEFAULT_CONFIG_FILE
config
.
read
(
cfg
)
cfg
=
default_cfg
try
:
dbname
=
config
.
get
(
'general'
,
'db'
)
self
.
db
=
db
.
DB
(
dbname
)
except
ConfigParser
.
Error
as
e
:
raise
ConfigError
(
"
Couldn't read 'db' from 'general'"
)
with
open
(
cfg
)
as
f
:
config
.
readfp
(
f
)
except
IOError
:
raise
ConfigError
(
"
File %s not found!"
%
cfg
)
try
:
dbname
=
config
.
get
(
'general'
,
'db'
)
logdir
=
config
.
get
(
'log'
,
'dir'
)
logfile
=
os
.
path
.
join
(
logdir
,
'blacklist.log'
)
except
ConfigParser
.
Error
as
e
:
raise
ConfigError
(
"Couldn't read 'dir' from 'log'"
)
try
:
loglevel
=
config
.
get
(
'log'
,
'level'
)
self
.
db
=
db
.
DB
(
dbname
)
except
ConfigParser
.
Error
as
e
:
raise
ConfigError
(
"Couldn't read 'level' from 'log'"
)
raise
ConfigError
(
"%s"
%
e
)
except
db
.
Exception
as
e
:
raise
ConfigError
(
"%s"
%
e
)
# establish log level and redirect to log file
log
.
info
(
'Redirecting logging to %s'
%
logfile
)
# logging
log
=
logging
.
getLogger
(
__name__
)
logging_format
=
utils
.
get_logging_format
()
date_format
=
utils
.
get_date_format
()
formatter
=
logging
.
Formatter
(
logging_format
,
date_format
)
log
.
info
(
'Redirecting BLACKLIST logging to %s'
%
logfile
)
logfileh
=
logging
.
FileHandler
(
logfile
,
mode
=
'a+'
)
logfileh
.
setFormatter
(
formatter
)
logfileh
.
setLevel
(
logging
.
getLevelName
(
loglevel
))
log
.
addHandler
(
logfileh
)
# stop logging on stdout from now on
log
.
propagate
=
False
self
.
log
=
log
def
is_blacklisted
(
self
,
user
,
service
,
max_req
,
wait_time
):
"""Check if a user is blacklisted.
...
...
@@ -109,28 +120,41 @@ class Blacklist(object):
:raise: BlacklistError if the user is blacklisted
"""
r
=
self
.
db
.
get_user
(
user
,
service
)
if
r
:
# permanently blacklisted
if
r
[
'blocked'
]:
self
.
db
.
update_user
(
user
,
service
,
r
[
'times'
]
+
1
,
1
)
raise
BlacklistError
(
"Blocked user"
)
# don't be greedy
elif
r
[
'times'
]
>=
max_req
:
last
=
datetime
.
datetime
.
fromtimestamp
(
float
(
r
[
'last_request'
]))
next
=
last
+
datetime
.
timedelta
(
minutes
=
wait_time
)
if
datetime
.
datetime
.
now
()
<
next
:
# too many requests from the same user
self
.
db
.
update_user
(
user
,
service
,
r
[
'times'
]
+
1
,
0
)
raise
BlacklistError
(
"Too many requests"
)
try
:
self
.
log
.
info
(
"Trying to get info from user"
)
self
.
db
.
connect
()
r
=
self
.
db
.
get_user
(
user
,
service
)
if
r
:
# permanently blacklisted
if
r
[
'blocked'
]:
self
.
log
.
warning
(
"Request from user permanently blocked"
)
self
.
db
.
update_user
(
user
,
service
,
r
[
'times'
]
+
1
,
1
)
raise
BlacklistError
(
"Blocked user"
)
# don't be greedy
elif
r
[
'times'
]
>=
max_req
:
last
=
datetime
.
datetime
.
fromtimestamp
(
float
(
r
[
'last_request'
])
)
next
=
last
+
datetime
.
timedelta
(
minutes
=
wait_time
)
if
datetime
.
datetime
.
now
()
<
next
:
self
.
log
.
warning
(
"Too many requests from same user"
)
self
.
db
.
update_user
(
user
,
service
,
r
[
'times'
]
+
1
,
0
)
raise
BlacklistError
(
"Too many requests"
)
else
:
# fresh user again!
self
.
log
.
info
(
"Updating counter for existing user"
)
self
.
db
.
update_user
(
user
,
service
,
1
,
0
)
else
:
# fresh user again!
self
.
db
.
update_user
(
user
,
service
,
1
,
0
)
# adding up a request for user
self
.
log
.
info
(
"Request from existing user"
)
self
.
db
.
update_user
(
user
,
service
,
r
[
'times'
]
+
1
,
0
)
else
:
# adding up a request for user
self
.
db
.
update_user
(
user
,
service
,
r
[
'times'
]
+
1
,
0
)
else
:
# new request for user
self
.
db
.
add_user
(
user
,
service
,
0
)
# new request for user
self
.
log
.
info
(
"Request from new user"
)
self
.
db
.
add_user
(
user
,
service
,
0
)
except
db
.
DBError
as
e
:
self
.
log
.
error
(
"Something failed!"
)
raise
InternalError
(
"Error with database (%s)"
%
str
(
e
))
except
BlacklistError
as
e
:
raise
BlacklistError
(
e
)
gettor/core.py
View file @
4e6e5805
...
...
@@ -27,11 +27,7 @@ class ConfigError(Exception):
pass
class
UnsupportedOSError
(
Exception
):
pass
class
UnsupportedLocaleError
(
Exception
):
class
NotSupportedError
(
Exception
):
pass
...
...
@@ -60,8 +56,7 @@ class Core(object):
Exceptions:
UnsupportedOSError: Request for an unsupported operating system.
UnsupportedLocaleError: Request for an unsupported locale.
UnsupportedOSError: OS and/or locale not supported.
ConfigError: Something's misconfigured.
LinkFormatError: The link added doesn't seem legit.
LinkFileError: Error related to the links file of a provider.
...
...
@@ -77,71 +72,56 @@ class Core(object):
or if something goes wrong while reading options from it.
"""
# define a set of default values
DEFAULT_CONFIG_FILE
=
'core.cfg'
logging
.
basicConfig
(
format
=
'[%(levelname)s] %(asctime)s - %(message)s'
,
datefmt
=
"%Y-%m-%d %H:%M:%S"
)
log
=
logging
.
getLogger
(
__name__
)
default_cfg
=
'core.cfg'
config
=
ConfigParser
.
ConfigParser
()
if
cfg
is
None
or
not
os
.
path
.
isfile
(
cfg
):
cfg
=
DEFAULT_CONFIG_FILE
config
.
read
(
cfg
)
try
:
basedir
=
config
.
get
(
'general'
,
'basedir'
)
except
ConfigParser
.
Error
as
e
:
raise
ConfigError
(
"Couldn't read 'basedir' from 'general'"
)
cfg
=
default_cfg
try
:
dbname
=
config
.
get
(
'general'
,
'db'
)
dbname
=
os
.
path
.
join
(
basedir
,
dbname
)
self
.
db
=
db
.
DB
(
dbname
)
except
ConfigParser
.
Error
as
e
:
raise
ConfigError
(
"Couldn't read 'db' from 'general'"
)
try
:
self
.
linksdir
=
config
.
get
(
'links'
,
'dir'
)
self
.
linksdir
=
os
.
path
.
join
(
basedir
,
self
.
linksdir
)
except
ConfigParser
.
Error
as
e
:
raise
ConfigError
(
"Couldn't read 'links' from 'dir'"
)
with
open
(
cfg
)
as
f
:
config
.
readfp
(
f
)
except
IOError
:
raise
ConfigError
(
"File %s not found!"
%
cfg
)
try
:
self
.
supported_lc
=
config
.
get
(
'links'
,
'locales'
)
except
ConfigParser
.
Error
as
e
:
raise
ConfigError
(
"Couldn't read 'locales' from 'links'"
)
try
:
self
.
supported_os
=
config
.
get
(
'links'
,
'os'
)
except
ConfigParser
.
Error
as
e
:
raise
ConfigError
(
"Couldn't read 'os' from 'links'"
)
try
:
loglevel
=
config
.
get
(
'log'
,
'level'
)
except
ConfigParser
.
Error
as
e
:
raise
ConfigError
(
"Couldn't read 'level' from 'log'"
)
try
:
basedir
=
config
.
get
(
'general'
,
'basedir'
)
self
.
linksdir
=
config
.
get
(
'links'
,
'dir'
)
self
.
linksdir
=
os
.
path
.
join
(
basedir
,
self
.
linksdir
)
self
.
i18ndir
=
config
.
get
(
'i18n'
,
'dir'
)
except
ConfigParser
.
Error
as
e
:
raise
ConfigError
(
"Couldn't read 'dir' from 'i18n'"
)
try
:
loglevel
=
config
.
get
(
'log'
,
'level'
)
logdir
=
config
.
get
(
'log'
,
'dir'
)
logfile
=
os
.
path
.
join
(
logdir
,
'core.log'
)
dbname
=
config
.
get
(
'general'
,
'db'
)
dbname
=
os
.
path
.
join
(
basedir
,
dbname
)
self
.
db
=
db
.
DB
(
dbname
)
except
ConfigParser
.
Error
as
e
:
raise
ConfigError
(
"Couldn't read 'dir' from 'log'"
)
raise
ConfigError
(
"Configuration error: %s"
%
str
(
e
))
except
db
.
Exception
as
e
:
raise
InternalError
(
"%s"
%
e
)
# logging
log
=
logging
.
getLogger
(
__name__
)
logging_format
=
utils
.
get_logging_format
()
date_format
=
utils
.
get_date_format
()