Commit 383d4a1d authored by Silvio Rhatto
Browse files

Merge branch 'feat/circuit-ids' into 'main'

Feat: improve regex to account for parsing circuit IDs (#8)

See merge request tpo/onion-services/eotk-log-parser!1
parents 6e4521dd a68e7bf5
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -29,9 +29,9 @@ from datetime import datetime

# Log entry pattern
# Adapted from https://gist.github.com/hreeder/f1ffe1408d296ce0591d
# Tested at https://pythex.org/?regex=unix%3A%20-%20-%20%5C%5B(%3FP%3Cdateandtime%3E%5Cd%7B2%7D%5C%2F%5Ba-z%5D%7B3%7D%5C%2F%5Cd%7B4%7D%3A%5Cd%7B2%7D%3A%5Cd%7B2%7D%3A%5Cd%7B2%7D%20(%5C%2B%7C%5C-)%5Cd%7B4%7D)%5C%5D%20((%5C%22(GET%7CPOST)%20)(%3FP%3Cpath%3E.%2B)(http%5C%2F1%5C.1%22))%20(%3FP%3Cstatuscode%3E%5Cd%7B3%7D)%20(%3FP%3Cbytessent%3E%5Cd%2B)%20%5C%22(%3FP%3Curl%3E%5B%5E%5C%22%5D%2B)%5C%22%20%5C%22(%3FP%3Cuseragent%3E%5B%5E%5C%22%5D%2B)%5C%22&test_string=unix%3A%20-%20-%20%5B23%2FMay%2F2022%3A11%3A05%3A17%20%2B0000%5D%20%22GET%20%2Fsome%2Fpath.html%20HTTP%2F1.1%22%20301%200%20%22http%3A%2F%2Fihpiuhiqwudhqpiuhiuhqwiuhdpqiwuhdpiquhdqwiuhiauauauauaua.onion%2Fsome%2Fpath.html%22%20%22Mozilla%2F5.0%20(Windows%20NT%206.2%3B%20WOW64)%20AppleWebKit%2F537.31%20(KHTML%2C%20like%20Gecko)%20Chrome%2F123.4.567.89%20Safari%2F123.45%22%0Aunix%3A%20-%20-%20%5B01%2FJun%2F2022%3A00%3A05%3A07%20%2B0000%5D%20%22GET%20%2F%20HTTP%2F1.1%22%20200%20170257%20%22-%22%20%22python-requests%2F2.25.1%22&ignorecase=1&multiline=0&dotall=0&verbose=0
# Tested at https://pythex.org/?regex=(%3FP%3Corigin%3E.*)%20-%20-%20%5C%5B(%3FP%3Cdateandtime%3E%5Cd%7B2%7D%5C%2F%5Ba-z%5D%7B3%7D%5C%2F%5Cd%7B4%7D%3A%5Cd%7B2%7D%3A%5Cd%7B2%7D%3A%5Cd%7B2%7D%20(%5C%2B%7C%5C-)%5Cd%7B4%7D)%5C%5D%20((%5C%22(GET%7CPOST)%20)(%3FP%3Cpath%3E.%2B)(http%5C%2F1%5C.1%22))%20(%3FP%3Cstatuscode%3E%5Cd%7B3%7D)%20(%3FP%3Cbytessent%3E%5Cd%2B)%20%5C%22(%3FP%3Curl%3E%5B%5E%5C%22%5D%2B)%5C%22%20%5C%22(%3FP%3Cuseragent%3E%5B%5E%5C%22%5D%2B)%5C%22&test_string=unix%3A%20-%20-%20%5B23%2FMay%2F2022%3A11%3A05%3A17%20%2B0000%5D%20%22GET%20%2Fsome%2Fpath.html%20HTTP%2F1.1%22%20301%200%20%22http%3A%2F%2Fihpiuhiqwudhqpiuhiuhqwiuhdpqiwuhdpiquhdqwiuhiauauauauaua.onion%2Fsome%2Fpath.html%22%20%22Mozilla%2F5.0%20(Windows%20NT%206.2%3B%20WOW64)%20AppleWebKit%2F537.31%20(KHTML%2C%20like%20Gecko)%20Chrome%2F123.4.567.89%20Safari%2F123.45%22%0Aunix%3A%20-%20-%20%5B01%2FJun%2F2022%3A00%3A05%3A07%20%2B0000%5D%20%22GET%20%2F%20HTTP%2F1.1%22%20200%20170257%20%22-%22%20%22python-requests%2F2.25.1%22%0Afc00%3Adead%3Abeef%3A4dad%3A%3A0%3A30%20-%20-%20%5B11%2FJan%2F2024%3A17%3A08%3A43%20%2B0000%5D%20%22GET%20%2Fstatic%2Ffonts%2Ffontawesome%2Fwebfonts%2Ffa-brands-400.woff2%20HTTP%2F1.1%22%20200%2073936%20%22-%22%20%22Mozilla%2F5.0%20(Windows%20NT%2010.0%3B%20rv%3A109.0)%20Gecko%2F20100101%20Firefox%2F115.0%22&ignorecase=1&multiline=0&dotall=0&verbose=0
# Compiled pattern for one access-log entry.
#
# Named groups:
#   origin      - everything before the " - - [" separator (for example the
#                 literal "unix:" or an IPv6-style identifier)
#   dateandtime - timestamp such as "01/Jun/2022:00:05:07 +0000"
#   path        - request target; note that it keeps the trailing space that
#                 precedes the protocol token
#   statuscode  - three-digit status code
#   bytessent   - decimal byte count
#   url         - first quoted field after the byte count
#   useragent   - second quoted field after the byte count
#
# Only GET and POST requests over HTTP/1.1 match. re.IGNORECASE is required
# because the pattern spells the month as [a-z]{3} and the protocol as
# "http", while log lines carry "Jun" and "HTTP".
#
# re.compile() accepts exactly one pattern plus optional flags, so only the
# current pattern (the one with the "origin" group) is passed here.
log_regex = re.compile(
        r"(?P<origin>.*) - - \[(?P<dateandtime>\d{2}\/[a-z]{3}\/\d{4}:\d{2}:\d{2}:\d{2} (\+|\-)\d{4})\] ((\"(GET|POST) )(?P<path>.+)(http\/1\.1\")) (?P<statuscode>\d{3}) (?P<bytessent>\d+) \"(?P<url>[^\"]+)\" \"(?P<useragent>[^\"]+)\"",
        re.IGNORECASE
        )