Commit b4a7756a authored by Iain Learmonth's avatar Iain Learmonth
Browse files

[WIP] GeoIP files parser

See: #40004
parent a66a16fb
Pipeline #5307 passed with stage
in 2 minutes and 15 seconds
package org.torproject.descriptor;
import java.util.Map;
/**
 * A descriptor that is also a {@code Map<String, String>} of names.
 *
 * <p>The implementation in this change set fills the map by splitting each
 * line at its first comma: the text before the comma becomes the key and the
 * rest of the line becomes the value.</p>
 */
public interface GeoipNamesFile extends Descriptor, Map<String, String> {
/** End-of-line sequence used as the line delimiter when parsing. */
String EOL = "\n";
}
\ No newline at end of file
......@@ -15,7 +15,7 @@ public class GeoipEntryImpl implements GeoipFile.GeoipEntry {
/**
* An entry in a GeoIP file.
*
* The start and end are expecting to be a 32-bit integer represented as an
* <p>The start and end are expected to be a 32-bit integer represented as an
* ASCII string for IPv4 addresses, and a colon-separated address as an
* ASCII string for IPv6 addresses. If you provide a dotted-quad IPv4 address
* string then this class will also handle that, but you won't see that in
......
......@@ -13,7 +13,7 @@ import java.util.Scanner;
public class GeoipFileImpl extends DescriptorImpl implements GeoipFile {
List<GeoipEntry> entries;
private final List<GeoipEntry> entries;
protected GeoipFileImpl(byte[] rawDescriptorBytes,
int[] offsetAndLength,File descriptorFile)
......
package org.torproject.descriptor.impl;
import org.torproject.descriptor.DescriptorParseException;
import org.torproject.descriptor.GeoipNamesFile;
import java.io.File;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Scanner;
import java.util.Set;
/**
 * Read-only {@link GeoipNamesFile} backed by a map that is filled once while
 * the descriptor is parsed in the constructor.
 *
 * <p>Each non-annotation line is split at its first comma into a key and a
 * name. Lines without a comma are collected as unrecognized lines instead of
 * being added to the map. All mutating {@link Map} operations throw
 * {@link UnsupportedOperationException}, and the collection views returned
 * by {@link #keySet()}, {@link #values()} and {@link #entrySet()} are
 * unmodifiable as well.</p>
 */
public class GeoipNamesFileImpl extends DescriptorImpl
    implements GeoipNamesFile {

  /** Parsed key-to-name mappings; only written to during construction. */
  private final Map<String, String> names;

  /**
   * Parses the given raw bytes as a names file.
   *
   * @param rawDescriptorBytes raw bytes containing the descriptor
   * @param offsetAndLength offset and length of the descriptor within
   *     {@code rawDescriptorBytes}, passed through to the superclass
   * @param descriptorFile file the descriptor was read from, passed through
   *     to the superclass; the tests in this change set pass {@code null}
   * @throws DescriptorParseException if the descriptor ends directly after
   *     an annotation line, or if the superclass constructor rejects the
   *     input
   */
  protected GeoipNamesFileImpl(byte[] rawDescriptorBytes,
      int[] offsetAndLength, File descriptorFile)
      throws DescriptorParseException {
    super(rawDescriptorBytes, offsetAndLength, descriptorFile);
    this.names = new HashMap<>();
    this.splitAndParseEntries();
  }

  /**
   * Walks over all lines of the descriptor and fills {@link #names}.
   *
   * @throws DescriptorParseException if an annotation line is the last line
   *     of the descriptor
   */
  private void splitAndParseEntries() throws DescriptorParseException {
    Scanner scanner = this.newScanner().useDelimiter(EOL);
    while (scanner.hasNext()) {
      String line = scanner.next();
      if (line.startsWith("@")) {
        /* Skip annotation. */
        if (!scanner.hasNext()) {
          throw new DescriptorParseException("Descriptor is empty.");
        }
        continue;
      }
      /* Limit of 2 keeps commas that are part of the name itself, e.g.
       * "BQ,Bonaire, Sint Eustatius and Saba". */
      String[] parts = line.split(",", 2);
      if (parts.length < 2) {
        if (this.unrecognizedLines == null) {
          this.unrecognizedLines = new ArrayList<>();
        }
        this.unrecognizedLines.add(line);
        /* There is no name part, so there is nothing to put in the map. */
        continue;
      }
      this.names.put(parts[0], parts[1]);
    }
  }

  @Override
  public int size() {
    return this.names.size();
  }

  @Override
  public boolean isEmpty() {
    return this.names.isEmpty();
  }

  @Override
  public boolean containsKey(Object key) {
    return this.names.containsKey(key);
  }

  @Override
  public boolean containsValue(Object value) {
    return this.names.containsValue(value);
  }

  @Override
  public String get(Object key) {
    return this.names.get(key);
  }

  /** Always throws; the names map is read-only. */
  @Override
  public String put(String key, String value) {
    throw new UnsupportedOperationException("Names map is read-only.");
  }

  /** Always throws; the names map is read-only. */
  @Override
  public String remove(Object key) {
    throw new UnsupportedOperationException("Names map is read-only.");
  }

  /** Always throws; the names map is read-only. */
  @Override
  public void putAll(Map<? extends String, ? extends String> map) {
    throw new UnsupportedOperationException("Names map is read-only.");
  }

  /** Always throws; the names map is read-only. */
  @Override
  public void clear() {
    throw new UnsupportedOperationException("Names map is read-only.");
  }

  /** Returns an unmodifiable view of the keys. */
  @Override
  public Set<String> keySet() {
    return Collections.unmodifiableSet(this.names.keySet());
  }

  /** Returns an unmodifiable view of the names. */
  @Override
  public Collection<String> values() {
    return Collections.unmodifiableCollection(this.names.values());
  }

  /**
   * Returns an unmodifiable view of the mappings; the contained entries
   * reject {@code setValue} as well.
   */
  @Override
  public Set<Entry<String, String>> entrySet() {
    return Collections.unmodifiableMap(this.names).entrySet();
  }
}
......@@ -3,6 +3,7 @@ package org.torproject.descriptor.impl;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import org.torproject.descriptor.DescriptorParseException;
import org.torproject.descriptor.GeoipFile;
......@@ -69,6 +70,38 @@ public class GeoipFileImplTest {
InetAddress.getByName("223.255.255.128")).get().getCountryCode());
assertEquals("AU", geoipFile.getEntry(
InetAddress.getByName("223.255.255.255")).get().getCountryCode());
assertTrue(geoipFile.getUnrecognizedLines() == null
|| geoipFile.getUnrecognizedLines().isEmpty());
}
/**
 * Parses the IPv4 GeoIP fixture that carries AS numbers and checks that the
 * first parsed entries equal the expected ones, in order, and that no line
 * was left unrecognized.
 */
@Test
public void testParseGeoIP4WithAsn()
    throws IOException, DescriptorParseException {
  LinkedList<GeoipFile.GeoipEntry> expectedResults = new LinkedList<>();
  expectedResults.add(
      new GeoipEntryImpl("1.0.0.0", "1.0.0.255", "AU", "13335"));
  expectedResults.add(
      new GeoipEntryImpl("1.0.1.0", "1.0.3.255", "CN", "0"));
  expectedResults.add(
      new GeoipEntryImpl("1.0.4.0", "1.0.7.255", "AU", "38803"));
  URL resource = getClass().getClassLoader().getResource(
      "geoip/geoip-plus-asn");
  assertNotNull(resource);
  byte[] rawDescriptorBytes;
  /* Close the stream even if reading the fixture fails. */
  try (InputStream dataInputStream = resource.openStream()) {
    assertNotNull(dataInputStream);
    rawDescriptorBytes = IOUtils.toByteArray(dataInputStream);
  }
  GeoipFile geoipFile = new GeoipFileImpl(rawDescriptorBytes,
      new int[]{0, rawDescriptorBytes.length}, null);
  List<GeoipFile.GeoipEntry> entries = geoipFile.getEntries();
  assertFalse(entries.isEmpty());
  for (GeoipFile.GeoipEntry e : entries) {
    if (expectedResults.isEmpty()) {
      break;
    }
    assertEquals(expectedResults.remove(), e);
  }
  /* Without this check the test would pass when fewer entries were parsed
   * than expected, because the loop above simply ends early. */
  assertTrue("Parsed fewer entries than expected.",
      expectedResults.isEmpty());
  assertTrue(geoipFile.getUnrecognizedLines() == null
      || geoipFile.getUnrecognizedLines().isEmpty());
}
@Test
......@@ -141,5 +174,40 @@ public class GeoipFileImplTest {
InetAddress.getByName(
"2c0f:ffff:ffff:ffff:ffff:ffff:ffff:ffff")).get()
.getCountryCode());
assertTrue(geoipFile.getUnrecognizedLines() == null
|| geoipFile.getUnrecognizedLines().isEmpty());
}
/**
 * Parses the IPv6 GeoIP fixture that carries AS numbers and checks that the
 * first parsed entries equal the expected ones, in order, and that no line
 * was left unrecognized.
 */
@Test
public void testParseGeoIP6WithAsn()
    throws IOException, DescriptorParseException {
  LinkedList<GeoipFile.GeoipEntry> expectedResults = new LinkedList<>();
  expectedResults.add(
      new GeoipEntryImpl("2001::",
      "2001:0:ffff:ffff:ffff:ffff:ffff:ffff", "??", "6939"));
  expectedResults.add(
      new GeoipEntryImpl("2001:4:112::",
      "2001:4:112:ffff:ffff:ffff:ffff:ffff", "??", "112"));
  expectedResults.add(
      new GeoipEntryImpl("2001:200::",
      "2001:200:134:ffff:ffff:ffff:ffff:ffff", "JP", "2500"));
  URL resource = getClass().getClassLoader().getResource(
      "geoip/geoip6-plus-asn");
  assertNotNull(resource);
  byte[] rawDescriptorBytes;
  /* Close the stream even if reading the fixture fails. */
  try (InputStream dataInputStream = resource.openStream()) {
    assertNotNull(dataInputStream);
    rawDescriptorBytes = IOUtils.toByteArray(dataInputStream);
  }
  GeoipFile geoipFile = new GeoipFileImpl(rawDescriptorBytes,
      new int[]{0, rawDescriptorBytes.length}, null);
  List<GeoipFile.GeoipEntry> entries = geoipFile.getEntries();
  assertFalse(entries.isEmpty());
  for (GeoipFile.GeoipEntry e : entries) {
    if (expectedResults.isEmpty()) {
      break;
    }
    assertEquals(expectedResults.remove(), e);
  }
  /* Without this check the test would pass when fewer entries were parsed
   * than expected, because the loop above simply ends early. */
  assertTrue("Parsed fewer entries than expected.",
      expectedResults.isEmpty());
  assertTrue(geoipFile.getUnrecognizedLines() == null
      || geoipFile.getUnrecognizedLines().isEmpty());
}
}
package org.torproject.descriptor.impl;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import org.torproject.descriptor.DescriptorParseException;
import org.torproject.descriptor.GeoipNamesFile;
import org.apache.commons.compress.utils.IOUtils;
import org.junit.Test;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.SortedMap;
import java.util.TreeMap;
/**
 * Tests parsing of names files (AS number to name, country code to name)
 * with {@link GeoipNamesFileImpl}.
 */
public class GeoipNamesFileImplTest {

  /**
   * Loads a names file from the test classpath and parses it.
   *
   * @param resourcePath classpath-relative path of the fixture
   * @return the parsed names file
   * @throws IOException if the fixture cannot be read
   * @throws DescriptorParseException if the fixture cannot be parsed
   */
  private GeoipNamesFile parseResource(String resourcePath)
      throws IOException, DescriptorParseException {
    URL resource = getClass().getClassLoader().getResource(resourcePath);
    assertNotNull(resource);
    byte[] rawDescriptorBytes;
    /* Close the stream even if reading the fixture fails. */
    try (InputStream dataInputStream = resource.openStream()) {
      assertNotNull(dataInputStream);
      rawDescriptorBytes = IOUtils.toByteArray(dataInputStream);
    }
    return new GeoipNamesFileImpl(rawDescriptorBytes,
        new int[]{0, rawDescriptorBytes.length}, null);
  }

  /**
   * Asserts that the parsed file is non-empty, contains every expected
   * mapping, and has no unrecognized lines.
   */
  private void assertContainsNames(SortedMap<String, String> expectedResults,
      GeoipNamesFile geoipNamesFile) {
    assertFalse(geoipNamesFile.isEmpty());
    for (String k : expectedResults.keySet()) {
      assertEquals(expectedResults.get(k), geoipNamesFile.get(k));
    }
    assertTrue(geoipNamesFile.getUnrecognizedLines() == null
        || geoipNamesFile.getUnrecognizedLines().isEmpty());
  }

  @Test
  public void testParseGeoipAsnNames()
      throws IOException, DescriptorParseException {
    SortedMap<String, String> expectedResults = new TreeMap<>();
    expectedResults.put("7", "The Defence Science and Technology Laboratory");
    expectedResults.put("28",
        "Deutsches Zentrum fuer Luft- und Raumfahrt e.V.");
    expectedResults.put("137", "Consortium GARR");
    expectedResults.put("224", "UNINETT AS");
    expectedResults.put("251", "Denis Klimek");
    expectedResults.put("328838", "AIM Firms Limited");
    expectedResults.put("328839", "Silver Solutions 1234");
    assertContainsNames(expectedResults, parseResource("geoip/asn.txt"));
  }

  @Test
  public void testParseGeoipCountryNames()
      throws IOException, DescriptorParseException {
    SortedMap<String, String> expectedResults = new TreeMap<>();
    expectedResults.put("AD", "Andorra");
    expectedResults.put("AE", "United Arab Emirates");
    /* This name contains a comma and so exercises the split limit. */
    expectedResults.put("BQ", "Bonaire, Sint Eustatius and Saba");
    expectedResults.put("ZW", "Zimbabwe");
    assertContainsNames(expectedResults,
        parseResource("geoip/countries.txt"));
  }
}
This diff is collapsed.
AD,Andorra
AE,United Arab Emirates
AF,Afghanistan
AG,Antigua and Barbuda
AI,Anguilla
AL,Albania
AM,Armenia
AN,Netherlands Antilles
AO,Angola
AP,Asia/Pacific
AQ,Antarctica
AR,Argentina
AS,American Samoa
AT,Austria
AU,Australia
AW,Aruba
AX,Åland Islands
AZ,Azerbaijan
BA,Bosnia and Herzegovina
BB,Barbados
BD,Bangladesh
BE,Belgium
BF,Burkina Faso
BG,Bulgaria
BH,Bahrain
BI,Burundi
BJ,Benin
BL,Saint Barthélemy
BM,Bermuda
BN,Brunei Darussalam
BO,Bolivia, Plurinational State of
BQ,Bonaire, Sint Eustatius and Saba
BR,Brazil
BS,Bahamas
BT,Bhutan
BV,Bouvet Island
BW,Botswana
BY,Belarus
BZ,Belize
CA,Canada
CC,Cocos (Keeling) Islands
CD,Congo, Democratic Republic of the
CF,Central African Republic
CG,Congo
CH,Switzerland
CI,Côte d'Ivoire
CK,Cook Islands
CL,Chile
CM,Cameroon
CN,China
CO,Colombia
CR,Costa Rica
CS,Serbia and Montenegro
CU,Cuba
CV,Cabo Verde
CW,Curaçao
CX,Christmas Island
CY,Cyprus
CZ,Czechia
DE,Germany
DJ,Djibouti
DK,Denmark
DM,Dominica
DO,Dominican Republic
DZ,Algeria
EC,Ecuador
EE,Estonia
EG,Egypt
EH,Western Sahara
ER,Eritrea
ES,Spain
ET,Ethiopia
EU,European Union
FI,Finland
FJ,Fiji
FK,Falkland Islands (Malvinas)
FM,Micronesia, Federated States of
FO,Faroe Islands
FR,France
FX,France, Metropolitan
GA,Gabon
GB,United Kingdom of Great Britain and Northern Ireland
GD,Grenada
GE,Georgia
GF,French Guiana
GG,Guernsey
GH,Ghana
GI,Gibraltar
GL,Greenland
GM,Gambia
GN,Guinea
GP,Guadeloupe
GQ,Equatorial Guinea
GR,Greece
GS,South Georgia and the South Sandwich Islands
GT,Guatemala
GU,Guam
GW,Guinea-Bissau
GY,Guyana
HK,Hong Kong
HM,Heard Island and McDonald Islands
HN,Honduras
HR,Croatia
HT,Haiti
HU,Hungary
ID,Indonesia
IE,Ireland
IL,Israel
IM,Isle of Man
IN,India
IO,British Indian Ocean Territory
IQ,Iraq
IR,Iran, Islamic Republic of
IS,Iceland
IT,Italy
JE,Jersey
JM,Jamaica
JO,Jordan
JP,Japan
KE,Kenya
KG,Kyrgyzstan
KH,Cambodia
KI,Kiribati
KM,Comoros
KN,Saint Kitts and Nevis
KP,Korea, Democratic People's Republic of
KR,Korea, Republic of
KW,Kuwait
KY,Cayman Islands
KZ,Kazakhstan
LA,Lao People's Democratic Republic
LB,Lebanon
LC,Saint Lucia
LI,Liechtenstein
LK,Sri Lanka
LR,Liberia
LS,Lesotho
LT,Lithuania
LU,Luxembourg
LV,Latvia
LY,Libya
MA,Morocco
MC,Monaco
MD,Moldova, Republic of
ME,Montenegro
MF,Saint Martin (French part)
MG,Madagascar
MH,Marshall Islands
MK,Macedonia, the former Yugoslav Republic of
ML,Mali
MM,Myanmar
MN,Mongolia
MO,Macao
MP,Northern Mariana Islands
MQ,Martinique
MR,Mauritania
MS,Montserrat
MT,Malta
MU,Mauritius
MV,Maldives
MW,Malawi
MX,Mexico
MY,Malaysia
MZ,Mozambique
NA,Namibia
NC,New Caledonia
NE,Niger
NF,Norfolk Island
NG,Nigeria
NI,Nicaragua
NL,Netherlands
NO,Norway
NP,Nepal
NR,Nauru
NU,Niue
NZ,New Zealand
OM,Oman
PA,Panama
PE,Peru
PF,French Polynesia
PG,Papua New Guinea
PH,Philippines
PK,Pakistan
PL,Poland
PM,Saint Pierre and Miquelon
PN,Pitcairn
PR,Puerto Rico
PS,Palestine
PT,Portugal
PW,Palau
PY,Paraguay
QA,Qatar
RE,Réunion
RO,Romania
RS,Serbia
RU,Russian Federation
RW,Rwanda
SA,Saudi Arabia
SB,Solomon Islands
SC,Seychelles
SD,Sudan
SE,Sweden
SG,Singapore
SH,Saint Helena, Ascension and Tristan da Cunha
SI,Slovenia
SJ,Svalbard and Jan Mayen
SK,Slovakia
SL,Sierra Leone
SM,San Marino
SN,Senegal
SO,Somalia
SR,Suriname
SS,South Sudan
ST,Sao Tome and Principe
SV,El Salvador
SX,Sint Maarten (Dutch part)
SY,Syrian Arab Republic
SZ,Swaziland
TC,Turks and Caicos Islands
TD,Chad
TF,French Southern Territories
TG,Togo
TH,Thailand
TJ,Tajikistan
TK,Tokelau
TL,Timor-Leste
TM,Turkmenistan
TN,Tunisia
TO,Tonga
TR,Turkey
TT,Trinidad and Tobago
TV,Tuvalu
TW,Taiwan
TZ,Tanzania, United Republic of
UA,Ukraine
UG,Uganda
UM,United States Minor Outlying Islands
US,United States of America
UY,Uruguay
UZ,Uzbekistan
VA,Holy See
VC,Saint Vincent and the Grenadines
VE,Venezuela, Bolivarian Republic of
VG,Virgin Islands, British
VI,Virgin Islands, U.S.
VN,Viet Nam
VU,Vanuatu
WF,Wallis and Futuna
WS,Samoa
YE,Yemen
YT,Mayotte
ZA,South Africa
ZM,Zambia
ZW,Zimbabwe
This diff is collapsed.
This diff is collapsed.
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment