Commit a66a16fb authored by Iain Learmonth
Browse files

[WIP] GeoIP file parser

See: #40004
parent 348ee9c3
Pipeline #5194 failed with stage
in 2 minutes and 17 seconds
package org.torproject.descriptor;
import java.io.Serializable;
import java.net.InetAddress;
import java.util.List;
import java.util.Optional;
/**
 * Descriptor for a GeoIP file, which maps IP addresses to their
 * geographical (country code) and topological (autonomous system)
 * location.
 *
 * @since 2.16.0
 */
public interface GeoipFile extends Descriptor {

  /** Line separator used between the lines of a GeoIP file. */
  String EOL = "\n";

  /**
   * Returns all entries contained in this GeoIP file.
   *
   * @return the list of entries
   */
  List<GeoipEntry> getEntries();

  /**
   * Looks up the entry that applies to the given address.
   *
   * @param forAddress the address to look up
   * @return the matching entry, or an empty {@code Optional} if there is none
   */
  Optional<GeoipEntry> getEntry(InetAddress forAddress);

  /**
   * A single entry of a GeoIP file: an address range together with the
   * location information recorded for it.
   */
  interface GeoipEntry extends Serializable {

    /**
     * Returns the start address of the range.
     *
     * @return the start address
     */
    InetAddress getStart();

    /**
     * Returns the end address of the range.
     *
     * @return the end address
     */
    InetAddress getEnd();

    /**
     * Returns the country code recorded for the range.
     *
     * @return the country code
     */
    String getCountryCode();

    /**
     * Returns the autonomous system number recorded for the range.
     *
     * @return the autonomous system number; may be {@code null} when the
     *     file does not provide one
     */
    String getAutonomousSystemNumber();
  }
}
\ No newline at end of file
package org.torproject.descriptor.impl;
import org.torproject.descriptor.GeoipFile;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.Objects;
public class GeoipEntryImpl implements GeoipFile.GeoipEntry {

  /** Largest IPv4 address when written as an unsigned 32-bit integer. */
  private static final long MAX_IPV4_VALUE = 0xFFFFFFFFL;

  private final InetAddress start;

  private final InetAddress end;

  private final String countryCode;

  private final String autonomousSystemNumber;

  /**
   * An entry in a GeoIP file.
   *
   * <p>The start and end are expected to be an unsigned 32-bit integer
   * represented as an ASCII string for IPv4 addresses, and a
   * colon-separated address as an ASCII string for IPv6 addresses. If you
   * provide a dotted-quad IPv4 address string then this class will also
   * handle that, but you won't see that in the files that exist at the time
   * of writing this comment.</p>
   *
   * @param start the start string found in the file
   * @param end the end string found in the file
   * @param countryCode the country code found in the file
   * @param autonomousSystemNumber the autonomous system number found in the
   *     file, or null if not present
   * @throws UnknownHostException on failing to parse an IP address string,
   *     including missing, empty, negative or out-of-range values
   */
  public GeoipEntryImpl(String start, String end, String countryCode,
      String autonomousSystemNumber)
      throws UnknownHostException {
    this.start = parseAddress(start);
    this.end = parseAddress(end);
    this.countryCode = countryCode;
    this.autonomousSystemNumber = autonomousSystemNumber;
  }

  /**
   * Converts an address string from a GeoIP file into an address.
   *
   * <p>Integer strings are read as unsigned 32-bit IPv4 addresses. A
   * {@code long} is used for parsing because values above
   * {@code Integer.MAX_VALUE} (the upper half of the IPv4 address space) do
   * not fit into an {@code int}. Everything else is handed to
   * {@link InetAddress#getByName(String)}.</p>
   *
   * <p>NOTE(review): {@code getByName} treats strings that are not IP
   * literals as host names and may try to resolve them; confirm whether
   * that is acceptable for malformed input.</p>
   *
   * @param text the address string found in the file
   * @return the parsed address
   * @throws UnknownHostException if the string is null, empty, a negative
   *     integer, an integer too large for IPv4, or otherwise not parseable
   */
  private static InetAddress parseAddress(String text)
      throws UnknownHostException {
    if (text == null || text.isEmpty()) {
      /* getByName() would silently return the loopback address here. */
      throw new UnknownHostException("Missing IP address.");
    }
    long value;
    try {
      value = Long.parseLong(text);
    } catch (NumberFormatException nfe) {
      /* Not an integer, so expect an IPv6 or dotted-quad IPv4 literal. */
      return InetAddress.getByName(text);
    }
    if (value < 0L || value > MAX_IPV4_VALUE) {
      throw new UnknownHostException(
          "IPv4 address out of range: " + text);
    }
    return InetAddress.getByAddress(new byte[] {
        (byte) (value >>> 24), (byte) (value >>> 16),
        (byte) (value >>> 8), (byte) value});
  }

  @Override
  public InetAddress getStart() {
    return this.start;
  }

  @Override
  public InetAddress getEnd() {
    return this.end;
  }

  @Override
  public String getCountryCode() {
    return this.countryCode;
  }

  @Override
  public String getAutonomousSystemNumber() {
    return this.autonomousSystemNumber;
  }

  /**
   * Two entries are equal if they are of the same class and have equal
   * start, end, country code and autonomous system number. The country code
   * and autonomous system number are compared null-safely.
   */
  @Override
  public boolean equals(Object obj) {
    if (this == obj) {
      return true;
    }
    if (obj == null || getClass() != obj.getClass()) {
      return false;
    }
    GeoipEntryImpl that = (GeoipEntryImpl) obj;
    return this.start.equals(that.start)
        && this.end.equals(that.end)
        && Objects.equals(this.countryCode, that.countryCode)
        && Objects.equals(this.autonomousSystemNumber,
        that.autonomousSystemNumber);
  }

  @Override
  public int hashCode() {
    return Objects.hash(this.start, this.end, this.countryCode,
        this.autonomousSystemNumber);
  }
}
package org.torproject.descriptor.impl;
import org.torproject.descriptor.DescriptorParseException;
import org.torproject.descriptor.GeoipFile;
import java.io.File;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.Scanner;
public class GeoipFileImpl extends DescriptorImpl implements GeoipFile {

  /** Entries in the order in which they were found in the file. */
  List<GeoipEntry> entries;

  /**
   * Parses the given raw bytes as a GeoIP file.
   *
   * @param rawDescriptorBytes the raw bytes, passed on to the superclass
   * @param offsetAndLength offset and length, passed on to the superclass
   * @param descriptorFile the descriptor file, passed on to the superclass
   * @throws DescriptorParseException if the descriptor contains nothing but
   *     annotation or comment lines, or if the superclass rejects the input
   */
  protected GeoipFileImpl(byte[] rawDescriptorBytes,
      int[] offsetAndLength, File descriptorFile)
      throws DescriptorParseException {
    super(rawDescriptorBytes, offsetAndLength, descriptorFile);
    entries = new ArrayList<>();
    this.splitAndParseEntries();
  }

  /**
   * Splits the descriptor into lines and parses each line into an entry.
   *
   * <p>Lines are expected to have the form
   * {@code start,end,countryCode[,autonomousSystemNumber]}. Lines starting
   * with {@code @} or {@code #} are skipped. Lines with fewer than three
   * fields or with unparseable addresses are recorded as unrecognized lines
   * and do not produce an entry.</p>
   *
   * @throws DescriptorParseException if the descriptor ends with an
   *     annotation or comment line and no entry was parsed
   */
  private void splitAndParseEntries() throws DescriptorParseException {
    Scanner scanner = this.newScanner().useDelimiter(EOL);
    while (scanner.hasNext()) {
      String line = scanner.next();
      if (line.startsWith("@") || line.startsWith("#")) {
        /* Skip annotation and comments. A trailing comment only means that
         * the descriptor is empty if nothing was parsed before it. */
        if (!scanner.hasNext() && this.entries.isEmpty()) {
          throw new DescriptorParseException("Descriptor is empty.");
        }
        continue;
      }
      String[] parts = line.split(",");
      if (parts.length < 3) {
        /* Too few fields to build an entry; do not index into parts. */
        this.addUnrecognizedGeoipLine(line);
        continue;
      }
      String autonomousSystemNumber = parts.length >= 4 ? parts[3] : null;
      try {
        this.entries.add(new GeoipEntryImpl(parts[0], parts[1], parts[2],
            autonomousSystemNumber));
      } catch (UnknownHostException e) {
        this.addUnrecognizedGeoipLine(line);
      }
    }
  }

  /** Records a line that could not be parsed, creating the list if needed. */
  private void addUnrecognizedGeoipLine(String line) {
    if (this.unrecognizedLines == null) {
      this.unrecognizedLines = new ArrayList<>();
    }
    this.unrecognizedLines.add(line);
  }

  /**
   * Returns the parsed entries in file order. The returned list is the
   * internal list of this descriptor, not a copy.
   */
  @Override
  public List<GeoipEntry> getEntries() {
    return this.entries;
  }

  /**
   * Compares two InetAddresses byte by byte, treating bytes as unsigned.
   *
   * @param a1 first address
   * @param a2 second address
   * @return -1 if a1 &lt; a2, 0 if a1 == a2, and 1 if a1 &gt; a2
   * @throws IllegalArgumentException when addresses have differing lengths,
   *     e.g., when comparing an IPv4 address with an IPv6 address
   */
  private static int compareAddresses(InetAddress a1, InetAddress a2) {
    byte[] b1 = a1.getAddress();
    byte[] b2 = a2.getAddress();
    if (b1.length != b2.length) {
      throw new IllegalArgumentException(
          "Comparing two addresses of different lengths.");
    }
    for (int i = 0; i < b1.length; i++) {
      /* Mask to compare as unsigned values; Java bytes are signed. */
      int i1 = b1[i] & 0xFF;
      int i2 = b2[i] & 0xFF;
      if (i1 < i2) {
        return -1;
      } else if (i1 > i2) {
        return 1;
      }
    }
    return 0;
  }

  /**
   * Finds the entry whose range contains the given address using a binary
   * search over the entries.
   *
   * <p>NOTE(review): the search assumes that entries appear in ascending,
   * non-overlapping order in the file; this is not verified while
   * parsing.</p>
   *
   * @param forAddress the address to look up
   * @return the entry with start &lt;= address &lt;= end, or an empty
   *     {@code Optional} if no entry matches or there are no entries
   * @throws IllegalArgumentException if the address length differs from the
   *     length of the addresses it is compared with
   */
  @Override
  public Optional<GeoipEntry> getEntry(InetAddress forAddress) {
    int low = 0;
    /* Last valid index; starting at size() would read past the end. */
    int high = this.entries.size() - 1;
    while (low <= high) {
      int mid = (low + high) >>> 1;
      GeoipEntry entry = this.entries.get(mid);
      int startComparisonResult = compareAddresses(
          entry.getStart(), forAddress);
      if (startComparisonResult > 0) {
        /* entry start is greater than desired address */
        high = mid - 1;
      } else if (startComparisonResult == 0
          || compareAddresses(entry.getEnd(), forAddress) >= 0) {
        /* desired address lies between entry start and end, inclusive */
        return Optional.of(entry);
      } else {
        /* entry ends before desired address */
        low = mid + 1;
      }
    }
    return Optional.empty();
  }
}
package org.torproject.descriptor.impl;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import org.torproject.descriptor.DescriptorParseException;
import org.torproject.descriptor.GeoipFile;
import org.apache.commons.compress.utils.IOUtils;
import org.junit.Test;
import java.io.IOException;
import java.io.InputStream;
import java.net.InetAddress;
import java.net.URL;
import java.util.LinkedList;
import java.util.List;
/**
 * Tests parsing of the bundled {@code geoip/geoip} and {@code geoip/geoip6}
 * test resources and address lookups in the parsed files.
 */
public class GeoipFileImplTest {

  /**
   * Reads the given classpath resource and parses it as a GeoIP file. The
   * resource stream is closed once its bytes have been read.
   */
  private GeoipFile parseResource(String resourceName)
      throws IOException, DescriptorParseException {
    URL resource = getClass().getClassLoader().getResource(resourceName);
    assertNotNull(resource);
    byte[] rawDescriptorBytes;
    try (InputStream dataInputStream = resource.openStream()) {
      assertNotNull(dataInputStream);
      rawDescriptorBytes = IOUtils.toByteArray(dataInputStream);
    }
    return new GeoipFileImpl(rawDescriptorBytes,
        new int[]{0, rawDescriptorBytes.length}, null);
  }

  /**
   * Asserts that the parsed file starts with exactly the expected entries,
   * failing if the file contains fewer entries than expected.
   */
  private static void assertFirstEntries(
      List<GeoipFile.GeoipEntry> expectedResults, GeoipFile geoipFile) {
    List<GeoipFile.GeoipEntry> entries = geoipFile.getEntries();
    assertFalse(entries.isEmpty());
    assertFalse("Fewer entries parsed than expected.",
        entries.size() < expectedResults.size());
    for (int i = 0; i < expectedResults.size(); i++) {
      assertEquals(expectedResults.get(i), entries.get(i));
    }
  }

  /**
   * Asserts that looking up the address yields an entry with the expected
   * country code. A missing entry fails the assertion, too.
   */
  private static void assertCountryCode(String expectedCountryCode,
      GeoipFile geoipFile, String address) throws IOException {
    String actualCountryCode = geoipFile
        .getEntry(InetAddress.getByName(address))
        .map(GeoipFile.GeoipEntry::getCountryCode)
        .orElse(null);
    assertEquals("Unexpected country code for " + address,
        expectedCountryCode, actualCountryCode);
  }

  @Test
  public void testParseGeoIP4() throws IOException, DescriptorParseException {
    List<GeoipFile.GeoipEntry> expectedResults = new LinkedList<>();
    expectedResults.add(
        new GeoipEntryImpl("1.0.0.0", "1.0.0.255", "AU", null));
    expectedResults.add(
        new GeoipEntryImpl("1.0.1.0", "1.0.3.255", "CN", null));
    expectedResults.add(
        new GeoipEntryImpl("1.0.4.0", "1.0.7.255", "AU", null));
    expectedResults.add(
        new GeoipEntryImpl("1.0.8.0", "1.0.15.255", "CN", null));
    expectedResults.add(
        new GeoipEntryImpl("1.0.16.0", "1.0.31.255", "JP", null));
    GeoipFile geoipFile = parseResource("geoip/geoip");
    assertFirstEntries(expectedResults, geoipFile);
    /* first entry */
    assertCountryCode("AU", geoipFile, "1.0.0.0");
    assertCountryCode("AU", geoipFile, "1.0.0.1");
    assertCountryCode("AU", geoipFile, "1.0.0.255");
    /* some middle entry */
    assertCountryCode("RU", geoipFile, "217.194.240.0");
    assertCountryCode("RU", geoipFile, "217.194.242.100");
    assertCountryCode("RU", geoipFile, "217.194.255.255");
    /* last entry */
    assertCountryCode("AU", geoipFile, "223.255.255.0");
    assertCountryCode("AU", geoipFile, "223.255.255.128");
    assertCountryCode("AU", geoipFile, "223.255.255.255");
  }

  @Test
  public void testParseGeoIP6() throws IOException, DescriptorParseException {
    List<GeoipFile.GeoipEntry> expectedResults = new LinkedList<>();
    expectedResults.add(
        new GeoipEntryImpl(
            "2001:200::", "2001:200:134:ffff:ffff:ffff:ffff:ffff",
            "JP", null));
    expectedResults.add(
        new GeoipEntryImpl(
            "2001:200:135::", "2001:200:135:ffff:ffff:ffff:ffff:ffff",
            "US", null));
    expectedResults.add(
        new GeoipEntryImpl(
            "2001:200:136::", "2001:200:179:ffff:ffff:ffff:ffff:ffff",
            "JP", null));
    expectedResults.add(
        new GeoipEntryImpl(
            "2001:200:17a::", "2001:200:17b:ffff:ffff:ffff:ffff:ffff",
            "US", null));
    expectedResults.add(
        new GeoipEntryImpl(
            "2001:200:17c::", "2001:200:ffff:ffff:ffff:ffff:ffff:ffff",
            "JP", null));
    GeoipFile geoipFile = parseResource("geoip/geoip6");
    assertFirstEntries(expectedResults, geoipFile);
    /* first entry */
    assertCountryCode("JP", geoipFile, "2001:200::0000");
    assertCountryCode("JP", geoipFile, "2001:200:134::cafe");
    assertCountryCode("JP", geoipFile,
        "2001:200:134:ffff:ffff:ffff:ffff:ffff");
    /* some middle entry */
    assertCountryCode("ZA", geoipFile, "2c0f:f598:0007::");
    assertCountryCode("ZA", geoipFile, "2c0f:f598:ffff:ffff::0000");
    assertCountryCode("ZA", geoipFile,
        "2c0f:f598:ffff:ffff:ffff:ffff:ffff:ffff");
    /* last entry */
    assertCountryCode("MU", geoipFile, "2c0f:fff1::0000");
    assertCountryCode("MU", geoipFile, "2c0f:fff1:face::1:1234");
    assertCountryCode("MU", geoipFile,
        "2c0f:ffff:ffff:ffff:ffff:ffff:ffff:ffff");
  }
}
This diff is collapsed.
This diff is collapsed.
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment