Commit 2f6c9baa authored by Karsten Loesing
Browse files

Always use UTF-8 as charset.

Implements #21932.
parent a8622533
# Changes in version 2.0.0 - 2017-06-??
* Major changes
- Always use UTF-8 as charset rather than using the platform's
default charset.
* Minor changes
- Replace custom ImplementationNotAccessibleException thrown by
DescriptorSourceFactory with generic RuntimeException.
......
......@@ -8,6 +8,7 @@ import org.torproject.descriptor.DescriptorParseException;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
......@@ -96,8 +97,8 @@ public abstract class DescriptorImpl implements Descriptor {
}
/**
* Returns a new {@link Scanner} for parsing the full raw descriptor starting
* using the platform's default charset.
* Returns a new {@link Scanner} for parsing the full raw descriptor using
* UTF-8 as charset.
*
* @return Scanner for the full raw descriptor bytes.
*/
......@@ -107,17 +108,16 @@ public abstract class DescriptorImpl implements Descriptor {
/**
 * Returns a new {@link Scanner} for parsing the raw descriptor starting at
 * byte <code>offset</code> containing <code>length</code> bytes using UTF-8
 * as charset.
 *
 * <p>The bytes are wrapped in an {@link InputStreamReader} with an explicit
 * UTF-8 charset so that decoding does not fall back to the platform's
 * default charset.</p>
 *
 * @param offset The index of the first byte to parse.
 * @param length The number of bytes to parse.
 * @return Scanner for the given raw descriptor bytes.
 */
protected Scanner newScanner(int offset, int length) {
  return new Scanner(new InputStreamReader(new ByteArrayInputStream(
      this.rawDescriptorBytes, offset, length), StandardCharsets.UTF_8));
}
/**
......
......@@ -8,6 +8,7 @@ import org.torproject.descriptor.DescriptorParseException;
import org.torproject.descriptor.TorperfResult;
import java.io.File;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
......@@ -27,8 +28,8 @@ public class TorperfResultImpl extends DescriptorImpl
throw new DescriptorParseException("Descriptor is empty.");
}
List<Descriptor> parsedDescriptors = new ArrayList<>();
/* XXX21932 */
String descriptorString = new String(rawDescriptorBytes);
String descriptorString = new String(rawDescriptorBytes,
StandardCharsets.UTF_8);
Scanner scanner = new Scanner(descriptorString).useDelimiter("\r?\n");
String typeAnnotation = "";
while (scanner.hasNext()) {
......@@ -48,8 +49,8 @@ public class TorperfResultImpl extends DescriptorImpl
} else {
/* XXX21932 */
parsedDescriptors.add(new TorperfResultImpl(
(typeAnnotation + line).getBytes(), descriptorFile,
failUnrecognizedDescriptorLines));
(typeAnnotation + line).getBytes(StandardCharsets.UTF_8),
descriptorFile, failUnrecognizedDescriptorLines));
typeAnnotation = "";
}
}
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment