Loading CHANGELOG.md +8 −0 Original line number Diff line number Diff line # Changes in version 5.1-1.12.0 - 2018-??-?? * Minor changes - Don't attempt to un-escape character sequences in contact lines (like "\uk") that only happen to start like escaped utf-8 characters (like "\u0055"). # Changes in version 5.1-1.11.0 - 2018-03-14 * Medium changes Loading src/main/java/org/torproject/onionoo/docs/DocumentStore.java +1 −3 Original line number Diff line number Diff line Loading @@ -9,7 +9,6 @@ import com.google.gson.Gson; import com.google.gson.GsonBuilder; import com.google.gson.JsonParseException; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; Loading Loading @@ -318,8 +317,7 @@ public class DocumentStore { * objects are escaped JSON, e.g., \u00F2. When Gson serializes * this string, it escapes the \ to \\, hence writes \\u00F2. We * need to undo this and change \\u00F2 back to \u00F2. */ documentString = StringUtils.replace(gson.toJson(document), "\\\\u", "\\u"); documentString = FormattingUtils.replaceValidUtf(gson.toJson(document)); /* Existing details statuses don't contain opening and closing curly * brackets, so we should remove them from new details statuses, * too. 
*/ Loading src/main/java/org/torproject/onionoo/server/ResponseBuilder.java +2 −3 Original line number Diff line number Diff line Loading @@ -12,12 +12,11 @@ import org.torproject.onionoo.docs.DocumentStoreFactory; import org.torproject.onionoo.docs.SummaryDocument; import org.torproject.onionoo.docs.UptimeDocument; import org.torproject.onionoo.docs.WeightsDocument; import org.torproject.onionoo.util.FormattingUtils; import com.google.gson.Gson; import com.google.gson.GsonBuilder; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; Loading Loading @@ -348,7 +347,7 @@ public class ResponseBuilder { /* Whenever we provide Gson with a string containing an escaped * non-ASCII character like \u00F2, it escapes the \ to \\, which * we need to undo before including the string in a response. */ return StringUtils.replace(gson.toJson(dd), "\\\\u", "\\u"); return FormattingUtils.replaceValidUtf(gson.toJson(dd)); } else { // TODO We should probably log that we didn't find a details // document that we expected to exist. Loading src/main/java/org/torproject/onionoo/util/FormattingUtils.java +34 −0 Original line number Diff line number Diff line Loading @@ -3,8 +3,18 @@ package org.torproject.onionoo.util; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.util.regex.Matcher; import java.util.regex.Pattern; /** Static helper methods for string processing etc. */ public class FormattingUtils { private static Logger log = LoggerFactory.getLogger( FormattingUtils.class); private FormattingUtils() { } Loading Loading @@ -35,5 +45,29 @@ public class FormattingUtils { public static String formatDecimalNumber(long decimalNumber) { return String.format("%,d", decimalNumber); } private static Pattern escapePattern = Pattern.compile( "(\\\\{4}u[0-9a-fA-F]{4})"); /** De-escape only valid UTF and leave anything else escaped. 
*/ public static String replaceValidUtf(String text) { if (null == text || text.isEmpty()) { return text; } try { StringBuffer sb = new StringBuffer(); Matcher mat = escapePattern.matcher(text); while (mat.find()) { String unescaped = mat.group(1); mat.appendReplacement(sb, unescaped); } mat.appendTail(sb); return sb.toString(); } catch (Throwable ex) { log.debug("Couldn't process input '{}'.", text, ex); return text; } } } src/test/java/org/torproject/onionoo/util/FormattingUtilsTest.java 0 → 100644 +43 −0 Original line number Diff line number Diff line package org.torproject.onionoo.util; import static org.junit.Assert.assertEquals; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; import org.junit.runners.Parameterized.Parameter; import org.junit.runners.Parameterized.Parameters; import java.io.File; import java.nio.file.Files; import java.util.ArrayList; import java.util.Collection; import java.util.List; @RunWith(Parameterized.class) public class FormattingUtilsTest { /** Provide test data. */ @Parameters public static Collection<String[]> data() throws Exception { List<String> lines = Files.readAllLines((new File(ClassLoader .getSystemResource("lines-for-escape-tests.txt").toURI())) .toPath()); List<String[]> testData = new ArrayList<>(); for (int i = 0; i < lines.size(); i += 2) { testData.add(new String[]{lines.get(i), lines.get(i + 1)}); } return testData; } @Parameter(0) public String in; @Parameter(1) public String out; @Test public void testReplaceUtf() { assertEquals(out, new String(FormattingUtils.replaceValidUtf(in))); } } Loading
CHANGELOG.md +8 −0 Original line number Diff line number Diff line # Changes in version 5.1-1.12.0 - 2018-??-?? * Minor changes - Don't attempt to un-escape character sequences in contact lines (like "\uk") that only happen to start like escaped utf-8 characters (like "\u0055"). # Changes in version 5.1-1.11.0 - 2018-03-14 * Medium changes Loading
src/main/java/org/torproject/onionoo/docs/DocumentStore.java +1 −3 Original line number Diff line number Diff line Loading @@ -9,7 +9,6 @@ import com.google.gson.Gson; import com.google.gson.GsonBuilder; import com.google.gson.JsonParseException; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; Loading Loading @@ -318,8 +317,7 @@ public class DocumentStore { * objects are escaped JSON, e.g., \u00F2. When Gson serializes * this string, it escapes the \ to \\, hence writes \\u00F2. We * need to undo this and change \\u00F2 back to \u00F2. */ documentString = StringUtils.replace(gson.toJson(document), "\\\\u", "\\u"); documentString = FormattingUtils.replaceValidUtf(gson.toJson(document)); /* Existing details statuses don't contain opening and closing curly * brackets, so we should remove them from new details statuses, * too. */ Loading
src/main/java/org/torproject/onionoo/server/ResponseBuilder.java +2 −3 Original line number Diff line number Diff line Loading @@ -12,12 +12,11 @@ import org.torproject.onionoo.docs.DocumentStoreFactory; import org.torproject.onionoo.docs.SummaryDocument; import org.torproject.onionoo.docs.UptimeDocument; import org.torproject.onionoo.docs.WeightsDocument; import org.torproject.onionoo.util.FormattingUtils; import com.google.gson.Gson; import com.google.gson.GsonBuilder; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; Loading Loading @@ -348,7 +347,7 @@ public class ResponseBuilder { /* Whenever we provide Gson with a string containing an escaped * non-ASCII character like \u00F2, it escapes the \ to \\, which * we need to undo before including the string in a response. */ return StringUtils.replace(gson.toJson(dd), "\\\\u", "\\u"); return FormattingUtils.replaceValidUtf(gson.toJson(dd)); } else { // TODO We should probably log that we didn't find a details // document that we expected to exist. Loading
src/main/java/org/torproject/onionoo/util/FormattingUtils.java +34 −0 Original line number Diff line number Diff line Loading @@ -3,8 +3,18 @@ package org.torproject.onionoo.util; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.util.regex.Matcher; import java.util.regex.Pattern; /** Static helper methods for string processing etc. */ public class FormattingUtils { private static Logger log = LoggerFactory.getLogger( FormattingUtils.class); private FormattingUtils() { } Loading Loading @@ -35,5 +45,29 @@ public class FormattingUtils { public static String formatDecimalNumber(long decimalNumber) { return String.format("%,d", decimalNumber); } private static Pattern escapePattern = Pattern.compile( "(\\\\{4}u[0-9a-fA-F]{4})"); /** De-escape only valid UTF and leave anything else escaped. */ public static String replaceValidUtf(String text) { if (null == text || text.isEmpty()) { return text; } try { StringBuffer sb = new StringBuffer(); Matcher mat = escapePattern.matcher(text); while (mat.find()) { String unescaped = mat.group(1); mat.appendReplacement(sb, unescaped); } mat.appendTail(sb); return sb.toString(); } catch (Throwable ex) { log.debug("Couldn't process input '{}'.", text, ex); return text; } } }
src/test/java/org/torproject/onionoo/util/FormattingUtilsTest.java 0 → 100644 +43 −0 Original line number Diff line number Diff line package org.torproject.onionoo.util; import static org.junit.Assert.assertEquals; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; import org.junit.runners.Parameterized.Parameter; import org.junit.runners.Parameterized.Parameters; import java.io.File; import java.nio.file.Files; import java.util.ArrayList; import java.util.Collection; import java.util.List; @RunWith(Parameterized.class) public class FormattingUtilsTest { /** Provide test data. */ @Parameters public static Collection<String[]> data() throws Exception { List<String> lines = Files.readAllLines((new File(ClassLoader .getSystemResource("lines-for-escape-tests.txt").toURI())) .toPath()); List<String[]> testData = new ArrayList<>(); for (int i = 0; i < lines.size(); i += 2) { testData.add(new String[]{lines.get(i), lines.get(i + 1)}); } return testData; } @Parameter(0) public String in; @Parameter(1) public String out; @Test public void testReplaceUtf() { assertEquals(out, new String(FormattingUtils.replaceValidUtf(in))); } }