Commit 532ef347 authored by iwakeh's avatar iwakeh 🌴 Committed by Karsten Loesing
Browse files

Only unescape valid UTF.

Add a utility method for only un-escaping valid utf and supply a test
as well as test data for this issue.

Fixes task-22594.
parent e7ac4ca9
Loading
Loading
Loading
Loading
+8 −0
Original line number Diff line number Diff line
# Changes in version 5.1-1.12.0 - 2018-??-??

 * Minor changes
   - Don't attempt to un-escape character sequences in contact lines
     (like "\uk") that only happen to start like escaped utf-8 characters
     (like "\u0055").


# Changes in version 5.1-1.11.0 - 2018-03-14

 * Medium changes
+1 −3
Original line number Diff line number Diff line
@@ -9,7 +9,6 @@ import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import com.google.gson.JsonParseException;

import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@@ -318,8 +317,7 @@ public class DocumentStore {
      * objects are escaped JSON, e.g., \u00F2.  When Gson serializes
       * this string, it escapes the \ to \\, hence writes \\u00F2.  We
       * need to undo this and change \\u00F2 back to \u00F2. */
      documentString = StringUtils.replace(gson.toJson(document),
          "\\\\u", "\\u");
      documentString = FormattingUtils.replaceValidUtf(gson.toJson(document));
      /* Existing details statuses don't contain opening and closing curly
       * brackets, so we should remove them from new details statuses,
       * too. */
+2 −3
Original line number Diff line number Diff line
@@ -12,12 +12,11 @@ import org.torproject.onionoo.docs.DocumentStoreFactory;
import org.torproject.onionoo.docs.SummaryDocument;
import org.torproject.onionoo.docs.UptimeDocument;
import org.torproject.onionoo.docs.WeightsDocument;
import org.torproject.onionoo.util.FormattingUtils;

import com.google.gson.Gson;
import com.google.gson.GsonBuilder;

import org.apache.commons.lang3.StringUtils;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@@ -348,7 +347,7 @@ public class ResponseBuilder {
        /* Whenever we provide Gson with a string containing an escaped
         * non-ASCII character like \u00F2, it escapes the \ to \\, which
         * we need to undo before including the string in a response. */
        return StringUtils.replace(gson.toJson(dd), "\\\\u", "\\u");
        return FormattingUtils.replaceValidUtf(gson.toJson(dd));
      } else {
        // TODO We should probably log that we didn't find a details
        // document that we expected to exist.
+34 −0
Original line number Diff line number Diff line
@@ -3,8 +3,18 @@

package org.torproject.onionoo.util;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/** Static helper methods for string processing etc. */
public class FormattingUtils {

  /* Logger for this utility class; never reassigned, hence final. */
  private static final Logger log = LoggerFactory.getLogger(
      FormattingUtils.class);

  /* Private constructor: this class only offers static helper methods
   * and is not meant to be instantiated. */
  private FormattingUtils() {
  }

@@ -35,5 +45,29 @@ public class FormattingUtils {
  public static String formatDecimalNumber(long decimalNumber) {
    /* The "," flag makes the formatter insert the grouping separators
     * of the default locale. */
    final String groupedDecimalFormat = "%,d";
    return String.format(groupedDecimalFormat, decimalNumber);
  }

  /* Matches four consecutive backslashes followed by the letter u and
   * exactly four hexadecimal digits.  Compiled once and shared. */
  private static final Pattern escapePattern = Pattern.compile(
       "(\\\\{4}u[0-9a-fA-F]{4})");

  /**
   * De-escape only valid UTF and leave anything else escaped.
   *
   * <p>Every occurrence of four backslashes followed by the letter u and
   * exactly four hexadecimal digits is rewritten to a single backslash
   * followed by the same letter and digits.  Sequences that merely start
   * like such an escape, e.g., four backslashes and a u that are not
   * followed by four hexadecimal digits, are copied unchanged.</p>
   *
   * <p>The result is assembled by copying ranges of the input rather than
   * by using {@code Matcher.appendReplacement}, because the latter treats
   * backslashes in the replacement string as escape characters and would
   * therefore emit a different number of backslashes than written.</p>
   *
   * @param text the string to process, may be {@code null} or empty
   * @return the input with all matching sequences reduced to a single
   *     backslash, or the unchanged input if it is {@code null}, empty, or
   *     contains no matching sequence
   */
  public static String replaceValidUtf(String text) {
    if (null == text || text.isEmpty()) {
      return text;
    }
    Matcher mat = escapePattern.matcher(text);
    StringBuilder sb = new StringBuilder(text.length());
    int copiedUpTo = 0;
    while (mat.find()) {
      /* Copy everything between the previous match and this one. */
      sb.append(text, copiedUpTo, mat.start());
      /* Skip three of the four leading backslashes and keep the rest of
       * the match: one backslash, the u, and the four hex digits. */
      sb.append(text, mat.start() + 3, mat.end());
      copiedUpTo = mat.end();
    }
    sb.append(text, copiedUpTo, text.length());
    return sb.toString();
  }

}
+43 −0
Original line number Diff line number Diff line
package org.torproject.onionoo.util;

import static org.junit.Assert.assertEquals;

import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameter;
import org.junit.runners.Parameterized.Parameters;

import java.io.File;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

/**
 * Parameterized test for {@link FormattingUtils#replaceValidUtf(String)}
 * that takes its inputs and expected outputs from the classpath resource
 * {@code lines-for-escape-tests.txt}.
 */
@RunWith(Parameterized.class)
public class FormattingUtilsTest {

  /**
   * Provide test data.
   *
   * <p>The resource file is read line by line; lines are consumed in
   * pairs, the first line of each pair being the input and the second the
   * expected output.</p>
   *
   * @return one two-element array (input, expected output) per line pair
   * @throws IllegalStateException if the resource contains an odd number
   *     of lines, i.e., an input line without expected output
   * @throws Exception if the resource cannot be located or read
   */
  @Parameters
  public static Collection<String[]> data() throws Exception {
    List<String> lines = Files.readAllLines((new File(ClassLoader
        .getSystemResource("lines-for-escape-tests.txt").toURI()))
        .toPath());
    if (lines.size() % 2 != 0) {
      /* Fail with a clear message rather than with an index error while
       * looking up the missing expected-output line. */
      throw new IllegalStateException("Test resource must contain an even "
          + "number of lines (input/expected pairs), but has "
          + lines.size() + ".");
    }
    List<String[]> testData = new ArrayList<>();
    for (int i = 0; i < lines.size(); i += 2) {
      testData.add(new String[]{lines.get(i), lines.get(i + 1)});
    }
    return testData;
  }

  /** Input line passed to the method under test. */
  @Parameter(0)
  public String in;

  /** Expected result for the input line. */
  @Parameter(1)
  public String out;

  @Test
  public void testReplaceUtf() {
    /* Compare the result directly; wrapping it in a new String would
     * only add a copy and turn a null result into a
     * NullPointerException instead of an assertion failure. */
    assertEquals(out, FormattingUtils.replaceValidUtf(in));
  }
}
Loading