Commit 69fa5be7 authored by Nick Mathewson's avatar Nick Mathewson 🥔
Browse files

Workaround for webservers that lie about Content-Encoding: Tor now tries to...

Workaround for webservers that lie about Content-Encoding: Tor now tries to autodetect compressed directories and compression itself. (resolves bug 65)


svn:r3374
parent e0bf5976
Loading
Loading
Loading
Loading
+16 −0
Original line number Diff line number Diff line
@@ -134,6 +134,7 @@ tor_gzip_compress(char **out, size_t *out_len,
  return -1;
}

/* DOCDOC -- sets *out to NULL on failure. */
int
tor_gzip_uncompress(char **out, size_t *out_len,
                    const char *in, size_t in_len,
@@ -224,3 +225,18 @@ tor_gzip_uncompress(char **out, size_t *out_len,
  return -1;
}

/** Try to tell whether the <b>in_len</b>-byte string in <b>in</b> is likely
 * to be compressed or not.  If it is, return the likeliest compression method
 * (GZIP_METHOD or ZLIB_METHOD).  Otherwise, return 0.
 *
 * Only the first two bytes of <b>in</b> are examined, and inputs of two
 * bytes or fewer are always reported as not compressed.  This is a
 * heuristic: a match means "worth trying to decompress", not "definitely
 * compressed".
 */
int detect_compression_method(const char *in, size_t in_len)
{
  unsigned int b0, b1;

  if (in_len <= 2)
    return 0;

  /* Look at the header bytes as unsigned values.  Plain char may be signed,
   * in which case a comparison such as in[1] == 0x8b could never succeed. */
  b0 = (unsigned char)in[0];
  b1 = (unsigned char)in[1];

  if (b0 == 0x1f && b1 == 0x8b) {
    /* gzip magic number (ID1, ID2). */
    return GZIP_METHOD;
  } else if ((b0 & 0x0f) == 8 && ((b0 << 8) | b1) % 31 == 0) {
    /* zlib header: the low nibble of CMF is 8 (deflate), and CMF*256+FLG,
     * i.e. the two bytes read most-significant-first, is a multiple of 31.
     * Building the value from the bytes keeps the check independent of the
     * host's byte order. */
    return ZLIB_METHOD;
  } else {
    return 0;
  }
}
+5 −1
Original line number Diff line number Diff line
@@ -11,7 +11,9 @@
#define __TORGZIP_H
#define TORGZIP_H_ID "$Id$"

typedef enum { GZIP_METHOD=1, ZLIB_METHOD=2 } compress_method_t;
/** Identifiers for the compression formats handled by this module.
 * None of the enumerators is 0. */
typedef enum {
  GZIP_METHOD    = 1,  /* gzip container */
  ZLIB_METHOD    = 2,  /* zlib container */
  UNKNOWN_METHOD = 3   /* format not known */
} compress_method_t;

int
tor_gzip_compress(char **out, size_t *out_len,
@@ -24,4 +26,6 @@ tor_gzip_uncompress(char **out, size_t *out_len,

int is_gzip_supported(void);

int detect_compression_method(const char *in, size_t in_len);

#endif
+74 −10
Original line number Diff line number Diff line
@@ -48,6 +48,7 @@ directory_send_command(connection_t *conn, const char *platform,
                       int purpose, const char *resource,
                       const char *payload, size_t payload_len);
static int directory_handle_command(connection_t *conn);
static int body_is_plausible(const char *body, size_t body_len);

/********* START VARIABLES **********/

@@ -483,7 +484,10 @@ parse_http_url(char *headers, char **url)
 * "HTTP/1.\%d \%d\%s\r\n...".
 * If it's well-formed, assign *<b>code</b>, point  and return 0.
 * If <b>date</b> is provided, set *date to the Date header in the
 * http headers, or 0 if no such header is found.
 * http headers, or 0 if no such header is found.  If <b>compression</b>
 * is provided, set *<b>compression</b> to the compression method given
 * in the Content-Encoding header, or 0 if no such header is found, or -1
 * if the value of the header is not recognized.
 * Otherwise, return -1.
 */
static int
@@ -534,8 +538,8 @@ parse_http_response(const char *headers, int *code, time_t *date,
    } else if (!strcmp(enc, "gzip") || !strcmp(enc, "x-gzip")) {
      *compression = GZIP_METHOD;
    } else {
      log_fn(LOG_WARN, "Unrecognized content encoding: '%s'", enc);
      *compression = 0;
      log_fn(LOG_INFO, "Unrecognized content encoding: '%s'", enc);
      *compression = -1;
    }
  }
  SMARTLIST_FOREACH(parsed_headers, char *, s, tor_free(s));
@@ -544,6 +548,27 @@ parse_http_response(const char *headers, int *code, time_t *date,
  return 0;
}

/** Return true iff the <b>len</b>-byte string <b>body</b> looks like an
 * uncompressed router, running-list, or directory document.  A false result
 * is a sign of possible compression.
 *
 * Bodies shorter than 32 bytes are never considered plausible.  Otherwise
 * the body is plausible if it starts with one of the known opening keywords
 * (as judged by strcmpstart()), or if each of its first 32 bytes is a
 * printable or whitespace character.
 **/
static int
body_is_plausible(const char *body, size_t len)
{
  int i;
  if (len < 32)
    return 0;
  if (!strcmpstart(body,"router") ||
      !strcmpstart(body,"signed-directory") ||
      !strcmpstart(body,"network-status") ||
      !strcmpstart(body,"running-routers"))
    return 1;
  for (i=0;i<32;++i) {
    /* The <ctype.h> classifiers require a value representable as unsigned
     * char (or EOF).  Compressed data is full of bytes >= 0x80, which would
     * be negative -- and undefined behavior here -- if plain char is
     * signed, so convert first. */
    unsigned char c = (unsigned char)body[i];
    if (!isprint(c) && !isspace(c))
      return 0;
  }
  return 1;
}

/** We are a client, and we've finished reading the server's
 * response. Parse it and act appropriately.
 *
@@ -560,6 +585,7 @@ connection_dir_client_reached_eof(connection_t *conn)
  time_t now, date_header=0;
  int delta;
  int compression;
  int plausible;

  switch (fetch_from_buf_http(conn->inbuf,
                              &headers, MAX_HEADERS_SIZE,
@@ -592,18 +618,56 @@ connection_dir_client_reached_eof(connection_t *conn)
    }
  }

  if (compression != 0) {
    char *new_body;
    size_t new_len;
    if (tor_gzip_uncompress(&new_body, &new_len, body, body_len, compression)) {
  plausible = body_is_plausible(body, body_len);
  if (compression || !plausible) {
    char *new_body = NULL;
    size_t new_len = 0;
    int guessed = detect_compression_method(body, body_len);
    if (compression <= 0 || guessed != compression) {
      /* Tell the user if we don't believe what we're told about compression.*/
      const char *description1, *description2;
      if (compression == ZLIB_METHOD)
        description1 = "as deflated";
      else if (compression = GZIP_METHOD)
        description1 = "as gzipped";
      else if (compression == 0)
        description1 = "as uncompressed";
      else
        description1 = "with an unknown Content-Encoding";
      if (guessed == ZLIB_METHOD)
        description2 = "deflated";
      else if (guessed == GZIP_METHOD)
        description2 = "gzipped";
      else if (!plausible)
        description2 = "confusing binary junk";
      else
        description2 = "uncompressed";

      log_fn(LOG_INFO, "HTTP body from server '%s' was labeled %s,"
             "but it seems to be %s.%s",
             conn->address, description1, description2,
             (compression>0 && guessed>0)?"  Trying both.":"");
    }
    /* Try declared compression first if we can. */
    if (compression > 0)
      tor_gzip_uncompress(&new_body, &new_len, body, body_len, compression);
    /* Okay, if that didn't work, and we think that it was compressed
     * differently, try that. */
    if (!new_body && guessed > 0 && compression != guessed)
      tor_gzip_uncompress(&new_body, &new_len, body, body_len, guessed);
    /* If we're pretty sure that we have a compressed directory, and
     * we didn't manage to uncompress it, then warn and bail. */
    if (!plausible && !new_body) {
      log_fn(LOG_WARN, "Unable to decompress HTTP body (server '%s').", conn->address);
      tor_free(body); tor_free(headers);
      return -1;
    }
    if (new_body) {
      tor_free(body);
      body = new_body;
      body_len = new_len;
    }
  }

  if (conn->purpose == DIR_PURPOSE_FETCH_DIR) {
    /* fetch/process the directory to learn about new routers. */