torgzip.c 12.4 KB
Newer Older
1
2
/* Copyright (c) 2004, Roger Dingledine.
 * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
 * Copyright (c) 2007-2009, The Tor Project, Inc. */
/* See LICENSE for licensing information */

/**
 * \file torgzip.c
 * \brief A simple in-memory gzip implementation.
 **/

#include "orconfig.h"

#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
16
#ifdef _MSC_VER
Nick Mathewson's avatar
Nick Mathewson committed
17
18
#include "..\..\contrib\zlib\zlib.h"
#else
19
#include <zlib.h>
Nick Mathewson's avatar
Nick Mathewson committed
20
#endif
21
#include <string.h>
22
#ifdef HAVE_NETINET_IN_H
23
#include <netinet/in.h>
24
#endif
25
26
27
28
29

#include "util.h"
#include "log.h"
#include "torgzip.h"

30
31
/** Set to 1 if zlib is a version that supports gzip; set to 0 if it doesn't;
 * set to -1 if we haven't checked yet. */
32
33
static int gzip_is_supported = -1;

34
35
/** Return true iff we support gzip-based compression.  Otherwise, we need to
 * use zlib. */
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
int
is_gzip_supported(void)
{
  if (gzip_is_supported >= 0)
    return gzip_is_supported;

  if (!strcmpstart(ZLIB_VERSION, "0.") ||
      !strcmpstart(ZLIB_VERSION, "1.0") ||
      !strcmpstart(ZLIB_VERSION, "1.1"))
    gzip_is_supported = 0;
  else
    gzip_is_supported = 1;

  return gzip_is_supported;
}

52
/** Return the 'bits' value to tell zlib to use <b>method</b>.*/
53
54
55
56
57
58
59
static INLINE int
method_bits(compress_method_t method)
{
  /* Bits+16 means "use gzip" in zlib >= 1.2 */
  return method == GZIP_METHOD ? 15+16 : 15;
}

60
61
62
63
64
/** Given <b>in_len</b> bytes at <b>in</b>, compress them into a newly
 * allocated buffer, using the method described in <b>method</b>.  Store the
 * compressed string in *<b>out</b>, and its length in *<b>out_len</b>.
 * Return 0 on success, -1 on failure.
 */
65
66
int
tor_gzip_compress(char **out, size_t *out_len,
Nick Mathewson's avatar
Nick Mathewson committed
67
                  const char *in, size_t in_len,
68
69
70
                  compress_method_t method)
{
  struct z_stream_s *stream = NULL;
71
  size_t out_size, old_size;
72
73
  off_t offset;

74
75
76
  tor_assert(out);
  tor_assert(out_len);
  tor_assert(in);
77
  tor_assert(in_len < UINT_MAX);
78

79
80
  *out = NULL;

81
82
  if (method == GZIP_METHOD && !is_gzip_supported()) {
    /* Old zlib version don't support gzip in deflateInit2 */
83
    log_warn(LD_BUG, "Gzip not supported with zlib %s", ZLIB_VERSION);
84
    goto err;
85
86
87
88
89
90
91
  }

  stream = tor_malloc_zero(sizeof(struct z_stream_s));
  stream->zalloc = Z_NULL;
  stream->zfree = Z_NULL;
  stream->opaque = NULL;
  stream->next_in = (unsigned char*) in;
92
  stream->avail_in = (unsigned int)in_len;
93
94

  if (deflateInit2(stream, Z_BEST_COMPRESSION, Z_DEFLATED,
Nick Mathewson's avatar
Nick Mathewson committed
95
96
                   method_bits(method),
                   8, Z_DEFAULT_STRATEGY) != Z_OK) {
97
98
    log_warn(LD_GENERAL, "Error from deflateInit2: %s",
             stream->msg?stream->msg:"<no message>");
99
100
101
102
103
104
105
    goto err;
  }

  /* Guess 50% compression. */
  out_size = in_len / 2;
  if (out_size < 1024) out_size = 1024;
  *out = tor_malloc(out_size);
106
  stream->next_out = (unsigned char*)*out;
107
  stream->avail_out = (unsigned int)out_size;
108
109
110
111
112
113
114
115
116
117
118

  while (1) {
    switch (deflate(stream, Z_FINISH))
      {
      case Z_STREAM_END:
        goto done;
      case Z_OK:
        /* In case zlib doesn't work as I think .... */
        if (stream->avail_out >= stream->avail_in+16)
          break;
      case Z_BUF_ERROR:
Nick Mathewson's avatar
Nick Mathewson committed
119
        offset = stream->next_out - ((unsigned char*)*out);
120
        old_size = out_size;
Nick Mathewson's avatar
Nick Mathewson committed
121
        out_size *= 2;
122
123
124
125
        if (out_size < old_size) {
          log_warn(LD_GENERAL, "Size overflow in compression.");
          goto err;
        }
Nick Mathewson's avatar
Nick Mathewson committed
126
        *out = tor_realloc(*out, out_size);
127
        stream->next_out = (unsigned char*)(*out + offset);
128
129
130
131
132
133
        if (out_size - offset > UINT_MAX) {
          log_warn(LD_BUG,  "Ran over unsigned int limit of zlib while "
                   "uncompressing.");
          goto err;
        }
        stream->avail_out = (unsigned int)(out_size - offset);
Nick Mathewson's avatar
Nick Mathewson committed
134
        break;
135
      default:
136
137
        log_warn(LD_GENERAL, "Gzip compression didn't finish: %s",
                 stream->msg ? stream->msg : "<no message>");
138
139
140
141
142
        goto err;
      }
  }
 done:
  *out_len = stream->total_out;
143
144
145
146
147
148
149
150
151
152
#ifdef OPENBSD
  /* "Hey Rocky!  Watch me change an unsigned field to a signed field in a
   *    third-party API!"
   * "Oh, that trick will just make people do unsafe casts to the unsigned
   *    type in their cross-platform code!"
   * "Don't be foolish.  I'm _sure_ they'll have the good sense to make sure
   *    the newly unsigned field isn't negative." */
  tor_assert(stream->total_out >= 0);
#endif
  if (((size_t)stream->total_out) > out_size + 4097) {
153
    /* If we're wasting more than 4k, don't. */
154
    *out = tor_realloc(*out, stream->total_out + 1);
155
  }
156
  if (deflateEnd(stream)!=Z_OK) {
157
    log_warn(LD_BUG, "Error freeing gzip structures");
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
    goto err;
  }
  tor_free(stream);

  return 0;
 err:
  if (stream) {
    deflateEnd(stream);
    tor_free(stream);
  }
  if (*out) {
    tor_free(*out);
  }
  return -1;
}

174
175
/** Given zero or more zlib-compressed or gzip-compressed strings of
 * total length
176
177
178
179
 * <b>in_len</b> bytes at <b>in</b>, uncompress them into a newly allocated
 * buffer, using the method described in <b>method</b>.  Store the uncompressed
 * string in *<b>out</b>, and its length in *<b>out_len</b>.  Return 0 on
 * success, -1 on failure.
180
181
182
183
 *
 * If <b>complete_only</b> is true, we consider a truncated input as a
 * failure; otherwise we decompress as much as we can.  Warn about truncated
 * or corrupt inputs at <b>protocol_warn_level</b>.
184
 */
185
186
int
tor_gzip_uncompress(char **out, size_t *out_len,
Nick Mathewson's avatar
Nick Mathewson committed
187
                    const char *in, size_t in_len,
188
                    compress_method_t method,
189
190
                    int complete_only,
                    int protocol_warn_level)
191
192
{
  struct z_stream_s *stream = NULL;
193
  size_t out_size, old_size;
194
  off_t offset;
195
  int r;
196

197
198
199
  tor_assert(out);
  tor_assert(out_len);
  tor_assert(in);
200
  tor_assert(in_len < UINT_MAX);
201
202
203

  if (method == GZIP_METHOD && !is_gzip_supported()) {
    /* Old zlib version don't support gzip in inflateInit2 */
204
    log_warn(LD_BUG, "Gzip not supported with zlib %s", ZLIB_VERSION);
205
206
207
208
209
210
211
212
213
214
    return -1;
  }

  *out = NULL;

  stream = tor_malloc_zero(sizeof(struct z_stream_s));
  stream->zalloc = Z_NULL;
  stream->zfree = Z_NULL;
  stream->opaque = NULL;
  stream->next_in = (unsigned char*) in;
215
  stream->avail_in = (unsigned int)in_len;
216
217
218

  if (inflateInit2(stream,
                   method_bits(method)) != Z_OK) {
219
220
    log_warn(LD_GENERAL, "Error from inflateInit2: %s",
             stream->msg?stream->msg:"<no message>");
221
222
223
224
225
    goto err;
  }

  out_size = in_len * 2;  /* guess 50% compression. */
  if (out_size < 1024) out_size = 1024;
226
227
  if (out_size > UINT_MAX)
    goto err;
228
229

  *out = tor_malloc(out_size);
230
  stream->next_out = (unsigned char*)*out;
231
  stream->avail_out = (unsigned int)out_size;
232
233

  while (1) {
234
    switch (inflate(stream, complete_only ? Z_FINISH : Z_SYNC_FLUSH))
235
236
      {
      case Z_STREAM_END:
237
238
        if (stream->avail_in == 0)
          goto done;
239
        /* There may be more compressed data here. */
240
        if ((r = inflateEnd(stream)) != Z_OK) {
241
          log_warn(LD_BUG, "Error freeing gzip structures");
242
243
          goto err;
        }
244
        if (inflateInit2(stream, method_bits(method)) != Z_OK) {
245
          log_warn(LD_GENERAL, "Error from second inflateInit2: %s",
246
                   stream->msg?stream->msg:"<no message>");
247
248
249
          goto err;
        }
        break;
250
      case Z_OK:
251
252
        if (!complete_only && stream->avail_in == 0)
          goto done;
Nick Mathewson's avatar
Nick Mathewson committed
253
254
255
        /* In case zlib doesn't work as I think.... */
        if (stream->avail_out >= stream->avail_in+16)
          break;
256
      case Z_BUF_ERROR:
257
        if (stream->avail_out > 0) {
258
259
          log_fn(protocol_warn_level, LD_PROTOCOL,
                 "possible truncated or corrupt zlib data");
260
261
          goto err;
        }
262
        offset = stream->next_out - (unsigned char*)*out;
263
        old_size = out_size;
Nick Mathewson's avatar
Nick Mathewson committed
264
        out_size *= 2;
265
266
267
268
        if (out_size < old_size) {
          log_warn(LD_GENERAL, "Size overflow in compression.");
          goto err;
        }
Nick Mathewson's avatar
Nick Mathewson committed
269
        *out = tor_realloc(*out, out_size);
270
        stream->next_out = (unsigned char*)(*out + offset);
271
272
273
274
275
276
        if (out_size - offset > UINT_MAX) {
          log_warn(LD_BUG,  "Ran over unsigned int limit of zlib while "
                   "uncompressing.");
          goto err;
        }
        stream->avail_out = (unsigned int)(out_size - offset);
Nick Mathewson's avatar
Nick Mathewson committed
277
        break;
278
      default:
279
280
        log_warn(LD_GENERAL, "Gzip decompression returned an error: %s",
                 stream->msg ? stream->msg : "<no message>");
Nick Mathewson's avatar
Nick Mathewson committed
281
        goto err;
282
283
284
      }
  }
 done:
285
  *out_len = stream->next_out - (unsigned char*)*out;
286
287
288
  r = inflateEnd(stream);
  tor_free(stream);
  if (r != Z_OK) {
289
    log_warn(LD_BUG, "Error freeing gzip structures");
290
    goto err;
291
292
  }

293
294
295
296
297
  /* NUL-terminate output. */
  if (out_size == *out_len)
    *out = tor_realloc(*out, out_size + 1);
  (*out)[*out_len] = '\0';

298
299
300
301
302
303
304
305
306
307
308
309
  return 0;
 err:
  if (stream) {
    inflateEnd(stream);
    tor_free(stream);
  }
  if (*out) {
    tor_free(*out);
  }
  return -1;
}

310
311
/** Try to tell whether the <b>in_len</b>-byte string in <b>in</b> is likely
 * to be compressed or not.  If it is, return the likeliest compression method.
312
 * Otherwise, return UNKNOWN_METHOD.
313
 */
314
compress_method_t
315
detect_compression_method(const char *in, size_t in_len)
316
{
317
  if (in_len > 2 && !memcmp(in, "\x1f\x8b", 2)) {
318
319
    return GZIP_METHOD;
  } else if (in_len > 2 && (in[0] & 0x0f) == 8 &&
320
             (ntohs(get_uint16(in)) % 31) == 0) {
321
322
    return ZLIB_METHOD;
  } else {
323
    return UNKNOWN_METHOD;
324
325
  }
}
326

Roger Dingledine's avatar
Roger Dingledine committed
327
328
/** Internal state for an incremental zlib compression/decompression.  The
 * body of this struct is not exposed. */
329
330
331
332
333
struct tor_zlib_state_t {
  /** The underlying zlib stream; tor_zlib_new() initializes it with
   * deflateInit2() or inflateInit2(). */
  struct z_stream_s stream;
  /** True if this state is used for compression, false if it is used for
   * decompression.  Selects deflate vs. inflate calls. */
  int compress;
};

334
335
/** Construct and return a tor_zlib_state_t object using <b>method</b>.  If
 * <b>compress</b>, it's for compression; otherwise it's for
Roger Dingledine's avatar
Roger Dingledine committed
336
 * decompression. */
337
338
339
340
341
342
343
tor_zlib_state_t *
tor_zlib_new(int compress, compress_method_t method)
{
  tor_zlib_state_t *out;

  if (method == GZIP_METHOD && !is_gzip_supported()) {
    /* Old zlib version don't support gzip in inflateInit2 */
344
    log_warn(LD_BUG, "Gzip not supported with zlib %s", ZLIB_VERSION);
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
    return NULL;
 }

 out = tor_malloc_zero(sizeof(tor_zlib_state_t));
 out->stream.zalloc = Z_NULL;
 out->stream.zfree = Z_NULL;
 out->stream.opaque = NULL;
 out->compress = compress;
 if (compress) {
   if (deflateInit2(&out->stream, Z_BEST_COMPRESSION, Z_DEFLATED,
                    method_bits(method), 8, Z_DEFAULT_STRATEGY) != Z_OK)
     goto err;
 } else {
   if (inflateInit2(&out->stream, method_bits(method)) != Z_OK)
     goto err;
 }
 return out;

 err:
 tor_free(out);
 return NULL;
}

368
/** Compress/decompress some bytes using <b>state</b>.  Read up to
369
370
371
372
373
374
375
376
377
 * *<b>in_len</b> bytes from *<b>in</b>, and write up to *<b>out_len</b> bytes
 * to *<b>out</b>, adjusting the values as we go.  If <b>finish</b> is true,
 * we've reached the end of the input.
 *
 * Return TOR_ZLIB_DONE if we've finished the entire compression/decompression.
 * Return TOR_ZLIB_OK if we're processed everything from the input.
 * Return TOR_ZLIB_BUF_FULL if we're out of space on <b>out</b>.
 * Return TOR_ZLIB_ERR if the stream is corrupt.
 */
378
379
380
381
382
383
384
tor_zlib_output_t
tor_zlib_process(tor_zlib_state_t *state,
                 char **out, size_t *out_len,
                 const char **in, size_t *in_len,
                 int finish)
{
  int err;
385
386
  tor_assert(*in_len <= UINT_MAX);
  tor_assert(*out_len <= UINT_MAX);
387
  state->stream.next_in = (unsigned char*) *in;
388
  state->stream.avail_in = (unsigned int)*in_len;
389
  state->stream.next_out = (unsigned char*) *out;
390
  state->stream.avail_out = (unsigned int)*out_len;
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407

  if (state->compress) {
    err = deflate(&state->stream, finish ? Z_FINISH : Z_SYNC_FLUSH);
  } else {
    err = inflate(&state->stream, finish ? Z_FINISH : Z_SYNC_FLUSH);
  }

  *out = (char*) state->stream.next_out;
  *out_len = state->stream.avail_out;
  *in = (const char *) state->stream.next_in;
  *in_len = state->stream.avail_in;

  switch (err)
    {
    case Z_STREAM_END:
      return TOR_ZLIB_DONE;
    case Z_BUF_ERROR:
408
      if (state->stream.avail_in == 0)
409
        return TOR_ZLIB_OK;
410
411
      return TOR_ZLIB_BUF_FULL;
    case Z_OK:
412
      if (state->stream.avail_out == 0 || finish)
413
414
415
416
417
418
419
420
421
        return TOR_ZLIB_BUF_FULL;
      return TOR_ZLIB_OK;
    default:
      log_warn(LD_GENERAL, "Gzip returned an error: %s",
               state->stream.msg ? state->stream.msg : "<no message>");
      return TOR_ZLIB_ERR;
    }
}

422
/** Deallocate <b>state</b>. */
423
424
425
void
tor_zlib_free(tor_zlib_state_t *state)
{
426
427
  if (!state)
    return;
428
429
430
431
432
433
434
435
436

  if (state->compress)
    deflateEnd(&state->stream);
  else
    inflateEnd(&state->stream);

  tor_free(state);
}