buffers.c 58.5 KB
Newer Older
Roger Dingledine's avatar
Roger Dingledine committed
1
2
/* Copyright (c) 2001 Matej Pfajfar.
 * Copyright (c) 2001-2004, Roger Dingledine.
3
 * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
Karsten Loesing's avatar
Karsten Loesing committed
4
 * Copyright (c) 2007-2009, The Tor Project, Inc. */
5
/* See LICENSE for licensing information */
Roger Dingledine's avatar
Roger Dingledine committed
6

7
8
/**
 * \file buffers.c
9
 * \brief Implements a generic interface buffer.  Buffers are
10
11
 * fairly opaque string holders that can read to or flush from:
 * memory, file descriptors, or TLS connections.
12
 **/
13
#define BUFFERS_PRIVATE
Roger Dingledine's avatar
Roger Dingledine committed
14
#include "or.h"
15
16
#include "../common/util.h"
#include "../common/log.h"
17
18
19
20
21
22
23
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_SYS_UIO_H
#include <sys/uio.h>
#endif

24
//#define PARANOIA
25

26
#ifdef PARANOIA
27
28
/** Helper: If PARANOIA is defined, assert that the buffer in local variable
 * <b>buf</b> is well-formed. */
29
#define check() STMT_BEGIN assert_buf_ok(buf); STMT_END
30
#else
31
#define check() STMT_NIL
32
33
#endif

34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
/* Implementation notes:
 *
 * After flirting with memmove, and dallying with ring-buffers, we're finally
 * getting up to speed with the 1970s and implementing buffers as a linked
 * list of small chunks.  Each buffer has such a list; data is removed from
 * the head of the list, and added at the tail.  The list is singly linked,
 * and the buffer keeps a pointer to the head and the tail.
 *
 * Every chunk, except the tail, contains at least one byte of data.  Data in
 * each chunk is contiguous.
 *
 * When you need to treat the first N characters on a buffer as a contiguous
 * string, use the buf_pullup function to make them so.  Don't do this more
 * than necessary.
 *
 * The major free Unix kernels have handled buffers like this since, like,
 * forever.
51
 */
52

53
54
55
56
57
58
59
60
61
62
63
64
/* Chunk manipulation functions */

/** A single chunk on a buffer or in a freelist.
 *
 * Chunks are allocated with extra space after <b>mem</b>, so the struct
 * declares mem[1] but the usable storage is <b>memlen</b> bytes (see
 * CHUNK_ALLOC_SIZE / CHUNK_SIZE_WITH_ALLOC).  <b>data</b> always points
 * into mem; bytes before it have been consumed, bytes after
 * data+datalen are free space. */
typedef struct chunk_t {
  struct chunk_t *next; /**< The next chunk on the buffer or freelist. */
  size_t datalen; /**< The number of bytes stored in this chunk */
  size_t memlen; /**< The number of usable bytes of storage in <b>mem</b>. */
  char *data; /**< A pointer to the first byte of data stored in <b>mem</b>. */
  char mem[1]; /**< The actual memory used for storage in this chunk. May be
                * more than one byte long. */
} chunk_t;

65
66
#define CHUNK_HEADER_LEN STRUCT_OFFSET(chunk_t, mem[0])

67
68
/** Return the number of bytes needed to allocate a chunk to hold
 * <b>memlen</b> bytes. */
69
#define CHUNK_ALLOC_SIZE(memlen) (CHUNK_HEADER_LEN + (memlen))
70
71
/** Return the number of usable bytes in a chunk allocated with
 * malloc(<b>memlen</b>). */
72
#define CHUNK_SIZE_WITH_ALLOC(memlen) ((memlen) - CHUNK_HEADER_LEN)
73
74
75

/** Return the next character in <b>chunk</b> onto which data can be appended.
 * If the chunk is full, this might be off the end of chunk->mem. */
76
static INLINE char *
77
CHUNK_WRITE_PTR(chunk_t *chunk)
78
{
79
  return chunk->data + chunk->datalen;
80
81
}

82
83
84
85
/** Return how many more bytes can be appended to <b>chunk</b> before its
 * storage is exhausted. */
static INLINE size_t
CHUNK_REMAINING_CAPACITY(const chunk_t *chunk)
{
  const char *storage_end = chunk->mem + chunk->memlen;
  const char *data_end = chunk->data + chunk->datalen;
  return storage_end - data_end;
}

90
91
92
93
/** Slide the data stored in <b>chunk</b> back to the very start of
 * chunk->mem, so that all of the unused space sits at the end. */
static INLINE void
chunk_repack(chunk_t *chunk)
{
  char *const front = &chunk->mem[0];
  /* memmove, not memcpy: source and destination may overlap. */
  if (chunk->data != front && chunk->datalen)
    memmove(front, chunk->data, chunk->datalen);
  chunk->data = front;
}

101
#ifdef ENABLE_BUF_FREELISTS
102
103
104
105
106
107
108
109
110
111
/** A freelist of chunks. */
typedef struct chunk_freelist_t {
  size_t alloc_size; /**< What size chunks does this freelist hold? */
  int max_length; /**< Never allow more than this number of chunks in the
                   * freelist. */
  int slack; /**< When trimming the freelist, leave this number of extra
              * chunks beyond lowest_length.*/
  int cur_length; /**< How many chunks on the freelist now? */
  int lowest_length; /**< What's the smallest value of cur_length since the
                      * last time we cleaned this freelist? */
  uint64_t n_alloc; /**< How many chunks of this size have we had to
                     * malloc() because the freelist was empty? */
  uint64_t n_free; /**< How many chunks of this size have we actually
                    * freed (rather than stashing on the freelist)? */
  uint64_t n_hit; /**< How many requests have we served straight off the
                   * freelist, with no allocation? */
  chunk_t *head; /**< First chunk on the freelist. */
} chunk_freelist_t;

/** Macro to help define freelists. */
119
#define FL(a,m,s) { a, m, s, 0, 0, 0, 0, 0, NULL }
120
121
122
123

/** Static array of freelists, sorted by alloc_len, terminated by an entry
 * with alloc_size of 0. */
static chunk_freelist_t freelists[] = {
124
  FL(4096, 256, 8), FL(8192, 128, 4), FL(16384, 64, 4), FL(32768, 32, 2),
125
  FL(0, 0, 0)
126
127
};
#undef FL
128
129
/** How many times have we looked for a chunk of a size that no freelist
 * could help with? */
130
static uint64_t n_freelist_miss = 0;
131
132
133
134
135
136
137

static void assert_freelist_ok(chunk_freelist_t *fl);

/** Return the freelist to hold chunks of size <b>alloc</b>, or NULL if
 * no freelist exists for that size.
 *
 * The <b>freelists</b> array is sorted by alloc_size and terminated by a
 * sentinel entry whose alloc_size is 0.  We must stop at that sentinel
 * explicitly: the old condition (alloc_size <= alloc) is always true for
 * the sentinel, so a request larger than every freelist size (e.g. a
 * MAX_CHUNK_ALLOC chunk) would walk past the end of the array. */
static INLINE chunk_freelist_t *
get_freelist(size_t alloc)
{
  int i;
  for (i=0; freelists[i].alloc_size && freelists[i].alloc_size <= alloc;
       ++i) {
    if (freelists[i].alloc_size == alloc) {
      return &freelists[i];
    }
  }
  return NULL;
}

148
149
/** Deallocate a chunk or put it on a freelist */
static void
150
chunk_free_unchecked(chunk_t *chunk)
151
{
152
153
154
155
156
  size_t alloc;
  chunk_freelist_t *freelist;

  alloc = CHUNK_ALLOC_SIZE(chunk->memlen);
  freelist = get_freelist(alloc);
157
158
159
160
  if (freelist && freelist->cur_length < freelist->max_length) {
    chunk->next = freelist->head;
    freelist->head = chunk;
    ++freelist->cur_length;
161
  } else {
162
163
    if (freelist)
      ++freelist->n_free;
164
    tor_free(chunk);
165
166
  }
}
167

168
/** Return a chunk whose total allocation size is <b>alloc</b>, taking it
 * off the appropriate freelist when one has a spare chunk, and
 * allocating otherwise.  Note that a chunk with allocation size A can
 * actually hold only CHUNK_SIZE_WITH_ALLOC(A) bytes in its mem field. */
static INLINE chunk_t *
chunk_new_with_alloc_size(size_t alloc)
{
  chunk_t *ch;
  chunk_freelist_t *freelist;

  tor_assert(alloc >= sizeof(chunk_t));
  freelist = get_freelist(alloc);
  if (freelist && freelist->head) {
    /* Reuse a cached chunk: pop it off the freelist head. */
    ch = freelist->head;
    freelist->head = ch->next;
    --freelist->cur_length;
    if (freelist->cur_length < freelist->lowest_length)
      freelist->lowest_length = freelist->cur_length;
    ++freelist->n_hit;
  } else {
    /* XXXX take advantage of tor_malloc_roundup, once we know how that
     * affects freelists. */
    if (freelist)
      ++freelist->n_alloc;
    else
      ++n_freelist_miss;
    ch = tor_malloc(alloc);
  }
  /* Reset the chunk to the empty state regardless of where it came from. */
  ch->next = NULL;
  ch->datalen = 0;
  ch->memlen = CHUNK_SIZE_WITH_ALLOC(alloc);
  ch->data = &ch->mem[0];
  return ch;
}
199
200
#else
/* Freelists are disabled: fall back to plain malloc/free for chunks. */

/** Deallocate <b>chunk</b> outright (no freelist support compiled in). */
static void
chunk_free_unchecked(chunk_t *chunk)
{
  tor_free(chunk);
}
/** Allocate and return a new chunk with allocation size <b>alloc</b>.
 * tor_malloc_roundup() may increase <b>alloc</b> to the size the
 * allocator actually hands back, so memlen is computed from the updated
 * value. */
static INLINE chunk_t *
chunk_new_with_alloc_size(size_t alloc)
{
  chunk_t *ch;
  ch = tor_malloc_roundup(&alloc);
  ch->next = NULL;
  ch->datalen = 0;
  ch->memlen = CHUNK_SIZE_WITH_ALLOC(alloc);
  ch->data = &ch->mem[0];
  return ch;
}
#endif
217

218
219
220
221
222
223
224
225
226
227
228
229
/** Reallocate <b>chunk</b> so that it can hold <b>sz</b> bytes of
 * storage, and return the (possibly relocated) chunk.  Old pointers to
 * the chunk are no longer valid afterward. */
static INLINE chunk_t *
chunk_grow(chunk_t *chunk, size_t sz)
{
  off_t data_offset;
  tor_assert(sz > chunk->memlen);
  /* Record where the data began relative to mem: realloc may move the
   * whole allocation, so the data pointer must be rebuilt. */
  data_offset = chunk->data - chunk->mem;
  chunk = tor_realloc(chunk, CHUNK_ALLOC_SIZE(sz));
  chunk->memlen = sz;
  chunk->data = chunk->mem + data_offset;
  return chunk;
}

232
233
234
235
236
/** If a read onto the end of a chunk would be smaller than this number, then
 * just start a new chunk. */
#define MIN_READ_LEN 8
/** Every chunk should take up at least this many bytes. */
#define MIN_CHUNK_ALLOC 256
237
/** No chunk should take up more than this many bytes. */
238
239
240
241
242
243
#define MAX_CHUNK_ALLOC 65536

/** Return the allocation size we'd like to use to hold <b>target</b>
 * bytes of data: the smallest power-of-two allocation, at least
 * MIN_CHUNK_ALLOC, whose usable portion fits <b>target</b>. */
static INLINE size_t
preferred_chunk_size(size_t target)
{
  size_t alloc = MIN_CHUNK_ALLOC;
  while (CHUNK_SIZE_WITH_ALLOC(alloc) < target)
    alloc <<= 1;
  return alloc;
}

252
253
/** Remove from the freelists most chunks that have not been used since the
 * last call to buf_shrink_freelists().  If <b>free_all</b> is true, free
 * every chunk on every freelist instead. */
void
buf_shrink_freelists(int free_all)
{
#ifdef ENABLE_BUF_FREELISTS
  int i;
  for (i = 0; freelists[i].alloc_size; ++i) {
    int slack = freelists[i].slack;
    assert_freelist_ok(&freelists[i]);
    if (free_all || freelists[i].lowest_length > slack) {
      /* lowest_length chunks went unused since the last cleaning, so we
       * can drop that many (minus the configured slack) without hurting
       * the hit rate.  With free_all, drop everything. */
      int n_to_free = free_all ? freelists[i].cur_length :
        (freelists[i].lowest_length - slack);
      int n_to_skip = freelists[i].cur_length - n_to_free;
      int orig_n_to_free = n_to_free, n_freed=0;
      int new_length = n_to_skip;
      chunk_t **chp = &freelists[i].head;
      chunk_t *chunk;
      log_info(LD_MM, "Cleaning freelist for %d-byte chunks: keeping %d, "
               "dropping %d.",
               (int)freelists[i].alloc_size, n_to_skip, n_to_free);
      /* Walk chp to the link that points at the first chunk to free. */
      while (n_to_skip) {
        tor_assert((*chp)->next);
        chp = &(*chp)->next;
        --n_to_skip;
      }
      /* Detach the tail of the list at *chp and free every chunk on it. */
      chunk = *chp;
      *chp = NULL;
      while (chunk) {
        chunk_t *next = chunk->next;
        tor_free(chunk);
        chunk = next;
        --n_to_free;
        ++n_freed;
        ++freelists[i].n_free;
      }
      /* n_to_free should have hit exactly zero; anything else means
       * cur_length disagreed with the actual list length. */
      if (n_to_free) {
        log_warn(LD_BUG, "Freelist length for %d-byte chunks may have been "
                 "messed up somehow.", (int)freelists[i].alloc_size);
        log_warn(LD_BUG, "There were %d chunks at the start.  I decided to "
                 "keep %d. I wanted to free %d.  I freed %d.  I somehow think "
                 "I have %d left to free.",
                 freelists[i].cur_length, n_to_skip, orig_n_to_free,
                 n_freed, n_to_free);
      }
      // tor_assert(!n_to_free);
      freelists[i].cur_length = new_length;
    }
    /* Start a fresh observation window for the next cleaning pass. */
    freelists[i].lowest_length = freelists[i].cur_length;
    assert_freelist_ok(&freelists[i]);
  }
#else
  (void) free_all;
#endif
}

308
309
310
311
/** Describe the current status of the freelists at log level <b>severity</b>.
 */
void
buf_dump_freelist_sizes(int severity)
{
#ifdef ENABLE_BUF_FREELISTS
  int i;
  log(severity, LD_MM, "====== Buffer freelists:");
  for (i = 0; freelists[i].alloc_size; ++i) {
    /* Total bytes currently parked on this freelist. */
    uint64_t total = ((uint64_t)freelists[i].cur_length) *
      freelists[i].alloc_size;
    log(severity, LD_MM,
        U64_FORMAT" bytes in %d %d-byte chunks ["U64_FORMAT
        " misses; "U64_FORMAT" frees; "U64_FORMAT" hits]",
        U64_PRINTF_ARG(total),
        freelists[i].cur_length, (int)freelists[i].alloc_size,
        U64_PRINTF_ARG(freelists[i].n_alloc),
        U64_PRINTF_ARG(freelists[i].n_free),
        U64_PRINTF_ARG(freelists[i].n_hit));
  }
  /* Requests for sizes no freelist covers at all. */
  log(severity, LD_MM, U64_FORMAT" allocations in non-freelist sizes",
      U64_PRINTF_ARG(n_freelist_miss));
#else
  (void)severity;
#endif
}
334

335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
/** Magic value for buf_t.magic, to catch pointer errors. */
#define BUFFER_MAGIC 0xB0FFF312u
/** A resizeable buffer, optimized for reading and writing.
 *
 * Implemented as a singly linked list of chunk_t: data is consumed from
 * <b>head</b> and appended at <b>tail</b>. */
struct buf_t {
  uint32_t magic; /**< Magic cookie for debugging: Must be set to
                   *   BUFFER_MAGIC. */
  size_t datalen; /**< How many bytes is this buffer holding right now? */
  size_t default_chunk_size; /**< Don't allocate any chunks smaller than
                              * this for this buffer. */
  chunk_t *head; /**< First chunk in the list, or NULL for none. */
  chunk_t *tail; /**< Last chunk in the list, or NULL for none. */
};

/** Collapse data from the first N chunks from <b>buf</b> into buf->head,
 * growing it as necessary, until buf->head has the first <b>bytes</b> bytes
 * of data from the buffer, or until buf->head has all the data in <b>buf</b>.
 *
 * If <b>nulterminate</b> is true, ensure that there is a 0 byte in
 * buf->head->mem right after all the data. */
static void
buf_pullup(buf_t *buf, size_t bytes, int nulterminate)
{
  chunk_t *dest, *src;
  size_t capacity;
  if (!buf->head)
    return;

  check();
  if (buf->datalen < bytes)
    bytes = buf->datalen;

  if (nulterminate) {
    /* One extra byte of head capacity is needed for the terminating NUL. */
    capacity = bytes + 1;
    if (buf->head->datalen >= bytes && CHUNK_REMAINING_CAPACITY(buf->head)) {
      *CHUNK_WRITE_PTR(buf->head) = '\0';
      return;
    }
  } else {
    capacity = bytes;
    if (buf->head->datalen >= bytes)
      return;
  }

  if (buf->head->memlen >= capacity) {
    /* We don't need to grow the first chunk, but we might need to repack it.
     *
     * The amount of extra room the head chunk needs is measured against the
     * head chunk's own datalen, NOT the whole buffer's datalen: at this
     * point buf->head->datalen < bytes <= capacity, so this subtraction
     * cannot underflow, whereas capacity - buf->datalen underflows (size_t)
     * whenever the buffer holds more data than we are pulling up. */
    if (CHUNK_REMAINING_CAPACITY(buf->head) < capacity - buf->head->datalen)
      chunk_repack(buf->head);
    tor_assert(CHUNK_REMAINING_CAPACITY(buf->head) >=
               capacity - buf->head->datalen);
  } else {
    chunk_t *newhead;
    size_t newsize;
    /* We need to grow the chunk. */
    chunk_repack(buf->head);
    newsize = CHUNK_SIZE_WITH_ALLOC(preferred_chunk_size(capacity));
    newhead = chunk_grow(buf->head, newsize);
    tor_assert(newhead->memlen >= capacity);
    if (newhead != buf->head) {
      /* The chunk moved in memory: fix up the list pointers. */
      if (buf->tail == buf->head)
        buf->tail = newhead;
      buf->head = newhead;
    }
  }

  /* Copy data out of subsequent chunks into the head until it holds the
   * first <b>bytes</b> bytes of the buffer. */
  dest = buf->head;
  while (dest->datalen < bytes) {
    size_t n = bytes - dest->datalen;
    src = dest->next;
    tor_assert(src);
    if (n > src->datalen) {
      /* The whole source chunk fits: absorb it and free it. */
      memcpy(CHUNK_WRITE_PTR(dest), src->data, src->datalen);
      dest->datalen += src->datalen;
      dest->next = src->next;
      if (buf->tail == src)
        buf->tail = dest;
      chunk_free_unchecked(src);
    } else {
      /* Only part of the source chunk is needed: move n bytes and advance
       * the source's data pointer past them. */
      memcpy(CHUNK_WRITE_PTR(dest), src->data, n);
      dest->datalen += n;
      src->data += n;
      src->datalen -= n;
      tor_assert(dest->datalen == bytes);
    }
  }

  if (nulterminate) {
    tor_assert(CHUNK_REMAINING_CAPACITY(buf->head));
    *CHUNK_WRITE_PTR(buf->head) = '\0';
  }

  check();
}

427
/** Resize buf so it won't hold extra memory that we haven't been
 * using lately.
 */
void
buf_shrink(buf_t *buf)
{
  /* Currently a no-op: the function is kept so callers have a stable
   * API hook for reclaiming buffer memory. */
  (void)buf;
}

436
/** Remove the first <b>n</b> bytes from buf. */
437
static INLINE void
438
439
buf_remove_from_front(buf_t *buf, size_t n)
{
Roger Dingledine's avatar
Roger Dingledine committed
440
  tor_assert(buf->datalen >= n);
441
442
443
444
445
446
447
448
449
450
451
452
453
454
  while (n) {
    tor_assert(buf->head);
    if (buf->head->datalen > n) {
      buf->head->datalen -= n;
      buf->head->data += n;
      buf->datalen -= n;
      return;
    } else {
      chunk_t *victim = buf->head;
      n -= victim->datalen;
      buf->datalen -= victim->datalen;
      buf->head = victim->next;
      if (buf->tail == victim)
        buf->tail = NULL;
455
      chunk_free_unchecked(victim);
456
    }
457
  }
458
  check();
459
460
}

461
462
/** Create and return a new buf with default chunk capacity <b>size</b>.
 */
463
buf_t *
464
465
buf_new_with_capacity(size_t size)
{
466
467
468
  buf_t *b = buf_new();
  b->default_chunk_size = preferred_chunk_size(size);
  return b;
469
}
Roger Dingledine's avatar
Roger Dingledine committed
470

471
/** Allocate and return a new buffer with default capacity. */
472
473
buf_t *
buf_new(void)
474
{
475
476
477
478
  buf_t *buf = tor_malloc_zero(sizeof(buf_t));
  buf->magic = BUFFER_MAGIC;
  buf->default_chunk_size = 4096;
  return buf;
479
}
Roger Dingledine's avatar
Roger Dingledine committed
480

481
/** Remove all data from <b>buf</b>. */
482
483
void
buf_clear(buf_t *buf)
484
{
485
  chunk_t *chunk, *next;
486
  buf->datalen = 0;
487
488
  for (chunk = buf->head; chunk; chunk = next) {
    next = chunk->next;
489
    chunk_free_unchecked(chunk);
490
491
  }
  buf->head = buf->tail = NULL;
492
}
Roger Dingledine's avatar
Roger Dingledine committed
493

Roger Dingledine's avatar
Roger Dingledine committed
494
/** Return the number of bytes stored in <b>buf</b> */
495
496
size_t
buf_datalen(const buf_t *buf)
497
498
{
  return buf->datalen;
Roger Dingledine's avatar
Roger Dingledine committed
499
500
}

501
/** Return the total length of all chunks used in <b>buf</b>. */
502
size_t
503
buf_allocation(const buf_t *buf)
504
{
505
506
507
508
509
510
  size_t total = 0;
  const chunk_t *chunk;
  for (chunk = buf->head; chunk; chunk = chunk->next) {
    total += chunk->memlen;
  }
  return total;
511
512
}

513
514
515
516
/** Return the number of bytes that can be added to <b>buf</b> without
 * performing any additional allocation. */
size_t
buf_slack(const buf_t *buf)
517
{
518
519
520
521
  if (!buf->tail)
    return 0;
  else
    return CHUNK_REMAINING_CAPACITY(buf->tail);
522
523
}

524
/** Release storage held by <b>buf</b>. */
525
526
527
void
buf_free(buf_t *buf)
{
528
529
  if (!buf)
    return;
530
531
532
533
534
  buf_clear(buf);
  buf->magic = 0xdeadbeef;
  tor_free(buf);
}

535
536
537
/** Append a new chunk with enough capacity to hold <b>capacity</b> bytes to
 * the tail of <b>buf</b>.  If <b>capped</b>, don't allocate a chunk bigger
 * than MAX_CHUNK_ALLOC. */
538
static chunk_t *
539
buf_add_chunk_with_capacity(buf_t *buf, size_t capacity, int capped)
540
541
{
  chunk_t *chunk;
542
  if (CHUNK_ALLOC_SIZE(capacity) < buf->default_chunk_size) {
543
    chunk = chunk_new_with_alloc_size(buf->default_chunk_size);
544
545
  } else if (capped && CHUNK_ALLOC_SIZE(capacity) > MAX_CHUNK_ALLOC) {
    chunk = chunk_new_with_alloc_size(MAX_CHUNK_ALLOC);
546
  } else {
547
    chunk = chunk_new_with_alloc_size(preferred_chunk_size(capacity));
548
  }
549
550
551
552
553
554
555
  if (buf->tail) {
    tor_assert(buf->head);
    buf->tail->next = chunk;
    buf->tail = chunk;
  } else {
    tor_assert(!buf->head);
    buf->head = buf->tail = chunk;
556
  }
557
558
  check();
  return chunk;
Roger Dingledine's avatar
Roger Dingledine committed
559
560
}

561
562
/** If we're using readv and writev, how many chunks are we willing to
 * read/write at a time? */
563
564
#define N_IOV 3

565
/** Read up to <b>at_most</b> bytes from the socket <b>fd</b> into
 * <b>chunk</b> (which must be on <b>buf</b>). If we get an EOF, set
 * *<b>reached_eof</b> to 1.  Return -1 on error (storing the errno value in
 * *<b>socket_error</b>), 0 on eof or blocking, and the number of bytes read
 * otherwise. */
static INLINE int
read_to_chunk(buf_t *buf, chunk_t *chunk, int fd, size_t at_most,
              int *reached_eof, int *socket_error)
{
  ssize_t read_result;
#if 0 && defined(HAVE_READV) && !defined(WIN32)
  /* Disabled scatter-read path: fill up to N_IOV iovecs from successive
   * chunks and read into all of them with one readv() call. */
  struct iovec iov[N_IOV];
  int i;
  size_t remaining = at_most;
  for (i=0; chunk && i < N_IOV && remaining; ++i) {
    iov[i].iov_base = CHUNK_WRITE_PTR(chunk);
    if (remaining > CHUNK_REMAINING_CAPACITY(chunk))
      iov[i].iov_len = CHUNK_REMAINING_CAPACITY(chunk);
    else
      iov[i].iov_len = remaining;
    remaining -= iov[i].iov_len;
    chunk = chunk->next;
  }
  read_result = readv(fd, iov, i);
#else
  /* Never read more than the chunk has room for. */
  if (at_most > CHUNK_REMAINING_CAPACITY(chunk))
    at_most = CHUNK_REMAINING_CAPACITY(chunk);
  read_result = tor_socket_recv(fd, CHUNK_WRITE_PTR(chunk), at_most, 0);
#endif

  if (read_result < 0) {
    int e = tor_socket_errno(fd);
    if (!ERRNO_IS_EAGAIN(e)) { /* it's a real error */
#ifdef MS_WINDOWS
      if (e == WSAENOBUFS)
        log_warn(LD_NET,"recv() failed: WSAENOBUFS. Not enough ram?");
#endif
      *socket_error = e;
      return -1;
    }
    return 0; /* would block. */
  } else if (read_result == 0) {
    log_debug(LD_NET,"Encountered eof on fd %d", (int)fd);
    *reached_eof = 1;
    return 0;
  } else { /* actually got bytes. */
    buf->datalen += read_result;
#if 0 && defined(HAVE_READV) && !defined(WIN32)
    /* Disabled readv accounting: distribute the bytes read across the
     * chunks that supplied the iovecs. */
    while ((size_t)read_result > CHUNK_REMAINING_CAPACITY(chunk)) {
      chunk->datalen += CHUNK_REMAINING_CAPACITY(chunk);
      read_result -= CHUNK_REMAINING_CAPACITY(chunk);
      chunk = chunk->next;
      tor_assert(chunk);
    }
#endif
    chunk->datalen += read_result;
    log_debug(LD_NET,"Read %ld bytes. %d on inbuf.", (long)read_result,
              (int)buf->datalen);
    tor_assert(read_result < INT_MAX);
    return (int)read_result;
  }
}

627
628
/** As read_to_chunk(), but return (negative) error code on error, blocking,
 * or TLS, and the number of bytes read otherwise. */
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
static INLINE int
read_to_chunk_tls(buf_t *buf, chunk_t *chunk, tor_tls_t *tls,
                  size_t at_most)
{
  int read_result;

  tor_assert(CHUNK_REMAINING_CAPACITY(chunk) >= at_most);
  read_result = tor_tls_read(tls, CHUNK_WRITE_PTR(chunk), at_most);
  if (read_result < 0)
    return read_result;
  buf->datalen += read_result;
  chunk->datalen += read_result;
  return read_result;
}

Nick Mathewson's avatar
Nick Mathewson committed
644
/** Read from socket <b>s</b>, writing onto end of <b>buf</b>.  Read at most
 * <b>at_most</b> bytes, growing the buffer as necessary.  If recv() returns 0
 * (because of EOF), set *<b>reached_eof</b> to 1 and return 0. Return -1 on
 * error; else return the number of bytes read.
 */
/* XXXX021 indicate "read blocked" somehow? */
int
read_to_buf(int s, size_t at_most, buf_t *buf, int *reached_eof,
            int *socket_error)
{
  /* XXXX021 It's stupid to overload the return values for these functions:
   * "error status" and "number of bytes read" are not mutually exclusive.
   */
  int r = 0;
  size_t total_read = 0;

  check();
  tor_assert(reached_eof);
  tor_assert(s >= 0);

  while (at_most > total_read) {
    size_t readlen = at_most - total_read;
    chunk_t *chunk;
    if (!buf->tail || CHUNK_REMAINING_CAPACITY(buf->tail) < MIN_READ_LEN) {
      /* The tail is (nearly) full: start a fresh chunk rather than doing
       * a tiny read into the leftover space. */
      chunk = buf_add_chunk_with_capacity(buf, at_most, 1);
      if (readlen > chunk->memlen)
        readlen = chunk->memlen;
    } else {
      /* Fill the remaining space in the tail chunk. */
      size_t cap = CHUNK_REMAINING_CAPACITY(buf->tail);
      chunk = buf->tail;
      if (cap < readlen)
        readlen = cap;
    }

    r = read_to_chunk(buf, chunk, s, readlen, reached_eof, socket_error);
    check();
    if (r < 0)
      return r; /* Error */
    tor_assert(total_read+r < INT_MAX);
    total_read += r;
    if ((size_t)r < readlen) { /* eof, block, or no more to read. */
      break;
    }
  }
  return (int)total_read;
}

691
692
/** As read_to_buf, but reads from a TLS connection, and returns a TLS
 * status value rather than the number of bytes read.
 *
 * Using TLS on OR connections complicates matters in two ways.
 *
 * First, a TLS stream has its own read buffer independent of the
 * connection's read buffer.  (TLS needs to read an entire frame from
 * the network before it can decrypt any data.  Thus, trying to read 1
 * byte from TLS can require that several KB be read from the network
 * and decrypted.  The extra data is stored in TLS's decrypt buffer.)
 * Because the data hasn't been read by Tor (it's still inside the TLS),
 * this means that sometimes a connection "has stuff to read" even when
 * poll() didn't return POLLIN. The tor_tls_get_pending_bytes function is
 * used in connection.c to detect TLS objects with non-empty internal
 * buffers and read from them again.
 *
 * Second, the TLS stream's events do not correspond directly to network
 * events: sometimes, before a TLS stream can read, the network must be
 * ready to write -- or vice versa.
 */
int
read_to_buf_tls(tor_tls_t *tls, size_t at_most, buf_t *buf)
{
  int r = 0;
  size_t total_read = 0;
  check();

  while (at_most > total_read) {
    size_t readlen = at_most - total_read;
    chunk_t *chunk;
    if (!buf->tail || CHUNK_REMAINING_CAPACITY(buf->tail) < MIN_READ_LEN) {
      /* The tail is (nearly) full: start a fresh chunk rather than doing
       * a tiny read into the leftover space. */
      chunk = buf_add_chunk_with_capacity(buf, at_most, 1);
      if (readlen > chunk->memlen)
        readlen = chunk->memlen;
    } else {
      /* Fill the remaining space in the tail chunk. */
      size_t cap = CHUNK_REMAINING_CAPACITY(buf->tail);
      chunk = buf->tail;
      if (cap < readlen)
        readlen = cap;
    }

    r = read_to_chunk_tls(buf, chunk, tls, readlen);
    check();
    if (r < 0)
      return r; /* Error */
    tor_assert(total_read+r < INT_MAX);
    total_read += r;
    if ((size_t)r < readlen) /* eof, block, or no more to read. */
      break;
  }
  return (int)total_read;
}
743

744
745
746
747
/** Helper for flush_buf(): try to write <b>sz</b> bytes from chunk
 * <b>chunk</b> of buffer <b>buf</b> onto socket <b>s</b>.  On success, deduct
 * the bytes written from *<b>buf_flushlen</b>.  Return the number of bytes
 * written on success, 0 on blocking, -1 on failure.
 */
static INLINE int
flush_chunk(int s, buf_t *buf, chunk_t *chunk, size_t sz,
            size_t *buf_flushlen)
{
  ssize_t write_result;
#if 0 && defined(HAVE_WRITEV) && !defined(WIN32)
  /* Disabled gather-write path: build up to N_IOV iovecs from successive
   * chunks and write them with a single writev() call. */
  struct iovec iov[N_IOV];
  int i;
  size_t remaining = sz;
  for (i=0; chunk && i < N_IOV && remaining; ++i) {
    iov[i].iov_base = chunk->data;
    if (remaining > chunk->datalen)
      iov[i].iov_len = chunk->datalen;
    else
      iov[i].iov_len = remaining;
    remaining -= iov[i].iov_len;
    chunk = chunk->next;
  }
  write_result = writev(s, iov, i);
#else
  /* Never write more than the chunk actually holds. */
  if (sz > chunk->datalen)
    sz = chunk->datalen;
  write_result = tor_socket_send(s, chunk->data, sz, 0);
#endif

  if (write_result < 0) {
    int e = tor_socket_errno(s);
    if (!ERRNO_IS_EAGAIN(e)) { /* it's a real error */
#ifdef MS_WINDOWS
      if (e == WSAENOBUFS)
        log_warn(LD_NET,"write() failed: WSAENOBUFS. Not enough ram?");
#endif
      return -1;
    }
    log_debug(LD_NET,"write() would block, returning.");
    return 0;
  } else {
    /* Drop the written bytes from the front of the buffer. */
    *buf_flushlen -= write_result;
    buf_remove_from_front(buf, write_result);
    tor_assert(write_result < INT_MAX);
    return (int)write_result;
  }
}

793
794
795
796
/** Helper for flush_buf_tls(): try to write <b>sz</b> bytes from chunk
 * <b>chunk</b> of buffer <b>buf</b> onto socket <b>s</b>.  (Tries to write
 * more if there is a forced pending write size.)  On success, deduct the
 * bytes written from *<b>buf_flushlen</b>.  Return the number of bytes
 * written on success, and a TOR_TLS error code on failure or blocking.
 */
static INLINE int
flush_chunk_tls(tor_tls_t *tls, buf_t *buf, chunk_t *chunk,
                size_t sz, size_t *buf_flushlen)
{
  int r;
  size_t forced;
  char *data;

  /* TLS may require us to retry a previous write with at least the same
   * size; honor that by raising sz to the forced amount if needed. */
  forced = tor_tls_get_forced_write_size(tls);
  if (forced > sz)
    sz = forced;
  if (chunk) {
    data = chunk->data;
    tor_assert(sz <= chunk->datalen);
  } else {
    /* No chunk: only legal when there is nothing to write (e.g. the
     * caller is letting TLS finish a pending partial record). */
    data = NULL;
    tor_assert(sz == 0);
  }
  r = tor_tls_write(tls, data, sz);
  if (r < 0)
    return r;
  /* A forced write may flush more bytes than the caller asked for, so
   * clamp the decrement at zero. */
  if (*buf_flushlen > (size_t)r)
    *buf_flushlen -= r;
  else
    *buf_flushlen = 0;
  buf_remove_from_front(buf, r);
  log_debug(LD_NET,"flushed %d bytes, %d ready to flush, %d remain.",
            r,(int)*buf_flushlen,(int)buf->datalen);
  return r;
}

Roger Dingledine's avatar
Roger Dingledine committed
830
/** Write data from <b>buf</b> to the socket <b>s</b>.  Write at most
 * <b>sz</b> bytes, decrement *<b>buf_flushlen</b> by
 * the number of bytes actually written, and remove the written bytes
 * from the buffer.  Return the number of bytes written on success,
 * -1 on failure.  Return 0 if write() would block.
 */
int
flush_buf(int s, buf_t *buf, size_t sz, size_t *buf_flushlen)
{
  /* XXXX021 It's stupid to overload the return values for these functions:
   * "error status" and "number of bytes flushed" are not mutually exclusive.
   */
  int r;
  size_t flushed = 0;
  tor_assert(buf_flushlen);
  tor_assert(s >= 0);
  tor_assert(*buf_flushlen <= buf->datalen);
  tor_assert(sz <= *buf_flushlen);

  check();
  while (sz) {
    size_t flushlen0;
    tor_assert(buf->head);
    /* Flush at most one head chunk's worth of data per iteration. */
    if (buf->head->datalen >= sz)
      flushlen0 = sz;
    else
      flushlen0 = buf->head->datalen;

    r = flush_chunk(s, buf, buf->head, flushlen0, buf_flushlen);
    check();
    if (r < 0)
      return r;
    flushed += r;
    sz -= r;
    if (r == 0 || (size_t)r < flushlen0) /* can't flush any more now. */
      break;
  }
  tor_assert(flushed < INT_MAX);
  return (int)flushed;
}
870

871
872
/** As flush_buf(), but writes data to a TLS connection.  Can write more than
 * <b>flushlen</b> bytes.
873
 */
874
int
875
876
flush_buf_tls(tor_tls_t *tls, buf_t *buf, size_t flushlen,
              size_t *buf_flushlen)
877
878
{
  int r;
879
  size_t flushed = 0;
880
  ssize_t sz;
881
  tor_assert(buf_flushlen);
882
  tor_assert(*buf_flushlen <= buf->datalen);
883
884
  tor_assert(flushlen <= *buf_flushlen);
  sz = (ssize_t) flushlen;
Roger Dingledine's avatar
Roger Dingledine committed
885
886
887

  /* we want to let tls write even if flushlen is zero, because it might
   * have a partial record pending */
888
  check_no_tls_errors();
889

890
  check();
891
  do {
892
    size_t flushlen0;
893
    if (buf->head) {
894
      if ((ssize_t)buf->head->datalen >= sz)
895
896
897
898
899
900
        flushlen0 = sz;
      else
        flushlen0 = buf->head->datalen;
    } else {
      flushlen0 = 0;
    }
901

902
    r = flush_chunk_tls(tls, buf, buf->head, flushlen0, buf_flushlen);
903
    check();
904
    if (r < 0)
905
906
      return r;
    flushed += r;
907
    sz -= r;
908
909
    if (r == 0) /* Can't flush any more now. */
      break;
910
  } while (sz > 0);
911
912
  tor_assert(flushed < INT_MAX);
  return (int)flushed;
913
914
}

Roger Dingledine's avatar
Roger Dingledine committed
915
916
917
/** Append <b>string_len</b> bytes from <b>string</b> to the end of
 * <b>buf</b>.
 *
918
919
 * Return the new length of the buffer on success, -1 on failure.
 */
920
921
922
int
write_to_buf(const char *string, size_t string_len, buf_t *buf)
{
923
  if (!string_len)
924
    return (int)buf->datalen;
925
  check();
Roger Dingledine's avatar
Roger Dingledine committed
926

927
928
929
930
931
932
  while (string_len) {
    size_t copy;
    if (!buf->tail || !CHUNK_REMAINING_CAPACITY(buf->tail))
      buf_add_chunk_with_capacity(buf, string_len, 1);

    copy = CHUNK_REMAINING_CAPACITY(buf->tail);
933
934
935
936
937
938
939
    if (copy > string_len)
      copy = string_len;
    memcpy(CHUNK_WRITE_PTR(buf->tail), string, copy);
    string_len -= copy;
    string += copy;
    buf->datalen += copy;
    buf->tail->datalen += copy;
940
  }
941

942
  check();
943
944
  tor_assert(buf->datalen < INT_MAX);
  return (int)buf->datalen;
945
946
}

947
948
/** Helper: copy the first <b>string_len</b> bytes from <b>buf</b>
 * onto <b>string</b>.
949
950
 */
static INLINE void
951
peek_from_buf(char *string, size_t string_len, const buf_t *buf)
952
{
953
  chunk_t *chunk;
Roger Dingledine's avatar
Roger Dingledine committed
954

Roger Dingledine's avatar
Roger Dingledine committed
955
  tor_assert(string);
956
957
  /* make sure we don't ask for too much */
  tor_assert(string_len <= buf->datalen);
958
  /* assert_buf_ok(buf); */
Roger Dingledine's avatar
Roger Dingledine committed
959

960
961
962
963
964
965
966
967
968
969
  chunk = buf->head;
  while (string_len) {
    size_t copy = string_len;
    tor_assert(chunk);
    if (chunk->datalen < copy)
      copy = chunk->datalen;
    memcpy(string, chunk->data, copy);
    string_len -= copy;
    string += copy;
    chunk = chunk->next;
970
971
972
  }
}

973
974
975
/** Remove <b>string_len</b> bytes from the front of <b>buf</b>, and store
 * them into <b>string</b>.  Return the new buffer size.  <b>string_len</b>
 * must be \<= the number of bytes on the buffer.
976
 */
977
978
int
fetch_from_buf(char *string, size_t string_len, buf_t *buf)
979
980
981
982
983
984
{
  /* There must be string_len bytes in buf; write them onto string,
   * then memmove buf back (that is, remove them from buf).
   *
   * Return the number of bytes still on the buffer. */

985
  check();
986
  peek_from_buf(string, string_len, buf);
987
  buf_remove_from_front(buf, string_len);
988
  check();
989
990
  tor_assert(buf->datalen < INT_MAX);
  return (int)buf->datalen;
Roger Dingledine's avatar
Roger Dingledine committed
991
992
}

993
994
995
996
997
998
999
/** Check <b>buf</b> for a variable-length cell according to the rules of link
 * protocol version <b>linkproto</b>.  If one is found, pull it off the buffer
 * and assign a newly allocated var_cell_t to *<b>out</b>, and return 1.
 * Return 0 if whatever is on the start of buf_t is not a variable-length
 * cell.  Return 1 and set *<b>out</b> to NULL if there seems to be the start
 * of a variable-length cell on <b>buf</b>, but the whole thing isn't there
 * yet. */
1000
int