Commit a9edb0b4 authored by Mike Perry's avatar Mike Perry
Browse files

More gracefully handle corrupt state files.

Save a backup if we get odd circuitbuildtimes and other state info.

In the case of circuit build times, we no longer assert, and reset our state.
parent 7bbdf71a
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -29,5 +29,7 @@
     parameter and via a LearnCircuitBuildTimeout config option. Also
     automatically disable circuit build time calculation if we are either
     a AuthoritativeDirectory, or if we fail to write our state file. Bug 1296.
   - More gracefully handle corrupt state files, removing asserts in favor
     of saving a backup and resetting state.

+23 −4
Original line number Diff line number Diff line
@@ -193,12 +193,11 @@ circuit_build_times_new_consensus_params(circuit_build_times_t *cbt,
  int32_t num = networkstatus_get_param(ns, "cbtrecentcount",
                   CBT_DEFAULT_RECENT_CIRCUITS);

  if (num != cbt->liveness.num_recent_circs) {
  if (num > 0 && num != cbt->liveness.num_recent_circs) {
    int8_t *recent_circs;
    log_notice(LD_CIRC, "Changing recent timeout size from %d to %d",
               cbt->liveness.num_recent_circs, num);

    tor_assert(num > 0);
    tor_assert(cbt->liveness.timeouts_after_firsthop);

    /*
@@ -675,6 +674,10 @@ circuit_build_times_parse_state(circuit_build_times_t *cbt,
            "Corrupt state file? Build times count mismatch. "
            "Read %d times, but file says %d", loaded_cnt,
            state->TotalBuildTimes);
    *msg = tor_strdup("Build times count mismatch.");
    circuit_build_times_reset(cbt);
    tor_free(loaded_times);
    return -1;
  }

  circuit_build_times_shuffle_and_store_array(cbt, loaded_times, loaded_cnt);
@@ -688,8 +691,18 @@ circuit_build_times_parse_state(circuit_build_times_t *cbt,
  log_info(LD_CIRC,
           "Loaded %d/%d values from %d lines in circuit time histogram",
           tot_values, cbt->total_build_times, N);
  tor_assert(cbt->total_build_times == tot_values);
  tor_assert(cbt->total_build_times <= CBT_NCIRCUITS_TO_OBSERVE);

  if (cbt->total_build_times != tot_values
        || cbt->total_build_times > CBT_NCIRCUITS_TO_OBSERVE) {
    log_warn(LD_CIRC,
            "Corrupt state file? Shuffled build times mismatch. "
            "Read %d times, but file says %d", tot_values,
            state->TotalBuildTimes);
    *msg = tor_strdup("Build times count mismatch.");
    circuit_build_times_reset(cbt);
    tor_free(loaded_times);
    return -1;
  }

  circuit_build_times_set_timeout(cbt);

@@ -742,6 +755,12 @@ circuit_build_times_update_alpha(circuit_build_times_t *cbt)
    n++;
  }

  /*
   * We are erring and asserting here because this can only happen
   * in codepaths other than startup. The startup state parsing code
   * performs this same check, and resets state if it hits it. If we
   * hit it at runtime, something serious has gone wrong.
   */
  if (n!=cbt->total_build_times) {
    log_err(LD_CIRC, "Discrepancy in build times count: %d vs %d", n,
            cbt->total_build_times);
+44 −27
Original line number Diff line number Diff line
@@ -4837,25 +4837,63 @@ or_state_validate(or_state_t *old_state, or_state_t *state,
}

/** Replace the current persistent state with <b>new_state</b> */
static void
static int
or_state_set(or_state_t *new_state)
{
  char *err = NULL;
  int ret = 0;
  tor_assert(new_state);
  config_free(&state_format, global_state);
  global_state = new_state;
  if (entry_guards_parse_state(global_state, 1, &err)<0) {
    log_warn(LD_GENERAL,"%s",err);
    tor_free(err);
    ret = -1;
  }
  if (rep_hist_load_state(global_state, &err)<0) {
    log_warn(LD_GENERAL,"Unparseable bandwidth history state: %s",err);
    tor_free(err);
    ret = -1;
  }
  if (circuit_build_times_parse_state(&circ_times, global_state, &err) < 0) {
    log_warn(LD_GENERAL,"%s",err);
    tor_free(err);
    ret = -1;
  }
  return ret;
}

/**
 * Save a broken state file to a backup location.
 */
static void
or_state_save_broken(char *fname)
{
  int i;
  file_status_t status;
  size_t len = strlen(fname)+16;
  char *fname2 = tor_malloc(len);
  for (i = 0; i < 100; ++i) {
    tor_snprintf(fname2, len, "%s.%d", fname, i);
    status = file_status(fname2);
    if (status == FN_NOENT)
      break;
  }
  if (i == 100) {
    log_warn(LD_BUG, "Unable to parse state in \"%s\"; too many saved bad "
             "state files to move aside. Discarding the old state file.",
             fname);
    unlink(fname);
  } else {
    log_warn(LD_BUG, "Unable to parse state in \"%s\". Moving it aside "
             "to \"%s\".  This could be a bug in Tor; please tell "
             "the developers.", fname, fname2);
    if (rename(fname, fname2) < 0) {
      log_warn(LD_BUG, "Weirdly, I couldn't even move the state aside. The "
               "OS gave an error of %s", strerror(errno));
    }
  }
  tor_free(fname2);
}

/** Reload the persistent state from disk, generating a new state as needed.
@@ -4917,31 +4955,8 @@ or_state_load(void)
             " This is a bug in Tor.");
    goto done;
  } else if (badstate && contents) {
    int i;
    file_status_t status;
    size_t len = strlen(fname)+16;
    char *fname2 = tor_malloc(len);
    for (i = 0; i < 100; ++i) {
      tor_snprintf(fname2, len, "%s.%d", fname, i);
      status = file_status(fname2);
      if (status == FN_NOENT)
        break;
    }
    if (i == 100) {
      log_warn(LD_BUG, "Unable to parse state in \"%s\"; too many saved bad "
               "state files to move aside. Discarding the old state file.",
               fname);
      unlink(fname);
    } else {
      log_warn(LD_BUG, "Unable to parse state in \"%s\". Moving it aside "
               "to \"%s\".  This could be a bug in Tor; please tell "
               "the developers.", fname, fname2);
      if (rename(fname, fname2) < 0) {
        log_warn(LD_BUG, "Weirdly, I couldn't even move the state aside. The "
                 "OS gave an error of %s", strerror(errno));
      }
    }
    tor_free(fname2);
    or_state_save_broken(fname);

    tor_free(contents);
    config_free(&state_format, new_state);

@@ -4953,7 +4968,9 @@ or_state_load(void)
  } else {
    log_info(LD_GENERAL, "Initialized state");
  }
  or_state_set(new_state);
  if (or_state_set(new_state) == -1) {
    or_state_save_broken(fname);
  }
  new_state = NULL;
  if (!contents) {
    global_state->next_write = 0;