Commit 59da856c authored by Gian-Carlo Pascutto
Browse files

Bug 730247 - Use byteslice coding for SafeBrowsing data. r=dcamp

parent c6306a4d
Loading
Loading
Loading
Loading
+117 −36
Original line number Diff line number Diff line
@@ -40,13 +40,36 @@
#include "zlib.h"

// Main store for SafeBrowsing protocol data. We store
// known add/sub chunks, prefixe and completions s in memory
// known add/sub chunks, prefixes and completions in memory
// during an update, and serialize to disk.
// We do not store the add prefixes, those are retrieved by
// decompressing the PrefixSet cache whenever we need to apply
// an update.

// Data format:
//
// byte slicing: Many of the 4-byte values stored here are strongly
// correlated in the upper bytes, and uncorrelated in the lower
// bytes. Because zlib/DEFLATE requires match lengths of at least
// 3 to achieve good compression, and we don't get those if only
// the upper 16-bits are correlated, it is worthwhile to slice 32-bit
// values into 4 1-byte slices and compress the slices individually.
// The slices corresponding to the MSBs will compress very well, while
// the slice corresponding to the LSB will hardly compress at all.
// Because of this, we only apply DEFLATE to the 3 most significant
// bytes, and store the LSB uncompressed.
//
// byte sliced (numValues) data format:
//    uint32 compressed-size
//    compressed-size bytes    zlib DEFLATE data
//        0...numValues        byte MSB of 4-byte numValues data
//    uint32 compressed-size
//    compressed-size bytes    zlib DEFLATE data
//        0...numValues        byte 2nd byte of 4-byte numValues data
//    uint32 compressed-size
//    compressed-size bytes    zlib DEFLATE data
//        0...numValues        byte 3rd byte of 4-byte numValues data
//    0...numValues            byte LSB of 4-byte numValues data
//
// Store data format:
//    uint32 magic
//    uint32 version
//    uint32 numAddChunks
@@ -57,16 +80,10 @@
//    uint32 numSubCompletes
//    0...numAddChunks               uint32 addChunk
//    0...numSubChunks               uint32 subChunk
//    uint32 compressed-size
//        compressed-size bytes zlib inflate data
//        0...numAddPrefixes    uint32 addChunk
//    uint32 compressed-size
//        compressed-size bytes zlib inflate data
//        0...numSubPrefixes    uint32 addChunk
//    uint32 compressed-size
//        compressed-size bytes zlib inflate data
//        0...numSubPrefixes    uint32 subChunk
//    0...numSubPrefixes        uint32 subPrefix
//    byte sliced (numAddPrefixes)   uint32 add chunk of AddPrefixes
//    byte sliced (numSubPrefixes)   uint32 sub chunk of SubPrefixes
//    byte sliced (numSubPrefixes)   uint32 add chunk of SubPrefixes
//    byte sliced (numSubPrefixes)   uint32 SubPrefixes
//    0...numAddCompletes           32-byte Completions
//    0...numSubCompletes           32-byte Completions
//    16-byte MD5 of all preceding data
@@ -85,7 +102,7 @@ namespace mozilla {
namespace safebrowsing {

const uint32 STORE_MAGIC = 0x1231af3b;
const uint32 CURRENT_VERSION = 1;
const uint32 CURRENT_VERSION = 2;

void
TableUpdate::NewAddPrefix(PRUint32 aAddChunk, const Prefix& aHash)
@@ -642,17 +659,82 @@ nsresult InflateReadTArray(nsIInputStream* aStream, nsTArray<T>* aOut,
  return NS_OK;
}

// Writes aData to aOut in byte-sliced form.
//
// Every 32-bit value is split into its four bytes, and the bytes at the
// same position of all values are gathered into one array per position.
// The three most significant slices are written with DeflateWriteTArray,
// the least significant one with WriteTArray, in that order.
//
// aOut:  stream receiving the four slices.
// aData: the 32-bit values to serialize.
//
// Propagates the first failing nsresult, otherwise returns NS_OK.
static nsresult
ByteSliceWrite(nsIOutputStream* aOut, nsTArray<PRUint32>& aData)
{
  const PRUint32 numValues = aData.Length();

  // One array per byte position, most significant byte first.
  nsTArray<PRUint8> msbSlice;
  nsTArray<PRUint8> secondSlice;
  nsTArray<PRUint8> thirdSlice;
  nsTArray<PRUint8> lsbSlice;

  msbSlice.SetCapacity(numValues);
  secondSlice.SetCapacity(numValues);
  thirdSlice.SetCapacity(numValues);
  lsbSlice.SetCapacity(numValues);

  for (PRUint32 idx = 0; idx < numValues; ++idx) {
    const PRUint32 value = aData[idx];
    msbSlice.AppendElement(value >> 24);
    secondSlice.AppendElement((value >> 16) & 0xFF);
    thirdSlice.AppendElement((value >> 8) & 0xFF);
    lsbSlice.AppendElement(value & 0xFF);
  }

  nsresult rv;

  rv = DeflateWriteTArray(aOut, msbSlice);
  NS_ENSURE_SUCCESS(rv, rv);

  rv = DeflateWriteTArray(aOut, secondSlice);
  NS_ENSURE_SUCCESS(rv, rv);

  rv = DeflateWriteTArray(aOut, thirdSlice);
  NS_ENSURE_SUCCESS(rv, rv);

  // The least significant bytes rarely compress, so that slice is
  // stored as-is rather than deflated.
  rv = WriteTArray(aOut, lsbSlice);
  NS_ENSURE_SUCCESS(rv, rv);

  return NS_OK;
}

// Reads byte-sliced data from aInStream and appends the reassembled
// 32-bit values to aData.
//
// Four byte arrays are read in order: the three most significant slices
// through InflateReadTArray, then the least significant slice through
// ReadTArray. Element i of each slice supplies one byte of value i.
//
// aInStream: stream positioned at the start of the byte-sliced data.
// aData:     receives the values; they are appended to the array.
// count:     number of 32-bit values to reassemble; also handed to the
//            readers (presumably as the expected element count of each
//            slice -- confirm against InflateReadTArray/ReadTArray).
//
// Propagates the first failing nsresult, otherwise returns NS_OK.
static nsresult
ByteSliceRead(nsIInputStream* aInStream, nsTArray<PRUint32>* aData, PRUint32 count)
{
  nsTArray<PRUint8> slice1;
  nsTArray<PRUint8> slice2;
  nsTArray<PRUint8> slice3;
  nsTArray<PRUint8> slice4;

  nsresult rv = InflateReadTArray(aInStream, &slice1, count);
  NS_ENSURE_SUCCESS(rv, rv);

  rv = InflateReadTArray(aInStream, &slice2, count);
  NS_ENSURE_SUCCESS(rv, rv);

  rv = InflateReadTArray(aInStream, &slice3, count);
  NS_ENSURE_SUCCESS(rv, rv);

  rv = ReadTArray(aInStream, &slice4, count);
  NS_ENSURE_SUCCESS(rv, rv);

  aData->SetCapacity(count);

  for (PRUint32 i = 0; i < count; i++) {
    // Widen every byte to PRUint32 before shifting. A PRUint8 operand is
    // promoted to (signed) int, so shifting a byte >= 0x80 left by 24
    // would shift into the sign bit.
    const PRUint32 value = (static_cast<PRUint32>(slice1[i]) << 24)
                         | (static_cast<PRUint32>(slice2[i]) << 16)
                         | (static_cast<PRUint32>(slice3[i]) << 8)
                         |  static_cast<PRUint32>(slice4[i]);
    aData->AppendElement(value);
  }

  return NS_OK;
}

nsresult
HashStore::ReadAddPrefixes()
{
  nsTArray<uint32> chunks;
  nsTArray<PRUint32> chunks;
  PRUint32 count = mHeader.numAddPrefixes;

  nsresult rv = InflateReadTArray(mInputStream, &chunks, count);
  nsresult rv = ByteSliceRead(mInputStream, &chunks, count);
  NS_ENSURE_SUCCESS(rv, rv);

  mAddPrefixes.SetCapacity(count);
  for (uint32 i = 0; i < count; i++) {
  for (PRUint32 i = 0; i < count; i++) {
    AddPrefix *add = mAddPrefixes.AppendElement();
    add->prefix.FromUint32(0);
    add->addChunk = chunks[i];
@@ -666,23 +748,23 @@ HashStore::ReadSubPrefixes()
{
  nsTArray<PRUint32> addchunks;
  nsTArray<PRUint32> subchunks;
  nsTArray<Prefix> prefixes;
  nsTArray<PRUint32> prefixes;
  PRUint32 count = mHeader.numSubPrefixes;

  nsresult rv = InflateReadTArray(mInputStream, &addchunks, count);
  nsresult rv = ByteSliceRead(mInputStream, &addchunks, count);
  NS_ENSURE_SUCCESS(rv, rv);

  rv = InflateReadTArray(mInputStream, &subchunks, count);
  rv = ByteSliceRead(mInputStream, &subchunks, count);
  NS_ENSURE_SUCCESS(rv, rv);

  rv = ReadTArray(mInputStream, &prefixes, count);
  rv = ByteSliceRead(mInputStream, &prefixes, count);
  NS_ENSURE_SUCCESS(rv, rv);

  mSubPrefixes.SetCapacity(count);
  for (uint32 i = 0; i < count; i++) {
    SubPrefix *sub = mSubPrefixes.AppendElement();
    sub->addChunk = addchunks[i];
    sub->prefix = prefixes[i];
    sub->prefix.FromUint32(prefixes[i]);
    sub->subChunk = subchunks[i];
  }

@@ -693,7 +775,7 @@ HashStore::ReadSubPrefixes()
nsresult
HashStore::WriteAddPrefixes(nsIOutputStream* aOut)
{
  nsTArray<uint32> chunks;
  nsTArray<PRUint32> chunks;
  PRUint32 count = mAddPrefixes.Length();
  chunks.SetCapacity(count);

@@ -701,7 +783,7 @@ HashStore::WriteAddPrefixes(nsIOutputStream* aOut)
    chunks.AppendElement(mAddPrefixes[i].Chunk());
  }

  nsresult rv = DeflateWriteTArray(aOut, chunks);
  nsresult rv = ByteSliceWrite(aOut, chunks);
  NS_ENSURE_SUCCESS(rv, rv);

  return NS_OK;
@@ -710,9 +792,9 @@ HashStore::WriteAddPrefixes(nsIOutputStream* aOut)
nsresult
HashStore::WriteSubPrefixes(nsIOutputStream* aOut)
{
  nsTArray<uint32> addchunks;
  nsTArray<uint32> subchunks;
  nsTArray<Prefix> prefixes;
  nsTArray<PRUint32> addchunks;
  nsTArray<PRUint32> subchunks;
  nsTArray<PRUint32> prefixes;
  PRUint32 count = mSubPrefixes.Length();
  addchunks.SetCapacity(count);
  subchunks.SetCapacity(count);
@@ -720,18 +802,17 @@ HashStore::WriteSubPrefixes(nsIOutputStream* aOut)

  for (uint32 i = 0; i < count; i++) {
    addchunks.AppendElement(mSubPrefixes[i].AddChunk());
    prefixes.AppendElement(mSubPrefixes[i].PrefixHash());
    prefixes.AppendElement(mSubPrefixes[i].PrefixHash().ToUint32());
    subchunks.AppendElement(mSubPrefixes[i].Chunk());
  }

  nsresult rv = DeflateWriteTArray(aOut, addchunks);
  nsresult rv = ByteSliceWrite(aOut, addchunks);
  NS_ENSURE_SUCCESS(rv, rv);

  rv = DeflateWriteTArray(aOut, subchunks);
  rv = ByteSliceWrite(aOut, subchunks);
  NS_ENSURE_SUCCESS(rv, rv);

  // chunk-ordered prefixes are not compressible
  rv = WriteTArray(aOut, prefixes);
  rv = ByteSliceWrite(aOut, prefixes);
  NS_ENSURE_SUCCESS(rv, rv);

  return NS_OK;