Commit ee925601 authored by Mike Hommey's avatar Mike Hommey
Browse files

Bug 686805 part 3 - Tool to generate seekable compressed streams. r=tglek

parent 3c9f4651
Loading
Loading
Loading
Loading
+11 −0
Original line number Diff line number Diff line
@@ -19,6 +19,17 @@ CPPSRCS = \
  $(NULL)

ifndef MOZ_OLD_LINKER
HOST_PROGRAM = szip

HOST_CPPSRCS = \
  szip.cpp \
  Assertions.cpp \
  $(NULL)

VPATH += $(topsrcdir)/mfbt

HOST_LIBS = -lz

CPPSRCS += \
  ElfLoader.cpp \
  CustomElf.cpp \
+48 −0
Original line number Diff line number Diff line
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
 * You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef SeekableZStream_h
#define SeekableZStream_h

#include "Zip.h"

/**
 * Seekable compressed streams are created by splitting the original
 * decompressed data into small chunks and compressing these chunks
 * individually.
 *
 * The seekable compressed file format consists of a header defined below,
 * followed by a table of 32-bit words containing the offsets for each
 * individual compressed chunk, then followed by the compressed chunks.
 */

/* The header is overlaid directly on the bytes of the output file (szip
 * constructs it in place in a mapping of that file), so it is declared with
 * byte packing to keep the compiler from inserting padding between fields. */
#pragma pack(1)
struct SeekableZStreamHeader: public Zip::SignedEntity<SeekableZStreamHeader>
{
  /* Create a header carrying the format's magic number, with all size and
   * count fields set to zero. */
  SeekableZStreamHeader()
  : Zip::SignedEntity<SeekableZStreamHeader>(magic)
  , totalSize(0), chunkSize(0), nChunks(0), lastChunkSize(0) { }

  /* Reuse Zip::SignedEntity to handle the magic number used in the Seekable
   * ZStream file format. The magic number is "SeZz" (the bytes 0x53 0x65
   * 0x5a 0x7a, i.e. 0x7a5a6553 when read as a little endian 32-bit word). */
  static const uint32_t magic = 0x7a5a6553;

  /* Total size of the stream, including the 4 magic bytes. As written by
   * szip, this counts the whole header, the offset table and every
   * compressed chunk. */
  le_uint32 totalSize;

  /* Chunk size: the amount of original (uncompressed) data that goes into
   * each chunk, except possibly the last one. */
  le_uint32 chunkSize;

  /* Number of chunks, which is also the number of entries in the offset
   * table that follows the header. */
  le_uint32 nChunks;

  /* Size of last chunk (> 0, <= Chunk size), counted in bytes of original
   * (uncompressed) data. */
  le_uint32 lastChunkSize;
};
/* Restore the default packing for the declarations that follow. */
#pragma pack()

MOZ_STATIC_ASSERT(sizeof(SeekableZStreamHeader) == 5 * 4,
                  "SeekableZStreamHeader should be 5 32-bits words");

#endif /* SeekableZStream_h */
+29 −3
Original line number Diff line number Diff line
@@ -16,7 +16,7 @@
 * we can use direct type, but on others, we want to have a special class
 * to handle conversion and alignment issues.
 */
#if defined(__i386__) || defined(__x86_64__)
#if !defined(DEBUG) && (defined(__i386__) || defined(__x86_64__))
typedef uint16_t le_uint16;
typedef uint32_t le_uint32;
#else
@@ -29,17 +29,43 @@ template <> struct UInt<16> { typedef uint16_t Type; };
template <> struct UInt<32> { typedef uint32_t Type; };

/**
 * Template to read 2 n-bit sized words as a 2*n-bit sized word, doing
 * Template to access 2 n-bit sized words as a 2*n-bit sized word, doing
 * conversion from little endian and avoiding alignment issues.
 */
template <typename T>
class le_to_cpu
{
public:
  operator typename UInt<16 * sizeof(T)>::Type() const
  typedef typename UInt<16 * sizeof(T)>::Type Type;

  operator Type() const
  {
    return (b << (sizeof(T) * 8)) | a;
  }

  const le_to_cpu& operator =(const Type &v)
  {
    a = v & ((1 << (sizeof(T) * 8)) - 1);
    b = v >> (sizeof(T) * 8);
    return *this;
  }

  le_to_cpu() { }
  le_to_cpu(const Type &v)
  {
    operator =(v);
  }

  const le_to_cpu& operator +=(const Type &v)
  {
    return operator =(operator Type() + v);
  }

  const le_to_cpu& operator ++(int)
  {
    return operator =(operator Type() + 1);
  }

private:
  T a, b;
};
+4 −0
Original line number Diff line number Diff line
@@ -142,6 +142,7 @@ private:

/* All the following types need to be packed */
#pragma pack(1)
public:
  /**
   * A Zip archive is an aggregate of entities which all start with a
   * signature giving their type. This template is to be used as a base
@@ -162,10 +163,13 @@ private:
        return ret;
      return NULL;
    }

    SignedEntity(uint32_t magic): signature(magic) { }
  private:
    le_uint32 signature;
  };

private:
  /**
   * Header used to describe a Local File entry. The header is followed by
   * the file name and an extra field, then by the data stream.
+136 −0
Original line number Diff line number Diff line
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
 * You can obtain one at http://mozilla.org/MPL/2.0/. */

#include <algorithm>
#include <sys/stat.h>
#include <cstring>
#include <zlib.h>
#include <fcntl.h>
#include <errno.h>
#include "mozilla/Assertions.h"
#include "SeekableZStream.h"
#include "Utils.h"
#include "Logging.h"

static const size_t CHUNK = 16384;

/* Generate a seekable compressed stream. */

/**
 * Entry point of the szip tool.
 *
 * Usage: szip file_to_compress out_file
 *
 * The input file is split in slices of CHUNK bytes, each slice is deflated
 * on its own, and the result is written to out_file as a
 * SeekableZStreamHeader, followed by a table with the offset of each
 * compressed chunk, followed by the compressed chunks themselves.
 *
 * Returns 0 on success, and 1 after logging a message when the arguments
 * are invalid, the input is empty, or any I/O or compression step fails.
 */
int main(int argc, char* argv[])
{
  if (argc != 3 || !argv[1] || !argv[2] || (strcmp(argv[1], argv[2]) == 0)) {
    log("usage: %s file_to_compress out_file", argv[0]);
    return 1;
  }

  AutoCloseFD origFd = open(argv[1], O_RDONLY);
  if (origFd == -1) {
    log("Couldn't open %s: %s", argv[1], strerror(errno));
    return 1;
  }

  struct stat st;
  int ret = fstat(origFd, &st);
  if (ret == -1) {
    log("Couldn't stat %s: %s", argv[1], strerror(errno));
    return 1;
  }

  size_t origSize = st.st_size;
  log("Size = %lu", static_cast<unsigned long>(origSize));
  if (origSize == 0) {
    log("Won't compress %s: it's empty", argv[1]);
    return 1;
  }

  /* Mmap the original file */
  MappedPtr origBuf;
  origBuf.Assign(mmap(NULL, origSize, PROT_READ, MAP_PRIVATE, origFd, 0), origSize);
  if (origBuf == MAP_FAILED) {
    log("Couldn't mmap %s: %s", argv[1], strerror(errno));
    return 1;
  }

  /* Create the compressed file */
  AutoCloseFD outFd = open(argv[2], O_RDWR | O_CREAT | O_TRUNC, 0666);
  if (outFd == -1) {
    log("Couldn't open %s: %s", argv[2], strerror(errno));
    return 1;
  }

  /* Expected total number of chunks */
  size_t nChunks = ((origSize + CHUNK - 1) / CHUNK);

  /* The first chunk is going to be stored after the header and the offset
   * table */
  size_t offset = sizeof(SeekableZStreamHeader) + nChunks * sizeof(uint32_t);

  /* Give enough room for the header and the offset table, and map them.
   * posix_fallocate returns the error number directly instead of setting
   * errno. */
  ret = posix_fallocate(outFd, 0, offset);
  if (ret != 0) {
    log("Couldn't allocate space in %s: %s", argv[2], strerror(ret));
    return 1;
  }
  MappedPtr headerMap;
  headerMap.Assign(mmap(NULL, offset, PROT_READ | PROT_WRITE, MAP_SHARED,
                        outFd, 0), offset);
  if (headerMap == MAP_FAILED) {
    log("Couldn't mmap %s: %s", argv[2], strerror(errno));
    return 1;
  }

  /* The header is constructed in place at the beginning of the mapping, and
   * the offset table starts right after it. */
  SeekableZStreamHeader *header = new (headerMap) SeekableZStreamHeader;
  le_uint32 *entry = reinterpret_cast<le_uint32 *>(&header[1]);

  /* Initialize header */
  header->chunkSize = CHUNK;
  header->totalSize = offset;

  /* Seek at the end of the output file, where we're going to append
   * compressed streams */
  if (lseek(outFd, offset, SEEK_SET) == static_cast<off_t>(-1)) {
    log("Couldn't seek %s: %s", argv[2], strerror(errno));
    return 1;
  }

  /* Initialize zlib structure */
  z_stream zStream;
  memset(&zStream, 0, sizeof(zStream));

  /* Compression buffer */
  AutoDeleteArray<Bytef> outBuf = new Bytef[CHUNK * 2];

  Bytef *origData = static_cast<Bytef*>(origBuf);
  size_t avail = 0;
  while (origSize) {
    avail = std::min(origSize, CHUNK);

    /* Compress chunk. Each chunk is a complete deflate stream of its own,
     * which is what makes the result seekable. */
    ret = deflateInit(&zStream, 9);
    if (ret != Z_OK) {
      log("Couldn't initialize compression: zlib error %d", ret);
      return 1;
    }
    zStream.avail_in = avail;
    zStream.next_in = origData;
    zStream.avail_out = CHUNK * 2;
    zStream.next_out = outBuf;
    ret = deflate(&zStream, Z_FINISH);
    /* Always release the zlib state, even when deflate failed */
    int endRet = deflateEnd(&zStream);
    if (ret != Z_STREAM_END || endRet != Z_OK) {
      log("Couldn't compress %s: zlib error %d", argv[1],
          ret != Z_STREAM_END ? ret : endRet);
      return 1;
    }
    MOZ_ASSERT(zStream.avail_out > 0);

    /* Write chunk. write() may be interrupted or write less than requested,
     * so keep going until the whole chunk is out or a real error occurs. */
    const size_t len = 2 * CHUNK - zStream.avail_out;
    Bytef *cursor = outBuf;
    size_t remaining = len;
    while (remaining) {
      ssize_t written = write(outFd, cursor, remaining);
      if (written == -1 && errno == EINTR)
        continue;
      if (written <= 0) {
        log("Couldn't write to %s: %s", argv[2],
            written == -1 ? strerror(errno) : "wrote 0 bytes");
        return 1;
      }
      cursor += written;
      remaining -= written;
    }

    /* Adjust headers */
    header->totalSize += len;
    *entry++ = offset;
    header->nChunks++;

    /* Prepare for next iteration */
    origSize -= avail;
    origData += avail;
    offset += len;
  }
  /* avail still holds the size of the last slice of original data */
  header->lastChunkSize = avail;

  MOZ_ASSERT(header->nChunks == nChunks);
  log("Compressed size is %lu", static_cast<unsigned long>(offset));

  return 0;
}