Commit 1a327125 authored by Andrew Osmond
Browse files

Bug 1622220 - Add UnpremultiplyRow and extend SwizzleRow. r=lsalzman

UnpremultiplyRow will be used in the image encoders to reverse
premultiplication. SwizzleRow needs to support copying (no swizzling)
and swapping RGB/BGR.

Differential Revision: https://phabricator.services.mozilla.com/D66743

--HG--
extra : moz-landing-system : lando
parent c37e0f6c
Loading
Loading
Loading
Loading
+203 −34
Original line number Diff line number Diff line
@@ -134,6 +134,14 @@ void Unpremultiply_SSE2(const uint8_t*, int32_t, uint8_t*, int32_t, IntSize);
    FORMAT_CASE(aSrcFormat, aDstFormat,              \
                Unpremultiply_SSE2<ShouldSwapRB(aSrcFormat, aDstFormat)>)

// Single-row SSE2 unpremultiply. Only the declaration appears here; the
// arguments are the source pointer, the destination pointer and the row
// length. aSwapRB selects the variant that also exchanges the R and B
// channels.
template <bool aSwapRB>
void UnpremultiplyRow_SSE2(const uint8_t*, uint8_t*, int32_t);

// Expands to FORMAT_CASE_ROW for one source/destination format pair, binding
// it to the UnpremultiplyRow_SSE2 instantiation chosen by ShouldSwapRB. It is
// meant to be used as an entry inside a switch over FORMAT_KEY.
#  define UNPREMULTIPLY_ROW_SSE2(aSrcFormat, aDstFormat) \
    FORMAT_CASE_ROW(                                     \
        aSrcFormat, aDstFormat,                          \
        UnpremultiplyRow_SSE2<ShouldSwapRB(aSrcFormat, aDstFormat)>)

template <bool aSwapRB, bool aOpaqueAlpha>
void Swizzle_SSE2(const uint8_t*, int32_t, uint8_t*, int32_t, IntSize);

@@ -198,6 +206,14 @@ void Unpremultiply_NEON(const uint8_t*, int32_t, uint8_t*, int32_t, IntSize);
    FORMAT_CASE(aSrcFormat, aDstFormat,              \
                Unpremultiply_NEON<ShouldSwapRB(aSrcFormat, aDstFormat)>)

// Single-row NEON unpremultiply. Only the declaration appears here; the
// arguments are the source pointer, the destination pointer and the row
// length. aSwapRB selects the variant that also exchanges the R and B
// channels.
template <bool aSwapRB>
void UnpremultiplyRow_NEON(const uint8_t*, uint8_t*, int32_t);

// Expands to FORMAT_CASE_ROW for one source/destination format pair, binding
// it to the UnpremultiplyRow_NEON instantiation chosen by ShouldSwapRB. It is
// meant to be used as an entry inside a switch over FORMAT_KEY.
#  define UNPREMULTIPLY_ROW_NEON(aSrcFormat, aDstFormat) \
    FORMAT_CASE_ROW(                                     \
        aSrcFormat, aDstFormat,                          \
        UnpremultiplyRow_NEON<ShouldSwapRB(aSrcFormat, aDstFormat)>)

template <bool aSwapRB, bool aOpaqueAlpha>
void Swizzle_NEON(const uint8_t*, int32_t, uint8_t*, int32_t, IntSize);

@@ -493,11 +509,9 @@ static const uint32_t sUnpremultiplyTable[256] = {0,
// shifting/masking to access components.
template <bool aSwapRB, uint32_t aSrcRGBIndex, uint32_t aSrcAIndex,
          uint32_t aDstRGBIndex, uint32_t aDstAIndex>
static void UnpremultiplyFallback(const uint8_t* aSrc, int32_t aSrcGap,
                                  uint8_t* aDst, int32_t aDstGap,
                                  IntSize aSize) {
  for (int32_t height = aSize.height; height > 0; height--) {
    const uint8_t* end = aSrc + 4 * aSize.width;
static void UnpremultiplyChunkFallback(const uint8_t*& aSrc, uint8_t*& aDst,
                                       int32_t aLength) {
  const uint8_t* end = aSrc + 4 * aLength;
  do {
    uint8_t r = aSrc[aSrcRGBIndex + (aSwapRB ? 2 : 0)];
    uint8_t g = aSrc[aSrcRGBIndex + 1];
@@ -516,7 +530,24 @@ static void UnpremultiplyFallback(const uint8_t* aSrc, int32_t aSrcGap,
    aSrc += 4;
    aDst += 4;
  } while (aSrc < end);
}

// Scalar unpremultiply of a single row of aLength pixels. The work is done by
// UnpremultiplyChunkFallback, which takes its pointers by reference; local
// cursors are handed to it so the helper has something to advance.
template <bool aSwapRB, uint32_t aSrcRGBIndex, uint32_t aSrcAIndex,
          uint32_t aDstRGBIndex, uint32_t aDstAIndex>
static void UnpremultiplyRowFallback(const uint8_t* aSrc, uint8_t* aDst,
                                     int32_t aLength) {
  const uint8_t* srcCursor = aSrc;
  uint8_t* dstCursor = aDst;
  UnpremultiplyChunkFallback<aSwapRB, aSrcRGBIndex, aSrcAIndex, aDstRGBIndex,
                             aDstAIndex>(srcCursor, dstCursor, aLength);
}

template <bool aSwapRB, uint32_t aSrcRGBIndex, uint32_t aSrcAIndex,
          uint32_t aDstRGBIndex, uint32_t aDstAIndex>
static void UnpremultiplyFallback(const uint8_t* aSrc, int32_t aSrcGap,
                                  uint8_t* aDst, int32_t aDstGap,
                                  IntSize aSize) {
  for (int32_t height = aSize.height; height > 0; height--) {
    UnpremultiplyChunkFallback<aSwapRB, aSrcRGBIndex, aSrcAIndex, aDstRGBIndex,
                               aDstAIndex>(aSrc, aDst, aSize.width);
    aSrc += aSrcGap;
    aDst += aDstGap;
  }
@@ -534,6 +565,18 @@ static void UnpremultiplyFallback(const uint8_t* aSrc, int32_t aSrcGap,
  UNPREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::R8G8B8A8) \
  UNPREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::A8R8G8B8)

// Expands to FORMAT_CASE_ROW for one source/destination format pair, binding
// it to the UnpremultiplyRowFallback instantiation whose template arguments
// are derived from the two formats: the R/B swap flag, then the RGB and alpha
// byte indices of the source, then those of the destination.
#define UNPREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, aDstFormat)             \
  FORMAT_CASE_ROW(aSrcFormat, aDstFormat,                                   \
                  UnpremultiplyRowFallback<                                 \
                      ShouldSwapRB(aSrcFormat, aDstFormat),                 \
                      RGBByteIndex(aSrcFormat), AlphaByteIndex(aSrcFormat), \
                      RGBByteIndex(aDstFormat), AlphaByteIndex(aDstFormat)>)

// Emits the fallback entries for a single source format against each of the
// three destination formats handled by the scalar path (B8G8R8A8, R8G8B8A8
// and A8R8G8B8).
#define UNPREMULTIPLY_ROW_FALLBACK(aSrcFormat)                         \
  UNPREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, SurfaceFormat::B8G8R8A8) \
  UNPREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, SurfaceFormat::R8G8B8A8) \
  UNPREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, SurfaceFormat::A8R8G8B8)

bool UnpremultiplyData(const uint8_t* aSrc, int32_t aSrcStride,
                       SurfaceFormat aSrcFormat, uint8_t* aDst,
                       int32_t aDstStride, SurfaceFormat aDstFormat,
@@ -588,6 +631,42 @@ bool UnpremultiplyData(const uint8_t* aSrc, int32_t aSrcStride,
  return false;
}

/**
 * Selects the row function that unpremultiplies pixels stored as aSrcFormat
 * and writes them out as aDstFormat.
 *
 * Lookup order:
 *   1. the SSE2 variants, when built with USE_SSE2 and
 *      mozilla::supports_sse2() reports support;
 *   2. the NEON variants, when built with USE_NEON and
 *      mozilla::supports_neon() reports support;
 *   3. the scalar fallbacks.
 *
 * The SIMD tables only list the four B8G8R8A8/R8G8B8A8 combinations; a pair
 * they do not list (for example one involving A8R8G8B8) breaks out of the
 * SIMD switch and is looked up in the scalar table instead. Each table entry
 * is a FORMAT_CASE_ROW expansion (defined elsewhere) and is expected to
 * return the matching function directly.
 *
 * @param aSrcFormat format of the premultiplied input pixels.
 * @param aDstFormat format of the unpremultiplied output pixels.
 * @return the matching SwizzleRowFn, or nullptr after
 *         MOZ_ASSERT_UNREACHABLE when no table handles the pair.
 */
SwizzleRowFn UnpremultiplyRow(SurfaceFormat aSrcFormat,
                              SurfaceFormat aDstFormat) {
#ifdef USE_SSE2
  if (mozilla::supports_sse2()) {
    switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
      UNPREMULTIPLY_ROW_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8A8)
      UNPREMULTIPLY_ROW_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
      UNPREMULTIPLY_ROW_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8A8)
      UNPREMULTIPLY_ROW_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
      default:
        break;
    }
  }
#endif

#ifdef USE_NEON
  if (mozilla::supports_neon()) {
    switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
      UNPREMULTIPLY_ROW_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8A8)
      UNPREMULTIPLY_ROW_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
      UNPREMULTIPLY_ROW_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8A8)
      UNPREMULTIPLY_ROW_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
      default:
        break;
    }
  }
#endif

  switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
    UNPREMULTIPLY_ROW_FALLBACK(SurfaceFormat::B8G8R8A8)
    UNPREMULTIPLY_ROW_FALLBACK(SurfaceFormat::R8G8B8A8)
    UNPREMULTIPLY_ROW_FALLBACK(SurfaceFormat::A8R8G8B8)
    default:
      break;
  }

  // The message names this operation; the previous wording ("premultiply")
  // pointed at PremultiplyRow and made the assertion misleading to triage.
  MOZ_ASSERT_UNREACHABLE("Unsupported unpremultiply formats");
  return nullptr;
}

/**
 * Swizzling
 */
@@ -663,6 +742,15 @@ static void SwizzleFallback(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
                         RGBBitShift(aSrcFormat), AlphaBitShift(aSrcFormat), \
                         RGBBitShift(aDstFormat), AlphaBitShift(aDstFormat)>)

// Row fast-path for matching formats: the pixels need no conversion, so the
// row is copied verbatim at aBytesPerPixel bytes per pixel.
template <int32_t aBytesPerPixel>
static void SwizzleRowCopy(const uint8_t* aSrc, uint8_t* aDst,
                           int32_t aLength) {
  // An in-place request already holds the desired bytes.
  if (aSrc == aDst) {
    return;
  }
  memcpy(aDst, aSrc, aLength * aBytesPerPixel);
}

// Fast-path for matching formats.
static void SwizzleCopy(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
                        int32_t aDstGap, IntSize aSize, int32_t aBPP) {
@@ -727,6 +815,41 @@ static void SwizzleSwap(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
      SwizzleRowSwap<ShouldForceOpaque(aSrcFormat, aDstFormat), \
                     AlphaBitShift(aSrcFormat), AlphaBitShift(aDstFormat)>)

// Exchanges the first and third byte of each 3-byte pixel (RGB <-> BGR) while
// copying aLength pixels from aSrc to aDst. Both pointers are taken by
// reference and are left just past the last pixel processed, so a caller can
// continue with the next row. All three source bytes are read before any
// destination byte is written, which keeps an in-place call (aSrc == aDst)
// correct.
static void SwizzleChunkSwapRGB24(const uint8_t*& aSrc, uint8_t*& aDst,
                                  int32_t aLength) {
  // The do-while below always runs its body once, so an empty row must be
  // rejected up front or it would read and write one pixel that the caller
  // never offered.
  if (aLength <= 0) {
    return;
  }

  const uint8_t* end = aSrc + 3 * aLength;
  do {
    uint8_t r = aSrc[0];
    uint8_t g = aSrc[1];
    uint8_t b = aSrc[2];
    aDst[0] = b;
    aDst[1] = g;
    aDst[2] = r;
    aSrc += 3;
    aDst += 3;
  } while (aSrc < end);
}

// Row entry point for the 24-bit R/B swap. SwizzleChunkSwapRGB24 takes its
// pointers by reference, so it is given local cursors to advance.
static void SwizzleRowSwapRGB24(const uint8_t* aSrc, uint8_t* aDst,
                                int32_t aLength) {
  const uint8_t* srcCursor = aSrc;
  uint8_t* dstCursor = aDst;
  SwizzleChunkSwapRGB24(srcCursor, dstCursor, aLength);
}

// Applies the 24-bit R/B swap to aSize.height rows of aSize.width pixels.
// SwizzleChunkSwapRGB24 moves both pointers forward as it processes a row;
// aSrcGap and aDstGap are then added to reach the next row.
static void SwizzleSwapRGB24(const uint8_t* aSrc, int32_t aSrcGap,
                             uint8_t* aDst, int32_t aDstGap, IntSize aSize) {
  int32_t rowsLeft = aSize.height;
  while (rowsLeft > 0) {
    SwizzleChunkSwapRGB24(aSrc, aDst, aSize.width);
    aSrc += aSrcGap;
    aDst += aDstGap;
    --rowsLeft;
  }
}

// Expands to FORMAT_CASE for one format pair, binding it to the whole-surface
// 24-bit R/B swap (SwizzleSwapRGB24).
#define SWIZZLE_SWAP_RGB24(aSrcFormat, aDstFormat) \
  FORMAT_CASE(aSrcFormat, aDstFormat, SwizzleSwapRGB24)

// Expands to FORMAT_CASE_ROW for one format pair, binding it to the
// single-row 24-bit R/B swap (SwizzleRowSwapRGB24).
#define SWIZZLE_ROW_SWAP_RGB24(aSrcFormat, aDstFormat) \
  FORMAT_CASE_ROW(aSrcFormat, aDstFormat, SwizzleRowSwapRGB24)

// Fast-path for conversions that force alpha to opaque.
template <uint32_t aDstAShift>
static void SwizzleChunkOpaqueUpdate(uint8_t*& aBuffer, int32_t aLength) {
@@ -824,10 +947,9 @@ static void PackToRGB565(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,

// Packing of 32-bit formats to 24-bit formats.
template <bool aSwapRB, uint32_t aSrcRGBShift, uint32_t aSrcRGBIndex>
static void PackToRGB24(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
                        int32_t aDstGap, IntSize aSize) {
  for (int32_t height = aSize.height; height > 0; height--) {
    const uint8_t* end = aSrc + 4 * aSize.width;
static void PackChunkToRGB24(const uint8_t*& aSrc, uint8_t*& aDst,
                             int32_t aLength) {
  const uint8_t* end = aSrc + 4 * aLength;
  do {
    uint8_t r = aSrc[aSrcRGBIndex + (aSwapRB ? 2 : 0)];
    uint8_t g = aSrc[aSrcRGBIndex + 1];
@@ -840,7 +962,20 @@ static void PackToRGB24(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
    aSrc += 4;
    aDst += 3;
  } while (aSrc < end);
}

// Packs a single row of aLength 32-bit pixels into 24-bit pixels. The work is
// done by PackChunkToRGB24, which takes its pointers by reference; local
// cursors are handed to it so the helper has something to advance.
template <bool aSwapRB, uint32_t aSrcRGBShift, uint32_t aSrcRGBIndex>
static void PackRowToRGB24(const uint8_t* aSrc, uint8_t* aDst,
                           int32_t aLength) {
  const uint8_t* srcCursor = aSrc;
  uint8_t* dstCursor = aDst;
  PackChunkToRGB24<aSwapRB, aSrcRGBShift, aSrcRGBIndex>(srcCursor, dstCursor,
                                                        aLength);
}

template <bool aSwapRB, uint32_t aSrcRGBShift, uint32_t aSrcRGBIndex>
static void PackToRGB24(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
                        int32_t aDstGap, IntSize aSize) {
  for (int32_t height = aSize.height; height > 0; height--) {
    PackChunkToRGB24<aSwapRB, aSrcRGBShift, aSrcRGBIndex>(aSrc, aDst,
                                                          aSize.width);
    aSrc += aSrcGap;
    aDst += aDstGap;
  }
@@ -859,6 +994,20 @@ static void PackToRGB24(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
  PACK_RGB_CASE(SurfaceFormat::A8R8G8B8, aDstFormat, aPackFunc) \
  PACK_RGB_CASE(SurfaceFormat::X8R8G8B8, aDstFormat, aPackFunc)

// Expands to FORMAT_CASE_ROW for one format pair. aPackFunc names a function
// template that is instantiated with, in order, the R/B swap flag for the
// pair, the RGB bit shift of the source format and the RGB byte index of the
// source format.
#define PACK_ROW_RGB_CASE(aSrcFormat, aDstFormat, aPackFunc)                   \
  FORMAT_CASE_ROW(                                                             \
      aSrcFormat, aDstFormat,                                                  \
      aPackFunc<ShouldSwapRB(aSrcFormat, aDstFormat), RGBBitShift(aSrcFormat), \
                RGBByteIndex(aSrcFormat)>)

// Emits PACK_ROW_RGB_CASE for a single destination format against each of the
// six 32-bit source formats listed below.
#define PACK_ROW_RGB(aDstFormat, aPackFunc)                         \
  PACK_ROW_RGB_CASE(SurfaceFormat::B8G8R8A8, aDstFormat, aPackFunc) \
  PACK_ROW_RGB_CASE(SurfaceFormat::B8G8R8X8, aDstFormat, aPackFunc) \
  PACK_ROW_RGB_CASE(SurfaceFormat::R8G8B8A8, aDstFormat, aPackFunc) \
  PACK_ROW_RGB_CASE(SurfaceFormat::R8G8B8X8, aDstFormat, aPackFunc) \
  PACK_ROW_RGB_CASE(SurfaceFormat::A8R8G8B8, aDstFormat, aPackFunc) \
  PACK_ROW_RGB_CASE(SurfaceFormat::X8R8G8B8, aDstFormat, aPackFunc)

// Packing of 32-bit formats to A8.
template <uint32_t aSrcAIndex>
static void PackToA8(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
@@ -1006,6 +1155,9 @@ bool SwizzleData(const uint8_t* aSrc, int32_t aSrcStride,
    SWIZZLE_SWAP(SurfaceFormat::X8R8G8B8, SurfaceFormat::B8G8R8X8)
    SWIZZLE_SWAP(SurfaceFormat::X8R8G8B8, SurfaceFormat::B8G8R8A8)

    SWIZZLE_SWAP_RGB24(SurfaceFormat::R8G8B8, SurfaceFormat::B8G8R8)
    SWIZZLE_SWAP_RGB24(SurfaceFormat::B8G8R8, SurfaceFormat::R8G8B8)

    SWIZZLE_OPAQUE(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8X8)
    SWIZZLE_OPAQUE(SurfaceFormat::B8G8R8X8, SurfaceFormat::B8G8R8A8)
    SWIZZLE_OPAQUE(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8X8)
@@ -1121,6 +1273,9 @@ SwizzleRowFn SwizzleRow(SurfaceFormat aSrcFormat, SurfaceFormat aDstFormat) {
    SWIZZLE_ROW_SWAP(SurfaceFormat::X8R8G8B8, SurfaceFormat::B8G8R8X8)
    SWIZZLE_ROW_SWAP(SurfaceFormat::X8R8G8B8, SurfaceFormat::B8G8R8A8)

    SWIZZLE_ROW_SWAP_RGB24(SurfaceFormat::R8G8B8, SurfaceFormat::B8G8R8)
    SWIZZLE_ROW_SWAP_RGB24(SurfaceFormat::B8G8R8, SurfaceFormat::R8G8B8)

    UNPACK_ROW_RGB(SurfaceFormat::R8G8B8X8)
    UNPACK_ROW_RGB(SurfaceFormat::R8G8B8A8)
    UNPACK_ROW_RGB(SurfaceFormat::B8G8R8X8)
@@ -1128,10 +1283,24 @@ SwizzleRowFn SwizzleRow(SurfaceFormat aSrcFormat, SurfaceFormat aDstFormat) {
    UNPACK_ROW_RGB_TO_ARGB(SurfaceFormat::A8R8G8B8)
    UNPACK_ROW_RGB_TO_ARGB(SurfaceFormat::X8R8G8B8)

    PACK_ROW_RGB(SurfaceFormat::R8G8B8, PackRowToRGB24)
    PACK_ROW_RGB(SurfaceFormat::B8G8R8, PackRowToRGB24)

    default:
      break;
  }

  if (aSrcFormat == aDstFormat) {
    switch (BytesPerPixel(aSrcFormat)) {
      case 4:
        return &SwizzleRowCopy<4>;
      case 3:
        return &SwizzleRowCopy<3>;
      default:
        break;
    }
  }

  MOZ_ASSERT_UNREACHABLE("Unsupported swizzle formats");
  return nullptr;
}
+6 −0
Original line number Diff line number Diff line
@@ -54,6 +54,12 @@ typedef void (*SwizzleRowFn)(const uint8_t* aSrc, uint8_t* aDst,
GFX2D_API SwizzleRowFn PremultiplyRow(SurfaceFormat aSrcFormat,
                                      SurfaceFormat aDstFormat);

/**
 * Get a function pointer to perform unpremultiplication between two formats.
 * The returned function takes a source pointer, a destination pointer and the
 * row length in pixels. If the format pair is not supported, the lookup
 * asserts and returns nullptr, so callers should check the result before
 * invoking it.
 */
GFX2D_API SwizzleRowFn UnpremultiplyRow(SurfaceFormat aSrcFormat,
                                        SurfaceFormat aDstFormat);

/**
 * Get a function pointer to perform swizzling between two formats.
 */
+33 −16
Original line number Diff line number Diff line
@@ -244,17 +244,12 @@ UnpremultiplyVector_NEON(const uint16x8_t& aSrc) {
}

template <bool aSwapRB>
void Unpremultiply_NEON(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
                        int32_t aDstGap, IntSize aSize) {
  int32_t alignedRow = 4 * (aSize.width & ~3);
  int32_t remainder = aSize.width & 3;
  // Fold remainder into stride gap.
  aSrcGap += 4 * remainder;
  aDstGap += 4 * remainder;

  for (int32_t height = aSize.height; height > 0; height--) {
static MOZ_ALWAYS_INLINE void UnpremultiplyChunk_NEON(const uint8_t*& aSrc,
                                                      uint8_t*& aDst,
                                                      int32_t aAlignedRow,
                                                      int32_t aRemainder) {
  // Process all 4-pixel chunks as one vector.
    for (const uint8_t* end = aSrc + alignedRow; aSrc < end;) {
  for (const uint8_t* end = aSrc + aAlignedRow; aSrc < end;) {
    uint16x8_t px = vld1q_u16(reinterpret_cast<const uint16_t*>(aSrc));
    px = UnpremultiplyVector_NEON<aSwapRB>(px);
    vst1q_u16(reinterpret_cast<uint16_t*>(aDst), px);
@@ -263,18 +258,40 @@ void Unpremultiply_NEON(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
  }

  // Handle any 1-3 remaining pixels.
    if (remainder) {
      uint16x8_t px = LoadRemainder_NEON(aSrc, remainder);
  if (aRemainder) {
    uint16x8_t px = LoadRemainder_NEON(aSrc, aRemainder);
    px = UnpremultiplyVector_NEON<aSwapRB>(px);
      StoreRemainder_NEON(aDst, remainder, px);
    StoreRemainder_NEON(aDst, aRemainder, px);
  }
}

// Unpremultiplies one row of aLength 4-byte pixels. The row is split into a
// byte count covering the whole groups of 4 pixels and a count of the 0-3
// pixels left over; both are passed on to UnpremultiplyChunk_NEON.
template <bool aSwapRB>
void UnpremultiplyRow_NEON(const uint8_t* aSrc, uint8_t* aDst,
                           int32_t aLength) {
  const int32_t tailPixels = aLength & 3;
  const int32_t vectorBytes = 4 * (aLength - tailPixels);
  UnpremultiplyChunk_NEON<aSwapRB>(aSrc, aDst, vectorBytes, tailPixels);
}

// Unpremultiplies aSize.height rows of aSize.width 4-byte pixels. Each row is
// split into a byte count covering the whole groups of 4 pixels and a count
// of the 0-3 pixels left over; both are passed on to UnpremultiplyChunk_NEON.
template <bool aSwapRB>
void Unpremultiply_NEON(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
                        int32_t aDstGap, IntSize aSize) {
  const int32_t tailPixels = aSize.width & 3;
  const int32_t vectorBytes = 4 * (aSize.width - tailPixels);
  // Fold the remainder pixels into the per-row stride gap.
  const int32_t srcSkip = aSrcGap + 4 * tailPixels;
  const int32_t dstSkip = aDstGap + 4 * tailPixels;

  for (int32_t row = 0; row < aSize.height; ++row) {
    UnpremultiplyChunk_NEON<aSwapRB>(aSrc, aDst, vectorBytes, tailPixels);
    aSrc += srcSkip;
    aDst += dstSkip;
  }
}

// Force instantiation of unpremultiply variants here.
template void UnpremultiplyRow_NEON<false>(const uint8_t*, uint8_t*, int32_t);
template void UnpremultiplyRow_NEON<true>(const uint8_t*, uint8_t*, int32_t);
template void Unpremultiply_NEON<false>(const uint8_t*, int32_t, uint8_t*,
                                        int32_t, IntSize);
template void Unpremultiply_NEON<true>(const uint8_t*, int32_t, uint8_t*,
+33 −16
Original line number Diff line number Diff line
@@ -243,17 +243,12 @@ static MOZ_ALWAYS_INLINE __m128i UnpremultiplyVector_SSE2(const __m128i& aSrc) {
}

template <bool aSwapRB>
void Unpremultiply_SSE2(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
                        int32_t aDstGap, IntSize aSize) {
  int32_t alignedRow = 4 * (aSize.width & ~3);
  int32_t remainder = aSize.width & 3;
  // Fold remainder into stride gap.
  aSrcGap += 4 * remainder;
  aDstGap += 4 * remainder;

  for (int32_t height = aSize.height; height > 0; height--) {
static MOZ_ALWAYS_INLINE void UnpremultiplyChunk_SSE2(const uint8_t*& aSrc,
                                                      uint8_t*& aDst,
                                                      int32_t aAlignedRow,
                                                      int32_t aRemainder) {
  // Process all 4-pixel chunks as one vector.
    for (const uint8_t* end = aSrc + alignedRow; aSrc < end;) {
  for (const uint8_t* end = aSrc + aAlignedRow; aSrc < end;) {
    __m128i px = _mm_loadu_si128(reinterpret_cast<const __m128i*>(aSrc));
    px = UnpremultiplyVector_SSE2<aSwapRB>(px);
    _mm_storeu_si128(reinterpret_cast<__m128i*>(aDst), px);
@@ -262,18 +257,40 @@ void Unpremultiply_SSE2(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
  }

  // Handle any 1-3 remaining pixels.
    if (remainder) {
      __m128i px = LoadRemainder_SSE2(aSrc, remainder);
  if (aRemainder) {
    __m128i px = LoadRemainder_SSE2(aSrc, aRemainder);
    px = UnpremultiplyVector_SSE2<aSwapRB>(px);
      StoreRemainder_SSE2(aDst, remainder, px);
    StoreRemainder_SSE2(aDst, aRemainder, px);
  }
}

// Unpremultiplies one row of aLength 4-byte pixels. The row is split into a
// byte count covering the whole groups of 4 pixels and a count of the 0-3
// pixels left over; both are passed on to UnpremultiplyChunk_SSE2.
template <bool aSwapRB>
void UnpremultiplyRow_SSE2(const uint8_t* aSrc, uint8_t* aDst,
                           int32_t aLength) {
  const int32_t tailPixels = aLength & 3;
  const int32_t vectorBytes = 4 * (aLength - tailPixels);
  UnpremultiplyChunk_SSE2<aSwapRB>(aSrc, aDst, vectorBytes, tailPixels);
}

// Unpremultiplies aSize.height rows of aSize.width 4-byte pixels. Each row is
// split into a byte count covering the whole groups of 4 pixels and a count
// of the 0-3 pixels left over; both are passed on to UnpremultiplyChunk_SSE2.
template <bool aSwapRB>
void Unpremultiply_SSE2(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
                        int32_t aDstGap, IntSize aSize) {
  const int32_t tailPixels = aSize.width & 3;
  const int32_t vectorBytes = 4 * (aSize.width - tailPixels);
  // Fold the remainder pixels into the per-row stride gap.
  const int32_t srcSkip = aSrcGap + 4 * tailPixels;
  const int32_t dstSkip = aDstGap + 4 * tailPixels;

  for (int32_t row = 0; row < aSize.height; ++row) {
    UnpremultiplyChunk_SSE2<aSwapRB>(aSrc, aDst, vectorBytes, tailPixels);
    aSrc += srcSkip;
    aDst += dstSkip;
  }
}

// Force instantiation of unpremultiply variants here.
template void UnpremultiplyRow_SSE2<false>(const uint8_t*, uint8_t*, int32_t);
template void UnpremultiplyRow_SSE2<true>(const uint8_t*, uint8_t*, int32_t);
template void Unpremultiply_SSE2<false>(const uint8_t*, int32_t, uint8_t*,
                                        int32_t, IntSize);
template void Unpremultiply_SSE2<true>(const uint8_t*, int32_t, uint8_t*,
+61 −0
Original line number Diff line number Diff line
@@ -62,6 +62,9 @@ TEST(Moz2D, PremultiplyRow)
  const uint8_t check_rgba[5 * 4] = {
      0, 255, 255, 255, 255, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 128,
  };
  const uint8_t check_argb[5 * 4] = {
      255, 0, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 128,
  };

  SwizzleRowFn func =
      PremultiplyRow(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8A8);
@@ -71,6 +74,10 @@ TEST(Moz2D, PremultiplyRow)
  func = PremultiplyRow(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8);
  func(in_bgra, out, 5);
  EXPECT_TRUE(ArrayEqual(out, check_rgba));

  func = PremultiplyRow(SurfaceFormat::B8G8R8A8, SurfaceFormat::A8R8G8B8);
  func(in_bgra, out, 5);
  EXPECT_TRUE(ArrayEqual(out, check_argb));
}

TEST(Moz2D, UnpremultiplyData)
@@ -107,6 +114,41 @@ TEST(Moz2D, UnpremultiplyData)
  EXPECT_TRUE(ArrayEqual(out, check_argb));
}

// Exercises UnpremultiplyRow on a five-pixel BGRA row, checking the output in
// BGRA, RGBA and ARGB order. Every fixture lists one pixel per line.
TEST(Moz2D, UnpremultiplyRow)
{
  const uint8_t in_bgra[5 * 4] = {
      255, 255, 0,   255,  // 255 alpha leaves RGB unchanged
      0,   0,   255, 255,  // 255 alpha, only the third channel set
      0,   0,   0,   0,    // 0 alpha leaves RGB at 0
      0,   0,   0,   64,   // 0 RGB stays 0 with non-zero alpha
      128, 0,   0,   128,  // a channel equal to alpha maps to 255
  };
  uint8_t out[5 * 4];

  // Expected output with the channel order kept.
  const uint8_t check_bgra[5 * 4] = {
      255, 255, 0,   255,  //
      0,   0,   255, 255,  //
      0,   0,   0,   0,    //
      0,   0,   0,   64,   //
      255, 0,   0,   128,  //
  };
  // Expected output with the first and third channels exchanged.
  const uint8_t check_rgba[5 * 4] = {
      0,   255, 255, 255,  //
      255, 0,   0,   255,  //
      0,   0,   0,   0,    //
      0,   0,   0,   64,   //
      0,   0,   255, 128,  //
  };
  // Expected output with alpha moved to the first byte.
  const uint8_t check_argb[5 * 4] = {
      255, 0,   255, 255,  //
      255, 255, 0,   0,    //
      0,   0,   0,   0,    //
      64,  0,   0,   0,    //
      128, 0,   0,   255,  //
  };

  const SwizzleRowFn toBgra =
      UnpremultiplyRow(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8A8);
  toBgra(in_bgra, out, 5);
  EXPECT_TRUE(ArrayEqual(out, check_bgra));

  const SwizzleRowFn toRgba =
      UnpremultiplyRow(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8);
  toRgba(in_bgra, out, 5);
  EXPECT_TRUE(ArrayEqual(out, check_rgba));

  const SwizzleRowFn toArgb =
      UnpremultiplyRow(SurfaceFormat::B8G8R8A8, SurfaceFormat::A8R8G8B8);
  toArgb(in_bgra, out, 5);
  EXPECT_TRUE(ArrayEqual(out, check_argb));
}

TEST(Moz2D, SwizzleData)
{
  const uint8_t in_bgra[5 * 4] = {
@@ -200,6 +242,13 @@ TEST(Moz2D, SwizzleRow)
      0, 254, 253, 255, 255, 0,   0, 255, 0,   0,
      0, 255, 3,   2,   1,   255, 9, 0,   127, 255,
  };
  // check packing
  uint8_t out24[5 * 3];
  const uint8_t check_bgr[5 * 3] = {253, 254, 0, 0, 0,   255, 0, 0,
                                    0,   1,   2, 3, 127, 0,   9};
  const uint8_t check_rgb[5 * 3] = {
      0, 254, 253, 255, 0, 0, 0, 0, 0, 3, 2, 1, 9, 0, 127,
  };
  // check unpacking
  uint8_t out_unpack[16 * 4];
  const uint8_t in_rgb[16 * 3] = {
@@ -235,6 +284,18 @@ TEST(Moz2D, SwizzleRow)
  func(in_bgra, out, 5);
  EXPECT_TRUE(ArrayEqual(out, check_rgbx));

  func = SwizzleRow(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8A8);
  func(in_bgra, out, 5);
  EXPECT_TRUE(ArrayEqual(out, in_bgra));

  func = SwizzleRow(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8);
  func(in_bgra, out24, 5);
  EXPECT_TRUE(ArrayEqual(out24, check_bgr));

  func = SwizzleRow(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8);
  func(in_bgra, out24, 5);
  EXPECT_TRUE(ArrayEqual(out24, check_rgb));

  func = SwizzleRow(SurfaceFormat::R8G8B8, SurfaceFormat::B8G8R8X8);
  func(in_rgb, out_unpack, 16);
  EXPECT_TRUE(ArrayEqual(out_unpack, check_unpack_bgrx));