Bug 1622220 - Add UnpremultiplyRow and extend SwizzleRow. r=lsalzman (1a327125) · Commits · morgan / Tor Browser

gfx/2d/Swizzle.cpp

+203 −34

Original line number	Diff line number	Diff line
		@@ -134,6 +134,14 @@ void Unpremultiply_SSE2(const uint8_t, int32_t, uint8_t, int32_t, IntSize);
		FORMAT_CASE(aSrcFormat, aDstFormat, \
		Unpremultiply_SSE2<ShouldSwapRB(aSrcFormat, aDstFormat)>)

		// SSE2 row unpremultiply; the definition lives in SwizzleSSE2.cpp.
		// Arguments are (source row, destination row, length in pixels) and must
		// match SwizzleRowFn so the function can be handed out by UnpremultiplyRow.
		template <bool aSwapRB>
		void UnpremultiplyRow_SSE2(const uint8_t*, uint8_t*, int32_t);

		// Maps one (source, destination) format pair to the SSE2 row routine,
		// choosing the aSwapRB template argument via ShouldSwapRB. Used as a switch
		// entry in UnpremultiplyRow; FORMAT_CASE_ROW is defined elsewhere and
		// presumably emits the `case` label and return -- confirm against its
		// definition.
		# define UNPREMULTIPLY_ROW_SSE2(aSrcFormat, aDstFormat) \
		FORMAT_CASE_ROW( \
		aSrcFormat, aDstFormat, \
		UnpremultiplyRow_SSE2<ShouldSwapRB(aSrcFormat, aDstFormat)>)

		// SSE2 whole-surface swizzle. The argument order follows the other
		// surface-level routines in this file: (source, source gap, destination,
		// destination gap, size). NOTE(review): the definition is not visible here;
		// the pointer types are restored by analogy with Unpremultiply_SSE2 --
		// confirm against SwizzleSSE2.cpp.
		template <bool aSwapRB, bool aOpaqueAlpha>
		void Swizzle_SSE2(const uint8_t*, int32_t, uint8_t*, int32_t, IntSize);

		@@ -198,6 +206,14 @@ void Unpremultiply_NEON(const uint8_t, int32_t, uint8_t, int32_t, IntSize);
		FORMAT_CASE(aSrcFormat, aDstFormat, \
		Unpremultiply_NEON<ShouldSwapRB(aSrcFormat, aDstFormat)>)

		// NEON row unpremultiply; the definition lives in SwizzleNEON.cpp.
		// Arguments are (source row, destination row, length in pixels) and must
		// match SwizzleRowFn so the function can be handed out by UnpremultiplyRow.
		template <bool aSwapRB>
		void UnpremultiplyRow_NEON(const uint8_t*, uint8_t*, int32_t);

		// Maps one (source, destination) format pair to the NEON row routine,
		// choosing the aSwapRB template argument via ShouldSwapRB. Used as a switch
		// entry in UnpremultiplyRow; FORMAT_CASE_ROW is defined elsewhere and
		// presumably emits the `case` label and return -- confirm against its
		// definition.
		# define UNPREMULTIPLY_ROW_NEON(aSrcFormat, aDstFormat) \
		FORMAT_CASE_ROW( \
		aSrcFormat, aDstFormat, \
		UnpremultiplyRow_NEON<ShouldSwapRB(aSrcFormat, aDstFormat)>)

		// NEON whole-surface swizzle. The argument order follows the other
		// surface-level routines in this file: (source, source gap, destination,
		// destination gap, size). NOTE(review): the definition is not visible here;
		// the pointer types are restored by analogy with Unpremultiply_NEON --
		// confirm against SwizzleNEON.cpp.
		template <bool aSwapRB, bool aOpaqueAlpha>
		void Swizzle_NEON(const uint8_t*, int32_t, uint8_t*, int32_t, IntSize);

		@@ -493,11 +509,9 @@ static const uint32_t sUnpremultiplyTable[256] = {0,
		// shifting/masking to access components.
		template <bool aSwapRB, uint32_t aSrcRGBIndex, uint32_t aSrcAIndex,
		uint32_t aDstRGBIndex, uint32_t aDstAIndex>
		static void UnpremultiplyFallback(const uint8_t* aSrc, int32_t aSrcGap,
		uint8_t* aDst, int32_t aDstGap,
		IntSize aSize) {
		for (int32_t height = aSize.height; height > 0; height--) {
		const uint8_t* end = aSrc + 4 * aSize.width;
		static void UnpremultiplyChunkFallback(const uint8_t& aSrc, uint8_t& aDst,
		int32_t aLength) {
		const uint8_t* end = aSrc + 4 * aLength;
		do {
		uint8_t r = aSrc[aSrcRGBIndex + (aSwapRB ? 2 : 0)];
		uint8_t g = aSrc[aSrcRGBIndex + 1];
		@@ -516,7 +530,24 @@ static void UnpremultiplyFallback(const uint8_t* aSrc, int32_t aSrcGap,
		aSrc += 4;
		aDst += 4;
		} while (aSrc < end);
		}

		// Scalar row entry point for unpremultiplication: forwards a single row of
		// aLength pixels to UnpremultiplyChunkFallback with the same template
		// arguments. The template arguments are supplied by
		// UNPREMULTIPLY_ROW_FALLBACK_CASE from ShouldSwapRB, RGBByteIndex and
		// AlphaByteIndex of the source and destination formats.
		template <bool aSwapRB, uint32_t aSrcRGBIndex, uint32_t aSrcAIndex,
		uint32_t aDstRGBIndex, uint32_t aDstAIndex>
		static void UnpremultiplyRowFallback(const uint8_t* aSrc, uint8_t* aDst,
		int32_t aLength) {
		// aSrc/aDst are by-value copies here, so any pointer advancement done by
		// the chunk helper is not visible to the caller.
		UnpremultiplyChunkFallback<aSwapRB, aSrcRGBIndex, aSrcAIndex, aDstRGBIndex,
		aDstAIndex>(aSrc, aDst, aLength);
		}

		template <bool aSwapRB, uint32_t aSrcRGBIndex, uint32_t aSrcAIndex,
		uint32_t aDstRGBIndex, uint32_t aDstAIndex>
		static void UnpremultiplyFallback(const uint8_t* aSrc, int32_t aSrcGap,
		uint8_t* aDst, int32_t aDstGap,
		IntSize aSize) {
		for (int32_t height = aSize.height; height > 0; height--) {
		UnpremultiplyChunkFallback<aSwapRB, aSrcRGBIndex, aSrcAIndex, aDstRGBIndex,
		aDstAIndex>(aSrc, aDst, aSize.width);
		aSrc += aSrcGap;
		aDst += aDstGap;
		}
		@@ -534,6 +565,18 @@ static void UnpremultiplyFallback(const uint8_t* aSrc, int32_t aSrcGap,
		UNPREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::R8G8B8A8) \
		UNPREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::A8R8G8B8)

		// Switch entry binding one (source, destination) format pair to the scalar
		// row routine. The swap flag and the RGB/alpha byte indices of both formats
		// become the template arguments of UnpremultiplyRowFallback.
		#define UNPREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, aDstFormat) \
		FORMAT_CASE_ROW(aSrcFormat, aDstFormat, \
		UnpremultiplyRowFallback< \
		ShouldSwapRB(aSrcFormat, aDstFormat), \
		RGBByteIndex(aSrcFormat), AlphaByteIndex(aSrcFormat), \
		RGBByteIndex(aDstFormat), AlphaByteIndex(aDstFormat)>)

		// Expands to the scalar row entries for one source format against each of
		// the three destination formats listed below.
		#define UNPREMULTIPLY_ROW_FALLBACK(aSrcFormat) \
		UNPREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, SurfaceFormat::B8G8R8A8) \
		UNPREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, SurfaceFormat::R8G8B8A8) \
		UNPREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, SurfaceFormat::A8R8G8B8)

		bool UnpremultiplyData(const uint8_t* aSrc, int32_t aSrcStride,
		SurfaceFormat aSrcFormat, uint8_t* aDst,
		int32_t aDstStride, SurfaceFormat aDstFormat,
		@@ -588,6 +631,42 @@ bool UnpremultiplyData(const uint8_t* aSrc, int32_t aSrcStride,
		return false;
		}

		/**
		 * Selects the row routine that unpremultiplies pixels of aSrcFormat into
		 * aDstFormat.
		 *
		 * The SSE2 and NEON tables are consulted first, each only when compiled in
		 * and reported available by the CPU check; they cover the BGRA/RGBA
		 * combinations. Any pair they do not handle falls through to the scalar
		 * table, which additionally covers A8R8G8B8 destinations. The *_ROW_*
		 * macros are expected to return the matching function from inside the
		 * switch (see FORMAT_CASE_ROW).
		 *
		 * @return the row function, or nullptr (after asserting) when no table
		 *         handles the format pair.
		 */
		SwizzleRowFn UnpremultiplyRow(SurfaceFormat aSrcFormat,
		                              SurfaceFormat aDstFormat) {
		#ifdef USE_SSE2
		  if (mozilla::supports_sse2()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
		      UNPREMULTIPLY_ROW_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8A8)
		      UNPREMULTIPLY_ROW_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
		      UNPREMULTIPLY_ROW_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8A8)
		      UNPREMULTIPLY_ROW_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
		      default:
		        break;
		    }
		#endif

		#ifdef USE_NEON
		  if (mozilla::supports_neon()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
		      UNPREMULTIPLY_ROW_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8A8)
		      UNPREMULTIPLY_ROW_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
		      UNPREMULTIPLY_ROW_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8A8)
		      UNPREMULTIPLY_ROW_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
		      default:
		        break;
		    }
		#endif

		  // Scalar implementations, used when no SIMD routine was selected above.
		  switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
		    UNPREMULTIPLY_ROW_FALLBACK(SurfaceFormat::B8G8R8A8)
		    UNPREMULTIPLY_ROW_FALLBACK(SurfaceFormat::R8G8B8A8)
		    UNPREMULTIPLY_ROW_FALLBACK(SurfaceFormat::A8R8G8B8)
		    default:
		      break;
		  }

		  // The message names the operation that actually failed; it previously
		  // said "premultiply", copied from PremultiplyRow.
		  MOZ_ASSERT_UNREACHABLE("Unsupported unpremultiply formats");
		  return nullptr;
		}

		/**
		* Swizzling
		*/
		@@ -663,6 +742,15 @@ static void SwizzleFallback(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
		RGBBitShift(aSrcFormat), AlphaBitShift(aSrcFormat), \
		RGBBitShift(aDstFormat), AlphaBitShift(aDstFormat)>)

		// Row copy used when the source and destination formats are identical.
		// aLength is a pixel count; aBytesPerPixel is the size of one pixel, so the
		// number of bytes copied is their product.
		template <int32_t aBytesPerPixel>
		static void SwizzleRowCopy(const uint8_t* aSrc, uint8_t* aDst,
		                           int32_t aLength) {
		  // An in-place call has nothing to move.
		  if (aSrc == aDst) {
		    return;
		  }
		  memcpy(aDst, aSrc, aLength * aBytesPerPixel);
		}

		// Fast-path for matching formats.
		static void SwizzleCopy(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
		int32_t aDstGap, IntSize aSize, int32_t aBPP) {
		@@ -727,6 +815,41 @@ static void SwizzleSwap(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
		SwizzleRowSwap<ShouldForceOpaque(aSrcFormat, aDstFormat), \
		AlphaBitShift(aSrcFormat), AlphaBitShift(aDstFormat)>)

		// Swaps the first and third byte of every 3-byte pixel in a run of aLength
		// pixels, i.e. converts between R8G8B8 and B8G8R8.
		//
		// aSrc and aDst are references to the caller's pointers and are left
		// pointing just past the processed run; SwizzleSwapRGB24 relies on this when
		// it adds only the row gap afterwards. The do/while body always runs at
		// least once, so aLength is expected to be at least 1.
		static void SwizzleChunkSwapRGB24(const uint8_t*& aSrc, uint8_t*& aDst,
		                                  int32_t aLength) {
		  const uint8_t* end = aSrc + 3 * aLength;
		  do {
		    // Read the whole pixel before writing so that aSrc and aDst may refer
		    // to the same memory.
		    uint8_t r = aSrc[0];
		    uint8_t g = aSrc[1];
		    uint8_t b = aSrc[2];
		    aDst[0] = b;
		    aDst[1] = g;
		    aDst[2] = r;
		    aSrc += 3;
		    aDst += 3;
		  } while (aSrc < end);
		}

		// Row entry point for the 24-bit R/B swap: processes one row of aLength
		// pixels by delegating to SwizzleChunkSwapRGB24. Registered for the
		// R8G8B8 <-> B8G8R8 pairs through SWIZZLE_ROW_SWAP_RGB24.
		static void SwizzleRowSwapRGB24(const uint8_t* aSrc, uint8_t* aDst,
		int32_t aLength) {
		SwizzleChunkSwapRGB24(aSrc, aDst, aLength);
		}

		// Whole-surface 24-bit R/B swap. Each row of aSize.width pixels is handed to
		// SwizzleChunkSwapRGB24; afterwards the source and destination pointers are
		// moved forward by their respective gaps to reach the next row.
		static void SwizzleSwapRGB24(const uint8_t* aSrc, int32_t aSrcGap,
		                             uint8_t* aDst, int32_t aDstGap, IntSize aSize) {
		  for (int32_t row = 0; row < aSize.height; ++row) {
		    SwizzleChunkSwapRGB24(aSrc, aDst, aSize.width);
		    aSrc += aSrcGap;
		    aDst += aDstGap;
		  }
		}

		// Switch entry binding a 24-bit format pair to the whole-surface R/B swap
		// (used in SwizzleData).
		#define SWIZZLE_SWAP_RGB24(aSrcFormat, aDstFormat) \
		FORMAT_CASE(aSrcFormat, aDstFormat, SwizzleSwapRGB24)

		// Switch entry binding a 24-bit format pair to the row-based R/B swap
		// (used in SwizzleRow).
		#define SWIZZLE_ROW_SWAP_RGB24(aSrcFormat, aDstFormat) \
		FORMAT_CASE_ROW(aSrcFormat, aDstFormat, SwizzleRowSwapRGB24)

		// Fast-path for conversions that force alpha to opaque.
		template <uint32_t aDstAShift>
		static void SwizzleChunkOpaqueUpdate(uint8_t*& aBuffer, int32_t aLength) {
		@@ -824,10 +947,9 @@ static void PackToRGB565(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,

		// Packing of 32-bit formats to 24-bit formats.
		template <bool aSwapRB, uint32_t aSrcRGBShift, uint32_t aSrcRGBIndex>
		static void PackToRGB24(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
		int32_t aDstGap, IntSize aSize) {
		for (int32_t height = aSize.height; height > 0; height--) {
		const uint8_t* end = aSrc + 4 * aSize.width;
		static void PackChunkToRGB24(const uint8_t& aSrc, uint8_t& aDst,
		int32_t aLength) {
		const uint8_t* end = aSrc + 4 * aLength;
		do {
		uint8_t r = aSrc[aSrcRGBIndex + (aSwapRB ? 2 : 0)];
		uint8_t g = aSrc[aSrcRGBIndex + 1];
		@@ -840,7 +962,20 @@ static void PackToRGB24(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
		aSrc += 4;
		aDst += 3;
		} while (aSrc < end);
		}

		// Row entry point for packing a 32-bit format down to a 24-bit one:
		// forwards one row of aLength pixels to PackChunkToRGB24 with the same
		// template arguments. PACK_ROW_RGB_CASE supplies those arguments from
		// ShouldSwapRB, RGBBitShift and RGBByteIndex of the source format.
		template <bool aSwapRB, uint32_t aSrcRGBShift, uint32_t aSrcRGBIndex>
		static void PackRowToRGB24(const uint8_t* aSrc, uint8_t* aDst,
		int32_t aLength) {
		PackChunkToRGB24<aSwapRB, aSrcRGBShift, aSrcRGBIndex>(aSrc, aDst, aLength);
		}

		template <bool aSwapRB, uint32_t aSrcRGBShift, uint32_t aSrcRGBIndex>
		static void PackToRGB24(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
		int32_t aDstGap, IntSize aSize) {
		for (int32_t height = aSize.height; height > 0; height--) {
		PackChunkToRGB24<aSwapRB, aSrcRGBShift, aSrcRGBIndex>(aSrc, aDst,
		aSize.width);
		aSrc += aSrcGap;
		aDst += aDstGap;
		}
		@@ -859,6 +994,20 @@ static void PackToRGB24(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
		PACK_RGB_CASE(SurfaceFormat::A8R8G8B8, aDstFormat, aPackFunc) \
		PACK_RGB_CASE(SurfaceFormat::X8R8G8B8, aDstFormat, aPackFunc)

		// Switch entry binding one (source, destination) pair to a row packing
		// routine. aPackFunc is a function template instantiated with the swap
		// flag, the RGB bit shift and the RGB byte index of the source format.
		#define PACK_ROW_RGB_CASE(aSrcFormat, aDstFormat, aPackFunc) \
		FORMAT_CASE_ROW( \
		aSrcFormat, aDstFormat, \
		aPackFunc<ShouldSwapRB(aSrcFormat, aDstFormat), RGBBitShift(aSrcFormat), \
		RGBByteIndex(aSrcFormat)>)

		// Expands to the row packing entries from each of the six 32-bit source
		// formats listed below to the given destination format.
		#define PACK_ROW_RGB(aDstFormat, aPackFunc) \
		PACK_ROW_RGB_CASE(SurfaceFormat::B8G8R8A8, aDstFormat, aPackFunc) \
		PACK_ROW_RGB_CASE(SurfaceFormat::B8G8R8X8, aDstFormat, aPackFunc) \
		PACK_ROW_RGB_CASE(SurfaceFormat::R8G8B8A8, aDstFormat, aPackFunc) \
		PACK_ROW_RGB_CASE(SurfaceFormat::R8G8B8X8, aDstFormat, aPackFunc) \
		PACK_ROW_RGB_CASE(SurfaceFormat::A8R8G8B8, aDstFormat, aPackFunc) \
		PACK_ROW_RGB_CASE(SurfaceFormat::X8R8G8B8, aDstFormat, aPackFunc)

		// Packing of 32-bit formats to A8.
		template <uint32_t aSrcAIndex>
		static void PackToA8(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
		@@ -1006,6 +1155,9 @@ bool SwizzleData(const uint8_t* aSrc, int32_t aSrcStride,
		SWIZZLE_SWAP(SurfaceFormat::X8R8G8B8, SurfaceFormat::B8G8R8X8)
		SWIZZLE_SWAP(SurfaceFormat::X8R8G8B8, SurfaceFormat::B8G8R8A8)

		SWIZZLE_SWAP_RGB24(SurfaceFormat::R8G8B8, SurfaceFormat::B8G8R8)
		SWIZZLE_SWAP_RGB24(SurfaceFormat::B8G8R8, SurfaceFormat::R8G8B8)

		SWIZZLE_OPAQUE(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8X8)
		SWIZZLE_OPAQUE(SurfaceFormat::B8G8R8X8, SurfaceFormat::B8G8R8A8)
		SWIZZLE_OPAQUE(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8X8)
		@@ -1121,6 +1273,9 @@ SwizzleRowFn SwizzleRow(SurfaceFormat aSrcFormat, SurfaceFormat aDstFormat) {
		SWIZZLE_ROW_SWAP(SurfaceFormat::X8R8G8B8, SurfaceFormat::B8G8R8X8)
		SWIZZLE_ROW_SWAP(SurfaceFormat::X8R8G8B8, SurfaceFormat::B8G8R8A8)

		SWIZZLE_ROW_SWAP_RGB24(SurfaceFormat::R8G8B8, SurfaceFormat::B8G8R8)
		SWIZZLE_ROW_SWAP_RGB24(SurfaceFormat::B8G8R8, SurfaceFormat::R8G8B8)

		UNPACK_ROW_RGB(SurfaceFormat::R8G8B8X8)
		UNPACK_ROW_RGB(SurfaceFormat::R8G8B8A8)
		UNPACK_ROW_RGB(SurfaceFormat::B8G8R8X8)
		@@ -1128,10 +1283,24 @@ SwizzleRowFn SwizzleRow(SurfaceFormat aSrcFormat, SurfaceFormat aDstFormat) {
		UNPACK_ROW_RGB_TO_ARGB(SurfaceFormat::A8R8G8B8)
		UNPACK_ROW_RGB_TO_ARGB(SurfaceFormat::X8R8G8B8)

		PACK_ROW_RGB(SurfaceFormat::R8G8B8, PackRowToRGB24)
		PACK_ROW_RGB(SurfaceFormat::B8G8R8, PackRowToRGB24)

		default:
		break;
		}

		if (aSrcFormat == aDstFormat) {
		switch (BytesPerPixel(aSrcFormat)) {
		case 4:
		return &SwizzleRowCopy<4>;
		case 3:
		return &SwizzleRowCopy<3>;
		default:
		break;
		}
		}

		MOZ_ASSERT_UNREACHABLE("Unsupported swizzle formats");
		return nullptr;
		}

gfx/2d/Swizzle.h

+6 −0

Original line number	Diff line number	Diff line
		@@ -54,6 +54,12 @@ typedef void (SwizzleRowFn)(const uint8_t aSrc, uint8_t* aDst,
		GFX2D_API SwizzleRowFn PremultiplyRow(SurfaceFormat aSrcFormat,
		SurfaceFormat aDstFormat);

		/**
		* Get a function pointer to perform unpremultiplication between two formats.
		* The returned function is invoked as fn(aSrc, aDst, aLength) on one row,
		* with aLength given in pixels. If the format pair is not supported the
		* implementation asserts and returns nullptr.
		*/
		GFX2D_API SwizzleRowFn UnpremultiplyRow(SurfaceFormat aSrcFormat,
		SurfaceFormat aDstFormat);

		/**
		* Get a function pointer to perform swizzling between two formats.
		*/

gfx/2d/SwizzleNEON.cpp

+33 −16

Original line number	Diff line number	Diff line
		@@ -244,17 +244,12 @@ UnpremultiplyVector_NEON(const uint16x8_t& aSrc) {
		}

		template <bool aSwapRB>
		void Unpremultiply_NEON(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
		int32_t aDstGap, IntSize aSize) {
		int32_t alignedRow = 4 * (aSize.width & ~3);
		int32_t remainder = aSize.width & 3;
		// Fold remainder into stride gap.
		aSrcGap += 4 * remainder;
		aDstGap += 4 * remainder;

		for (int32_t height = aSize.height; height > 0; height--) {
		static MOZ_ALWAYS_INLINE void UnpremultiplyChunk_NEON(const uint8_t*& aSrc,
		uint8_t*& aDst,
		int32_t aAlignedRow,
		int32_t aRemainder) {
		// Process all 4-pixel chunks as one vector.
		for (const uint8_t* end = aSrc + alignedRow; aSrc < end;) {
		for (const uint8_t* end = aSrc + aAlignedRow; aSrc < end;) {
		uint16x8_t px = vld1q_u16(reinterpret_cast<const uint16_t*>(aSrc));
		px = UnpremultiplyVector_NEON<aSwapRB>(px);
		vst1q_u16(reinterpret_cast<uint16_t*>(aDst), px);
		@@ -263,18 +258,40 @@ void Unpremultiply_NEON(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
		}

		// Handle any 1-3 remaining pixels.
		if (remainder) {
		uint16x8_t px = LoadRemainder_NEON(aSrc, remainder);
		if (aRemainder) {
		uint16x8_t px = LoadRemainder_NEON(aSrc, aRemainder);
		px = UnpremultiplyVector_NEON<aSwapRB>(px);
		StoreRemainder_NEON(aDst, remainder, px);
		StoreRemainder_NEON(aDst, aRemainder, px);
		}
		}

		// NEON unpremultiply of a single row of aLength pixels. The row is split
		// into a part holding whole groups of 4 pixels (passed as a byte count) and
		// a tail of 0-3 pixels (passed as a pixel count).
		template <bool aSwapRB>
		void UnpremultiplyRow_NEON(const uint8_t* aSrc, uint8_t* aDst,
		                           int32_t aLength) {
		  const int32_t tailPixels = aLength & 3;
		  const int32_t vectorBytes = 4 * (aLength & ~3);
		  UnpremultiplyChunk_NEON<aSwapRB>(aSrc, aDst, vectorBytes, tailPixels);
		}

		// NEON unpremultiply of a whole surface, one row at a time. Every row is
		// split into whole groups of 4 pixels (as a byte count) and a tail of 0-3
		// pixels, exactly as in the row variant.
		template <bool aSwapRB>
		void Unpremultiply_NEON(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
		                        int32_t aDstGap, IntSize aSize) {
		  const int32_t tailPixels = aSize.width & 3;
		  const int32_t vectorBytes = 4 * (aSize.width & ~3);
		  // The tail bytes are folded into the per-row skip so that one addition
		  // after each chunk lands on the start of the next row.
		  const int32_t srcSkip = aSrcGap + 4 * tailPixels;
		  const int32_t dstSkip = aDstGap + 4 * tailPixels;
		  for (int32_t row = 0; row < aSize.height; ++row) {
		    UnpremultiplyChunk_NEON<aSwapRB>(aSrc, aDst, vectorBytes, tailPixels);
		    aSrc += srcSkip;
		    aDst += dstSkip;
		  }
		}

		// Force instantiation of unpremultiply variants here. The parameter lists
		// must match the template definitions above exactly (pointer arguments),
		// otherwise the explicit instantiations match no template.
		template void UnpremultiplyRow_NEON<false>(const uint8_t*, uint8_t*, int32_t);
		template void UnpremultiplyRow_NEON<true>(const uint8_t*, uint8_t*, int32_t);
		template void Unpremultiply_NEON<false>(const uint8_t*, int32_t, uint8_t*,
		                                        int32_t, IntSize);
		template void Unpremultiply_NEON<true>(const uint8_t, int32_t, uint8_t,

gfx/2d/SwizzleSSE2.cpp

+33 −16

Original line number	Diff line number	Diff line
		@@ -243,17 +243,12 @@ static MOZ_ALWAYS_INLINE __m128i UnpremultiplyVector_SSE2(const __m128i& aSrc) {
		}

		template <bool aSwapRB>
		void Unpremultiply_SSE2(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
		int32_t aDstGap, IntSize aSize) {
		int32_t alignedRow = 4 * (aSize.width & ~3);
		int32_t remainder = aSize.width & 3;
		// Fold remainder into stride gap.
		aSrcGap += 4 * remainder;
		aDstGap += 4 * remainder;

		for (int32_t height = aSize.height; height > 0; height--) {
		static MOZ_ALWAYS_INLINE void UnpremultiplyChunk_SSE2(const uint8_t*& aSrc,
		uint8_t*& aDst,
		int32_t aAlignedRow,
		int32_t aRemainder) {
		// Process all 4-pixel chunks as one vector.
		for (const uint8_t* end = aSrc + alignedRow; aSrc < end;) {
		for (const uint8_t* end = aSrc + aAlignedRow; aSrc < end;) {
		__m128i px = _mm_loadu_si128(reinterpret_cast<const __m128i*>(aSrc));
		px = UnpremultiplyVector_SSE2<aSwapRB>(px);
		_mm_storeu_si128(reinterpret_cast<__m128i*>(aDst), px);
		@@ -262,18 +257,40 @@ void Unpremultiply_SSE2(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
		}

		// Handle any 1-3 remaining pixels.
		if (remainder) {
		__m128i px = LoadRemainder_SSE2(aSrc, remainder);
		if (aRemainder) {
		__m128i px = LoadRemainder_SSE2(aSrc, aRemainder);
		px = UnpremultiplyVector_SSE2<aSwapRB>(px);
		StoreRemainder_SSE2(aDst, remainder, px);
		StoreRemainder_SSE2(aDst, aRemainder, px);
		}
		}

		// SSE2 unpremultiply of a single row of aLength pixels. The row is split
		// into a part holding whole groups of 4 pixels (passed as a byte count) and
		// a tail of 0-3 pixels (passed as a pixel count).
		template <bool aSwapRB>
		void UnpremultiplyRow_SSE2(const uint8_t* aSrc, uint8_t* aDst,
		                           int32_t aLength) {
		  const int32_t tailPixels = aLength & 3;
		  const int32_t vectorBytes = 4 * (aLength & ~3);
		  UnpremultiplyChunk_SSE2<aSwapRB>(aSrc, aDst, vectorBytes, tailPixels);
		}

		// SSE2 unpremultiply of a whole surface, one row at a time. Every row is
		// split into whole groups of 4 pixels (as a byte count) and a tail of 0-3
		// pixels, exactly as in the row variant.
		template <bool aSwapRB>
		void Unpremultiply_SSE2(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
		                        int32_t aDstGap, IntSize aSize) {
		  const int32_t tailPixels = aSize.width & 3;
		  const int32_t vectorBytes = 4 * (aSize.width & ~3);
		  // The tail bytes are folded into the per-row skip so that one addition
		  // after each chunk lands on the start of the next row.
		  const int32_t srcSkip = aSrcGap + 4 * tailPixels;
		  const int32_t dstSkip = aDstGap + 4 * tailPixels;
		  for (int32_t row = 0; row < aSize.height; ++row) {
		    UnpremultiplyChunk_SSE2<aSwapRB>(aSrc, aDst, vectorBytes, tailPixels);
		    aSrc += srcSkip;
		    aDst += dstSkip;
		  }
		}

		// Force instantiation of unpremultiply variants here. The parameter lists
		// must match the template definitions above exactly (pointer arguments),
		// otherwise the explicit instantiations match no template.
		template void UnpremultiplyRow_SSE2<false>(const uint8_t*, uint8_t*, int32_t);
		template void UnpremultiplyRow_SSE2<true>(const uint8_t*, uint8_t*, int32_t);
		template void Unpremultiply_SSE2<false>(const uint8_t*, int32_t, uint8_t*,
		                                        int32_t, IntSize);
		template void Unpremultiply_SSE2<true>(const uint8_t, int32_t, uint8_t,

gfx/tests/gtest/TestSwizzle.cpp

+61 −0

Original line number	Diff line number	Diff line
		@@ -62,6 +62,9 @@ TEST(Moz2D, PremultiplyRow)
		const uint8_t check_rgba[5 * 4] = {
		0, 255, 255, 255, 255, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 128,
		};
		const uint8_t check_argb[5 * 4] = {
		255, 0, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 128,
		};

		SwizzleRowFn func =
		PremultiplyRow(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8A8);
		@@ -71,6 +74,10 @@ TEST(Moz2D, PremultiplyRow)
		func = PremultiplyRow(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8);
		func(in_bgra, out, 5);
		EXPECT_TRUE(ArrayEqual(out, check_rgba));

		func = PremultiplyRow(SurfaceFormat::B8G8R8A8, SurfaceFormat::A8R8G8B8);
		func(in_bgra, out, 5);
		EXPECT_TRUE(ArrayEqual(out, check_argb));
		}

		TEST(Moz2D, UnpremultiplyData)
		@@ -107,6 +114,41 @@ TEST(Moz2D, UnpremultiplyData)
		EXPECT_TRUE(ArrayEqual(out, check_argb));
		}

		// Exercises the function pointers returned by UnpremultiplyRow on one row of
		// five premultiplied BGRA pixels, checking the same-format result and the
		// results swizzled to RGBA and ARGB.
		TEST(Moz2D, UnpremultiplyRow)
		{
		// Five premultiplied BGRA pixels, 4 bytes each.
		const uint8_t in_bgra[5 * 4] = {
		255, 255, 0, 255, // verify 255 alpha leaves RGB unchanged
		0, 0, 255, 255, 0, 0, 0, 0, // verify 0 alpha leaves RGB at 0
		0, 0, 0, 64, // verify 0 RGB stays 0 with non-zero alpha
		128, 0, 0, 128, // verify that RGB == alpha maps to 255

		};
		// Output buffer, reused for every conversion below.
		uint8_t out[5 * 4];
		// Expected output when the byte order is kept (BGRA -> BGRA).
		const uint8_t check_bgra[5 * 4] = {
		255, 255, 0, 255, 0, 0, 255, 255, 0, 0, 0, 0, 0, 0, 0, 64, 255, 0, 0, 128,
		};
		// check swizzled output
		const uint8_t check_rgba[5 * 4] = {
		0, 255, 255, 255, 255, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, 64, 0, 0, 255, 128,
		};
		// Expected output with alpha moved to the first byte (BGRA -> ARGB).
		const uint8_t check_argb[5 * 4] = {
		255, 0, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 128, 0, 0, 255,
		};

		// Same source and destination format.
		SwizzleRowFn func =
		UnpremultiplyRow(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8A8);
		func(in_bgra, out, 5);
		EXPECT_TRUE(ArrayEqual(out, check_bgra));

		// Unpremultiply combined with an R/B swap.
		func = UnpremultiplyRow(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8);
		func(in_bgra, out, 5);
		EXPECT_TRUE(ArrayEqual(out, check_rgba));

		// A8R8G8B8 destination: UnpremultiplyRow lists this format only in its
		// scalar fallback table, not in the SSE2/NEON tables.
		func = UnpremultiplyRow(SurfaceFormat::B8G8R8A8, SurfaceFormat::A8R8G8B8);
		func(in_bgra, out, 5);
		EXPECT_TRUE(ArrayEqual(out, check_argb));
		}

		TEST(Moz2D, SwizzleData)
		{
		const uint8_t in_bgra[5 * 4] = {
		@@ -200,6 +242,13 @@ TEST(Moz2D, SwizzleRow)
		0, 254, 253, 255, 255, 0, 0, 255, 0, 0,
		0, 255, 3, 2, 1, 255, 9, 0, 127, 255,
		};
		// check packing
		uint8_t out24[5 * 3];
		const uint8_t check_bgr[5 * 3] = {253, 254, 0, 0, 0, 255, 0, 0,
		0, 1, 2, 3, 127, 0, 9};
		const uint8_t check_rgb[5 * 3] = {
		0, 254, 253, 255, 0, 0, 0, 0, 0, 3, 2, 1, 9, 0, 127,
		};
		// check unpacking
		uint8_t out_unpack[16 * 4];
		const uint8_t in_rgb[16 * 3] = {
		@@ -235,6 +284,18 @@ TEST(Moz2D, SwizzleRow)
		func(in_bgra, out, 5);
		EXPECT_TRUE(ArrayEqual(out, check_rgbx));

		func = SwizzleRow(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8A8);
		func(in_bgra, out, 5);
		EXPECT_TRUE(ArrayEqual(out, in_bgra));

		func = SwizzleRow(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8);
		func(in_bgra, out24, 5);
		EXPECT_TRUE(ArrayEqual(out24, check_bgr));

		func = SwizzleRow(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8);
		func(in_bgra, out24, 5);
		EXPECT_TRUE(ArrayEqual(out24, check_rgb));

		func = SwizzleRow(SurfaceFormat::R8G8B8, SurfaceFormat::B8G8R8X8);
		func(in_rgb, out_unpack, 16);
		EXPECT_TRUE(ArrayEqual(out_unpack, check_unpack_bgrx));