From 341290c2535e2991577344798d8abc7c79ded04a Mon Sep 17 00:00:00 2001 From: Yury Delendik <ydelendik@mozilla.com> Date: Thu, 9 Sep 2021 16:40:44 +0000 Subject: [PATCH] Bug 1672343 - Move shuffle analysis to the MIR. r=lth Differential Revision: https://phabricator.services.mozilla.com/D124245 --- js/src/jit/MIR.cpp | 13 +- js/src/jit/MIR.h | 1 + js/src/jit/MIROps.yaml | 2 +- js/src/jit/ShuffleAnalysis.cpp | 717 ++++++++++++++++++ js/src/jit/ShuffleAnalysis.h | 133 ++++ js/src/jit/arm64/CodeGenerator-arm64.cpp | 42 +- js/src/jit/arm64/Lowering-arm64.cpp | 35 +- js/src/jit/moz.build | 1 + js/src/jit/shared/LIR-shared.h | 90 +-- js/src/jit/shared/Lowering-shared.cpp | 698 ----------------- js/src/jit/shared/Lowering-shared.h | 46 -- .../x86-shared/CodeGenerator-x86-shared.cpp | 42 +- js/src/jit/x86-shared/Lowering-x86-shared.cpp | 37 +- js/src/wasm/WasmIonCompile.cpp | 8 +- 14 files changed, 949 insertions(+), 916 deletions(-) create mode 100644 js/src/jit/ShuffleAnalysis.cpp create mode 100644 js/src/jit/ShuffleAnalysis.h diff --git a/js/src/jit/MIR.cpp b/js/src/jit/MIR.cpp index 87ace27effce7..25060cd6c8bf4 100644 --- a/js/src/jit/MIR.cpp +++ b/js/src/jit/MIR.cpp @@ -4554,8 +4554,9 @@ MDefinition* MWasmTernarySimd128::foldsTo(TempAllocator& alloc) { v2()->op() == MDefinition::Opcode::WasmFloatConstant) { int8_t shuffle[16]; if (specializeBitselectConstantMaskAsShuffle(shuffle)) { - return MWasmShuffleSimd128::New(alloc, v0(), v1(), - SimdConstant::CreateX16(shuffle)); + SimdShuffle s = + AnalyzeSimdShuffle(SimdConstant::CreateX16(shuffle), v0(), v1()); + return MWasmShuffleSimd128::New(alloc, v0(), v1(), s); } } return this; @@ -4581,8 +4582,9 @@ MDefinition* MWasmBinarySimd128::foldsTo(TempAllocator& alloc) { return nullptr; } block()->insertBefore(this, zero); - return MWasmShuffleSimd128::New(alloc, lhs(), zero, - SimdConstant::CreateX16(shuffleMask)); + SimdShuffle s = + AnalyzeSimdShuffle(SimdConstant::CreateX16(shuffleMask), lhs(), zero); + return 
MWasmShuffleSimd128::New(alloc, lhs(), zero, s); } // Specialize var OP const / const OP var when possible. @@ -6081,7 +6083,13 @@ bool MWasmShiftSimd128::congruentTo(const MDefinition* ins) const { } bool MWasmShuffleSimd128::congruentTo(const MDefinition* ins) const { - return ins->toWasmShuffleSimd128()->control().bitwiseEqual(control_) && + // The analyzed control alone does not identify a shuffle: SHIFT_LEFT_8x16 + // and SHIFT_RIGHT_8x16 by the same count share a splatted control, and + // LEFT and RIGHT permutes can share a masked control. Compare everything. + const SimdShuffle other = ins->toWasmShuffleSimd128()->shuffle(); + return other.opd == shuffle_.opd && other.permuteOp == shuffle_.permuteOp && + other.shuffleOp == shuffle_.shuffleOp && + other.control.bitwiseEqual(shuffle_.control) && congruentIfOperandsEqual(ins); } diff --git a/js/src/jit/MIR.h b/js/src/jit/MIR.h index 76599162ac7d3..2618fb076e557 100644 --- a/js/src/jit/MIR.h +++ b/js/src/jit/MIR.h @@ -26,6 +26,7 @@ #include "jit/JitAllocPolicy.h" #include "jit/MacroAssembler.h" #include "jit/MIROpsGenerated.h" +#include "jit/ShuffleAnalysis.h" #include "jit/TypeData.h" #include "jit/TypePolicy.h" #include "js/experimental/JitInfo.h" // JSJit{Getter,Setter}Op, JSJitInfo diff --git a/js/src/jit/MIROps.yaml b/js/src/jit/MIROps.yaml index 26093f9c74622..ea5f3b09b9020 100644 --- a/js/src/jit/MIROps.yaml +++ b/js/src/jit/MIROps.yaml @@ -2588,7 +2588,7 @@ lhs: Simd128 rhs: Simd128 arguments: - control: SimdConstant + shuffle: SimdShuffle type_policy: none result_type: Simd128 movable: true diff --git a/js/src/jit/ShuffleAnalysis.cpp b/js/src/jit/ShuffleAnalysis.cpp new file mode 100644 index 0000000000000..f63d89409d4ee --- /dev/null +++ b/js/src/jit/ShuffleAnalysis.cpp @@ -0,0 +1,717 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "jit/ShuffleAnalysis.h" +#include "jit/MIR.h" + +using namespace js; +using namespace jit; + +using mozilla::Maybe; +using mozilla::Nothing; +using mozilla::Some; + +#ifdef ENABLE_WASM_SIMD + +// Specialization analysis for SIMD operations. 
This is still x86-centric but +// generalizes fairly easily to other architectures. + +// Optimization of v8x16.shuffle. The general byte shuffle+blend is very +// expensive (equivalent to at least a dozen instructions), and we want to avoid +// that if we can. So look for special cases - there are many. +// +// The strategy is to sort the operation into one of three buckets depending +// on the shuffle pattern and inputs: +// +// - single operand; shuffles on these values are rotations, reversals, +// transpositions, and general permutations +// - single-operand-with-interesting-constant (especially zero); shuffles on +// these values are often byte shift or scatter operations +// - dual operand; shuffles on these operations are blends, catenated +// shifts, and (in the worst case) general shuffle+blends +// +// We're not trying to solve the general problem, only to lower reasonably +// expressed patterns that express common operations. Producers that produce +// dense and convoluted patterns will end up with the general byte shuffle. +// Producers that produce simpler patterns that easily map to hardware will +// get faster code. +// +// In particular, these matchers do not try to combine transformations, so a +// shuffle that optimally is lowered to rotate + permute32x4 + rotate, say, is +// usually going to end up as a general byte shuffle. + +// Reduce a 0..31 byte mask to a 0..15 word mask if possible and if so return +// true, updating *control. +static bool ByteMaskToWordMask(SimdConstant* control) { + const SimdConstant::I8x16& lanes = control->asInt8x16(); + int16_t controlWords[8]; + for (int i = 0; i < 16; i += 2) { + if (!((lanes[i] & 1) == 0 && lanes[i + 1] == lanes[i] + 1)) { + return false; + } + controlWords[i / 2] = int16_t(lanes[i] / 2); + } + *control = SimdConstant::CreateX8(controlWords); + return true; +} + +// Reduce a 0..31 byte mask to a 0..7 dword mask if possible and if so return +// true, updating *control. 
+static bool ByteMaskToDWordMask(SimdConstant* control) { + const SimdConstant::I8x16& lanes = control->asInt8x16(); + int32_t controlDWords[4]; + for (int i = 0; i < 16; i += 4) { + if (!((lanes[i] & 3) == 0 && lanes[i + 1] == lanes[i] + 1 && + lanes[i + 2] == lanes[i] + 2 && lanes[i + 3] == lanes[i] + 3)) { + return false; + } + controlDWords[i / 4] = lanes[i] / 4; + } + *control = SimdConstant::CreateX4(controlDWords); + return true; +} + +// Reduce a 0..31 byte mask to a 0..3 qword mask if possible and if so return +// true, updating *control. +static bool ByteMaskToQWordMask(SimdConstant* control) { + const SimdConstant::I8x16& lanes = control->asInt8x16(); + int64_t controlQWords[2]; + for (int i = 0; i < 16; i += 8) { + if (!((lanes[i] & 7) == 0 && lanes[i + 1] == lanes[i] + 1 && + lanes[i + 2] == lanes[i] + 2 && lanes[i + 3] == lanes[i] + 3 && + lanes[i + 4] == lanes[i] + 4 && lanes[i + 5] == lanes[i] + 5 && + lanes[i + 6] == lanes[i] + 6 && lanes[i + 7] == lanes[i] + 7)) { + return false; + } + controlQWords[i / 8] = lanes[i] / 8; + } + *control = SimdConstant::CreateX2(controlQWords); + return true; +} + +// Skip across consecutive values in lanes starting at i, returning the index +// after the last element. Lane values must be <= len-1 ("masked"). +// +// Since every element is a 1-element run, the return value is never the same as +// the starting i. +template <typename T> +static int ScanIncreasingMasked(const T* lanes, int i) { + int len = int(16 / sizeof(T)); + MOZ_ASSERT(i < len); + MOZ_ASSERT(lanes[i] <= len - 1); + i++; + while (i < len && lanes[i] == lanes[i - 1] + 1) { + MOZ_ASSERT(lanes[i] <= len - 1); + i++; + } + return i; +} + +// Skip across consecutive values in lanes starting at i, returning the index +// after the last element. Lane values must be <= len*2-1 ("unmasked"); the +// values len-1 and len are not considered consecutive. +// +// Since every element is a 1-element run, the return value is never the same as +// the starting i. 
+template <typename T> +static int ScanIncreasingUnmasked(const T* lanes, int i) { + int len = int(16 / sizeof(T)); + MOZ_ASSERT(i < len); + if (lanes[i] < len) { + i++; + while (i < len && lanes[i] < len && lanes[i - 1] == lanes[i] - 1) { + i++; + } + } else { + i++; + while (i < len && lanes[i] >= len && lanes[i - 1] == lanes[i] - 1) { + i++; + } + } + return i; +} + +// Skip lanes that equal v starting at i, returning the index just beyond the +// last of those. There is no requirement that the initial lanes[i] == v. +template <typename T> +static int ScanConstant(const T* lanes, int v, int i) { + int len = int(16 / sizeof(T)); + MOZ_ASSERT(i <= len); + while (i < len && lanes[i] == v) { + i++; + } + return i; +} + +// Mask lane values denoting rhs elements into lhs elements. +template <typename T> +static void MaskLanes(T* result, const T* input) { + int len = int(16 / sizeof(T)); + for (int i = 0; i < len; i++) { + result[i] = input[i] & (len - 1); + } +} + +// Apply a transformation to each lane value. +template <typename T> +static void MapLanes(T* result, const T* input, int (*f)(int)) { + // Hazard analysis trips on "IndirectCall: f" error. + // Suppress the check -- `f` is expected to be trivial here. + JS::AutoSuppressGCAnalysis nogc; + + int len = int(16 / sizeof(T)); + for (int i = 0; i < len; i++) { + result[i] = f(input[i]); + } +} + +// Recognize an identity permutation, assuming lanes is masked. +template <typename T> +static bool IsIdentity(const T* lanes) { + return ScanIncreasingMasked(lanes, 0) == int(16 / sizeof(T)); +} + +// Recognize part of an identity permutation starting at start, with +// the first value of the permutation expected to be bias. 
+template <typename T> +static bool IsIdentity(const T* lanes, int start, int len, int bias) { + if (lanes[start] != bias) { + return false; + } + for (int i = start + 1; i < start + len; i++) { + if (lanes[i] != lanes[i - 1] + 1) { + return false; + } + } + return true; +} + +// We can permute by dwords if the mask is reducible to a dword mask, and in +// this case a single PSHUFD is enough. +static bool TryPermute32x4(SimdConstant* control) { + SimdConstant tmp = *control; + if (!ByteMaskToDWordMask(&tmp)) { + return false; + } + *control = tmp; + return true; +} + +// Can we perform a byte rotate right? We can use PALIGNR. The shift count is +// just lanes[0], and *control is unchanged. +static bool TryRotateRight8x16(SimdConstant* control) { + const SimdConstant::I8x16& lanes = control->asInt8x16(); + // Look for the end of the first run of consecutive bytes. + int i = ScanIncreasingMasked(lanes, 0); + + // First run must start at a value s.t. we have a rotate if all remaining + // bytes are a run. + if (lanes[0] != 16 - i) { + return false; + } + + // If we reached the end of the vector, we're done. + if (i == 16) { + return true; + } + + // Second run must start at source lane zero. + if (lanes[i] != 0) { + return false; + } + + // Second run must end at the end of the lane vector. + return ScanIncreasingMasked(lanes, i) == 16; +} + +// We can permute by words if the mask is reducible to a word mask. +static bool TryPermute16x8(SimdConstant* control) { + SimdConstant tmp = *control; + if (!ByteMaskToWordMask(&tmp)) { + return false; + } + *control = tmp; + return true; +} + +// A single word lane is copied into all the other lanes: PSHUF*W + PSHUFD. 
+static bool TryBroadcast16x8(SimdConstant* control) { + SimdConstant tmp = *control; + if (!ByteMaskToWordMask(&tmp)) { + return false; + } + const SimdConstant::I16x8& lanes = tmp.asInt16x8(); + if (ScanConstant(lanes, lanes[0], 0) < 8) { + return false; + } + *control = tmp; + return true; +} + +// A single byte lane is copied into all the other lanes: PUNPCK*BW + PSHUF*W + +// PSHUFD. +static bool TryBroadcast8x16(SimdConstant* control) { + const SimdConstant::I8x16& lanes = control->asInt8x16(); + return ScanConstant(lanes, lanes[0], 0) >= 16; +} + +// Look for permutations of a single operand. +static SimdPermuteOp AnalyzePermute(SimdConstant* control) { + // Lane indices are input-agnostic for single-operand permutations. + SimdConstant::I8x16 controlBytes; + MaskLanes(controlBytes, control->asInt8x16()); + + // Get rid of no-ops immediately, so nobody else needs to check. + if (IsIdentity(controlBytes)) { + return SimdPermuteOp::MOVE; + } + + // Default control is the masked bytes. + *control = SimdConstant::CreateX16(controlBytes); + + // Analysis order matters here and is architecture-dependent or even + // microarchitecture-dependent: ideally the cheapest implementation first. + // The Intel manual says that the cost of a PSHUFB is about five other + // operations, so make that our cutoff. + // + // Word, dword, and qword reversals are handled optimally by general permutes. + // + // Byte reversals are probably best left to PSHUFB, no alternative rendition + // seems to reliably go below five instructions. (Discuss.) + // + // Word swaps within doublewords and dword swaps within quadwords are handled + // optimally by general permutes. + // + // Dword and qword broadcasts are handled by dword permute. 
+ + if (TryPermute32x4(control)) { + return SimdPermuteOp::PERMUTE_32x4; + } + if (TryRotateRight8x16(control)) { + return SimdPermuteOp::ROTATE_RIGHT_8x16; + } + if (TryBroadcast16x8(control)) { + return SimdPermuteOp::BROADCAST_16x8; + } + if (TryPermute16x8(control)) { + return SimdPermuteOp::PERMUTE_16x8; + } + if (TryBroadcast8x16(control)) { + return SimdPermuteOp::BROADCAST_8x16; + } + + // TODO: (From v8) Unzip and transpose generally have renditions that slightly + // beat a general permute (three or four instructions) + // + // TODO: (From MacroAssemblerX86Shared::ShuffleX4): MOVLHPS and MOVHLPS can be + // used when merging two values. + // + // TODO: Byteswap is MOV + PSLLW + PSRLW + POR, a small win over PSHUFB. + + // The default operation is to permute bytes with the default control. + return SimdPermuteOp::PERMUTE_8x16; +} + +// Can we shift the bytes left or right by a constant? A shift is a run of +// lanes from the rhs (which is zero) on one end and a run of values from the +// lhs on the other end. +static Maybe<SimdPermuteOp> TryShift8x16(SimdConstant* control) { + const SimdConstant::I8x16& lanes = control->asInt8x16(); + + // Represent all zero lanes by 16 + SimdConstant::I8x16 zeroesMasked; + MapLanes(zeroesMasked, lanes, [](int x) -> int { return x >= 16 ? 
16 : x; }); + + int i = ScanConstant(zeroesMasked, 16, 0); + int shiftLeft = i; + if (shiftLeft > 0 && lanes[shiftLeft] != 0) { + return Nothing(); + } + + i = ScanIncreasingUnmasked(zeroesMasked, i); + int shiftRight = 16 - i; + if (shiftRight > 0 && lanes[i - 1] != 15) { + return Nothing(); + } + + i = ScanConstant(zeroesMasked, 16, i); + if (i < 16 || (shiftRight > 0 && shiftLeft > 0) || + (shiftRight == 0 && shiftLeft == 0)) { + return Nothing(); + } + + if (shiftRight) { + *control = SimdConstant::SplatX16((int8_t)shiftRight); + return Some(SimdPermuteOp::SHIFT_RIGHT_8x16); + } + *control = SimdConstant::SplatX16((int8_t)shiftLeft); + return Some(SimdPermuteOp::SHIFT_LEFT_8x16); +} + +static Maybe<SimdPermuteOp> AnalyzeShuffleWithZero(SimdConstant* control) { + Maybe<SimdPermuteOp> op; + op = TryShift8x16(control); + if (op) { + return op; + } + + // TODO: Optimization opportunity? A byte-blend-with-zero is just a CONST; + // PAND. This may beat the general byte blend code below. + return Nothing(); +} + +// Concat: if the result is the suffix (high bytes) of the rhs in front of a +// prefix (low bytes) of the lhs then this is PALIGNR; ditto if the operands are +// swapped. 
+static Maybe<SimdShuffleOp> TryConcatRightShift8x16(SimdConstant* control, + bool* swapOperands) { + const SimdConstant::I8x16& lanes = control->asInt8x16(); + int i = ScanIncreasingUnmasked(lanes, 0); + MOZ_ASSERT(i < 16, "Single-operand run should have been handled elswhere"); + // First run must end with 15 % 16 + if ((lanes[i - 1] & 15) != 15) { + return Nothing(); + } + // Second run must start with 0 % 16 + if ((lanes[i] & 15) != 0) { + return Nothing(); + } + // The two runs must come from different inputs + if ((lanes[i] & 16) == (lanes[i - 1] & 16)) { + return Nothing(); + } + int suffixLength = i; + + i = ScanIncreasingUnmasked(lanes, i); + // Must end at the left end + if (i != 16) { + return Nothing(); + } + + // If the suffix is from the lhs then swap the operands + if (lanes[0] < 16) { + *swapOperands = !*swapOperands; + } + *control = SimdConstant::SplatX16((int8_t)suffixLength); + return Some(SimdShuffleOp::CONCAT_RIGHT_SHIFT_8x16); +} + +// Blend words: if we pick words from both operands without a pattern but all +// the input words stay in their position then this is PBLENDW (immediate mask); +// this also handles all larger sizes on x64. +static Maybe<SimdShuffleOp> TryBlendInt16x8(SimdConstant* control) { + SimdConstant tmp(*control); + if (!ByteMaskToWordMask(&tmp)) { + return Nothing(); + } + SimdConstant::I16x8 masked; + MaskLanes(masked, tmp.asInt16x8()); + if (!IsIdentity(masked)) { + return Nothing(); + } + SimdConstant::I16x8 mapped; + MapLanes(mapped, tmp.asInt16x8(), + [](int x) -> int { return x < 8 ? 0 : -1; }); + *control = SimdConstant::CreateX8(mapped); + return Some(SimdShuffleOp::BLEND_16x8); +} + +// Blend bytes: if we pick bytes ditto then this is a byte blend, which can be +// handled with a CONST, PAND, PANDNOT, and POR. +// +// TODO: Optimization opportunity? 
If we pick all but one lanes from one with at +// most one from the other then it could be a MOV + PEXTRB + PINSRB (also if this +// element is not in its source location). +static Maybe<SimdShuffleOp> TryBlendInt8x16(SimdConstant* control) { + SimdConstant::I8x16 masked; + MaskLanes(masked, control->asInt8x16()); + if (!IsIdentity(masked)) { + return Nothing(); + } + SimdConstant::I8x16 mapped; + MapLanes(mapped, control->asInt8x16(), + [](int x) -> int { return x < 16 ? 0 : -1; }); + *control = SimdConstant::CreateX16(mapped); + return Some(SimdShuffleOp::BLEND_8x16); +} + +template <typename T> +static bool MatchInterleave(const T* lanes, int lhs, int rhs, int len) { + for (int i = 0; i < len; i++) { + if (lanes[i * 2] != lhs + i || lanes[i * 2 + 1] != rhs + i) { + return false; + } + } + return true; +} + +// Unpack/interleave: +// - if we interleave the low (bytes/words/doublewords) of the inputs into +// the output then this is UNPCKL*W (possibly with a swap of operands). +// - if we interleave the high ditto then it is UNPCKH*W (ditto) +template <typename T> +static Maybe<SimdShuffleOp> TryInterleave(const T* lanes, int lhs, int rhs, + bool* swapOperands, + SimdShuffleOp lowOp, + SimdShuffleOp highOp) { + int len = int(32 / (sizeof(T) * 4)); + if (MatchInterleave(lanes, lhs, rhs, len)) { + return Some(lowOp); + } + if (MatchInterleave(lanes, rhs, lhs, len)) { + *swapOperands = !*swapOperands; + return Some(lowOp); + } + if (MatchInterleave(lanes, lhs + len, rhs + len, len)) { + return Some(highOp); + } + if (MatchInterleave(lanes, rhs + len, lhs + len, len)) { + *swapOperands = !*swapOperands; + return Some(highOp); + } + return Nothing(); +} + +static Maybe<SimdShuffleOp> TryInterleave64x2(SimdConstant* control, + bool* swapOperands) { + SimdConstant tmp = *control; + if (!ByteMaskToQWordMask(&tmp)) { + return Nothing(); + } + const SimdConstant::I64x2& lanes = tmp.asInt64x2(); + return TryInterleave(lanes, 0, 2, swapOperands, + 
SimdShuffleOp::INTERLEAVE_LOW_64x2, + SimdShuffleOp::INTERLEAVE_HIGH_64x2); +} + +static Maybe<SimdShuffleOp> TryInterleave32x4(SimdConstant* control, + bool* swapOperands) { + SimdConstant tmp = *control; + if (!ByteMaskToDWordMask(&tmp)) { + return Nothing(); + } + const SimdConstant::I32x4& lanes = tmp.asInt32x4(); + return TryInterleave(lanes, 0, 4, swapOperands, + SimdShuffleOp::INTERLEAVE_LOW_32x4, + SimdShuffleOp::INTERLEAVE_HIGH_32x4); +} + +static Maybe<SimdShuffleOp> TryInterleave16x8(SimdConstant* control, + bool* swapOperands) { + SimdConstant tmp = *control; + if (!ByteMaskToWordMask(&tmp)) { + return Nothing(); + } + const SimdConstant::I16x8& lanes = tmp.asInt16x8(); + return TryInterleave(lanes, 0, 8, swapOperands, + SimdShuffleOp::INTERLEAVE_LOW_16x8, + SimdShuffleOp::INTERLEAVE_HIGH_16x8); +} + +static Maybe<SimdShuffleOp> TryInterleave8x16(SimdConstant* control, + bool* swapOperands) { + const SimdConstant::I8x16& lanes = control->asInt8x16(); + return TryInterleave(lanes, 0, 16, swapOperands, + SimdShuffleOp::INTERLEAVE_LOW_8x16, + SimdShuffleOp::INTERLEAVE_HIGH_8x16); +} + +static SimdShuffleOp AnalyzeTwoArgShuffle(SimdConstant* control, + bool* swapOperands) { + Maybe<SimdShuffleOp> op; + op = TryConcatRightShift8x16(control, swapOperands); + if (!op) { + op = TryBlendInt16x8(control); + } + if (!op) { + op = TryBlendInt8x16(control); + } + if (!op) { + op = TryInterleave64x2(control, swapOperands); + } + if (!op) { + op = TryInterleave32x4(control, swapOperands); + } + if (!op) { + op = TryInterleave16x8(control, swapOperands); + } + if (!op) { + op = TryInterleave8x16(control, swapOperands); + } + if (!op) { + op = Some(SimdShuffleOp::SHUFFLE_BLEND_8x16); + } + return *op; +} + +// Reorder the operands if that seems useful, notably, move a constant to the +// right hand side. Rewrites the control to account for any move. 
+static bool MaybeReorderShuffleOperands(MDefinition** lhs, MDefinition** rhs, + SimdConstant* control) { + if ((*lhs)->isWasmFloatConstant()) { + MDefinition* tmp = *lhs; + *lhs = *rhs; + *rhs = tmp; + + int8_t controlBytes[16]; + const SimdConstant::I8x16& lanes = control->asInt8x16(); + for (unsigned i = 0; i < 16; i++) { + controlBytes[i] = int8_t(lanes[i] ^ 16); + } + *control = SimdConstant::CreateX16(controlBytes); + + return true; + } + return false; +} + +# ifdef DEBUG +static const SimdShuffle& ReportShuffleSpecialization(const SimdShuffle& s) { + switch (s.opd) { + case SimdShuffle::Operand::BOTH: + case SimdShuffle::Operand::BOTH_SWAPPED: + switch (*s.shuffleOp) { + case SimdShuffleOp::SHUFFLE_BLEND_8x16: + js::wasm::ReportSimdAnalysis("shuffle -> shuffle+blend 8x16"); + break; + case SimdShuffleOp::BLEND_8x16: + js::wasm::ReportSimdAnalysis("shuffle -> blend 8x16"); + break; + case SimdShuffleOp::BLEND_16x8: + js::wasm::ReportSimdAnalysis("shuffle -> blend 16x8"); + break; + case SimdShuffleOp::CONCAT_RIGHT_SHIFT_8x16: + js::wasm::ReportSimdAnalysis("shuffle -> concat+shift-right 8x16"); + break; + case SimdShuffleOp::INTERLEAVE_HIGH_8x16: + js::wasm::ReportSimdAnalysis("shuffle -> interleave-high 8x16"); + break; + case SimdShuffleOp::INTERLEAVE_HIGH_16x8: + js::wasm::ReportSimdAnalysis("shuffle -> interleave-high 16x8"); + break; + case SimdShuffleOp::INTERLEAVE_HIGH_32x4: + js::wasm::ReportSimdAnalysis("shuffle -> interleave-high 32x4"); + break; + case SimdShuffleOp::INTERLEAVE_HIGH_64x2: + js::wasm::ReportSimdAnalysis("shuffle -> interleave-high 64x2"); + break; + case SimdShuffleOp::INTERLEAVE_LOW_8x16: + js::wasm::ReportSimdAnalysis("shuffle -> interleave-low 8x16"); + break; + case SimdShuffleOp::INTERLEAVE_LOW_16x8: + js::wasm::ReportSimdAnalysis("shuffle -> interleave-low 16x8"); + break; + case SimdShuffleOp::INTERLEAVE_LOW_32x4: + js::wasm::ReportSimdAnalysis("shuffle -> interleave-low 32x4"); + break; + case 
SimdShuffleOp::INTERLEAVE_LOW_64x2: + js::wasm::ReportSimdAnalysis("shuffle -> interleave-low 64x2"); + break; + default: + MOZ_CRASH("Unexpected shuffle op"); + } + break; + case SimdShuffle::Operand::LEFT: + case SimdShuffle::Operand::RIGHT: + switch (*s.permuteOp) { + case SimdPermuteOp::BROADCAST_8x16: + js::wasm::ReportSimdAnalysis("shuffle -> broadcast 8x16"); + break; + case SimdPermuteOp::BROADCAST_16x8: + js::wasm::ReportSimdAnalysis("shuffle -> broadcast 16x8"); + break; + case SimdPermuteOp::MOVE: + js::wasm::ReportSimdAnalysis("shuffle -> move"); + break; + case SimdPermuteOp::PERMUTE_8x16: + js::wasm::ReportSimdAnalysis("shuffle -> permute 8x16"); + break; + case SimdPermuteOp::PERMUTE_16x8: + js::wasm::ReportSimdAnalysis("shuffle -> permute 16x8"); + break; + case SimdPermuteOp::PERMUTE_32x4: + js::wasm::ReportSimdAnalysis("shuffle -> permute 32x4"); + break; + case SimdPermuteOp::ROTATE_RIGHT_8x16: + js::wasm::ReportSimdAnalysis("shuffle -> rotate-right 8x16"); + break; + case SimdPermuteOp::SHIFT_LEFT_8x16: + js::wasm::ReportSimdAnalysis("shuffle -> shift-left 8x16"); + break; + case SimdPermuteOp::SHIFT_RIGHT_8x16: + js::wasm::ReportSimdAnalysis("shuffle -> shift-right 8x16"); + break; + default: + MOZ_CRASH("Unexpected permute op"); + } + break; + } + return s; +} +# endif // DEBUG + +SimdShuffle jit::AnalyzeSimdShuffle(SimdConstant control, MDefinition* lhs, + MDefinition* rhs) { +# ifdef DEBUG +# define R(s) ReportShuffleSpecialization(s) +# else +# define R(s) (s) +# endif + + // If only one of the inputs is used, determine which. 
+ bool useLeft = true; + bool useRight = true; + if (lhs == rhs) { + useRight = false; + } else { + bool allAbove = true; + bool allBelow = true; + const SimdConstant::I8x16& lanes = control.asInt8x16(); + for (int8_t i : lanes) { + allAbove = allAbove && i >= 16; + allBelow = allBelow && i < 16; + } + if (allAbove) { + useLeft = false; + } else if (allBelow) { + useRight = false; + } + } + + // Deal with one-ignored-input. + if (!(useLeft && useRight)) { + SimdPermuteOp op = AnalyzePermute(&control); + return R(SimdShuffle::permute( + useLeft ? SimdShuffle::Operand::LEFT : SimdShuffle::Operand::RIGHT, + control, op)); + } + + // Move constants to rhs. + bool swapOperands = MaybeReorderShuffleOperands(&lhs, &rhs, &control); + + // Deal with constant rhs. + if (rhs->isWasmFloatConstant()) { + SimdConstant rhsConstant = rhs->toWasmFloatConstant()->toSimd128(); + if (rhsConstant.isZeroBits()) { + Maybe<SimdPermuteOp> op = AnalyzeShuffleWithZero(&control); + if (op) { + return R(SimdShuffle::permute(swapOperands ? SimdShuffle::Operand::RIGHT + : SimdShuffle::Operand::LEFT, + control, *op)); + } + } + } + + // Two operands both of which are used. If there's one constant operand it is + // now on the rhs. + SimdShuffleOp op = AnalyzeTwoArgShuffle(&control, &swapOperands); + return R(SimdShuffle::shuffle(swapOperands + ? SimdShuffle::Operand::BOTH_SWAPPED + : SimdShuffle::Operand::BOTH, + control, op)); +# undef R +} + +#endif // ENABLE_WASM_SIMD diff --git a/js/src/jit/ShuffleAnalysis.h b/js/src/jit/ShuffleAnalysis.h new file mode 100644 index 0000000000000..c2863b5f8a5ee --- /dev/null +++ b/js/src/jit/ShuffleAnalysis.h @@ -0,0 +1,133 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef jit_ShuffleAnalysis_h +#define jit_ShuffleAnalysis_h + +#include "jit/IonTypes.h" + +namespace js { +namespace jit { + +class MDefinition; + +// Permutation operations. NOTE: these may still be x86-centric, but the set +// can accommodate operations from other architectures. +// +// The "low-order" byte is in lane 0 of an 8x16 datum, the "high-order" byte +// in lane 15. The low-order byte is also the "rightmost". In wasm, the +// constant (v128.const i8x16 0 1 2 ... 15) has 0 in the low-order byte and 15 +// in the high-order byte. +enum class SimdPermuteOp { + // A single byte lane is copied into all the other byte lanes. control_[0] + // has the source lane. + BROADCAST_8x16, + + // A single word lane is copied into all the other word lanes. control_[0] + // has the source lane. + BROADCAST_16x8, + + // Copy input to output. + MOVE, + + // control_ has bytes in range 0..15 s.t. control_[i] holds the source lane + // for output lane i. + PERMUTE_8x16, + + // control_ has int16s in range 0..7, as for 8x16. In addition, the high + // byte of control_[0] has flags detailing the operation, values taken + // from the Perm16x8Action enum below. + PERMUTE_16x8, + + // control_ has int32s in range 0..3, as for 8x16. + PERMUTE_32x4, + + // control_[0] has the number of places to rotate by. + ROTATE_RIGHT_8x16, + + // Zeroes are shifted into high-order bytes and low-order bytes are lost. + // control_[0] has the number of places to shift by. + SHIFT_RIGHT_8x16, + + // Zeroes are shifted into low-order bytes and high-order bytes are lost. + // control_[0] has the number of places to shift by. + SHIFT_LEFT_8x16, +}; + +// Shuffle operations. NOTE: these may still be x86-centric, but the set can +// accommodate operations from other architectures. +enum class SimdShuffleOp { + // Blend bytes. control_ has the blend mask as an I8x16: 0 to select from + // the lhs, -1 to select from the rhs. + BLEND_8x16, + + // Blend words. 
control_ has the blend mask as an I16x8: 0 to select from + // the lhs, -1 to select from the rhs. + BLEND_16x8, + + // Concat the lhs in front of the rhs and shift right by bytes, extracting + // the low 16 bytes; control_[0] has the shift count. + CONCAT_RIGHT_SHIFT_8x16, + + // Interleave qwords/dwords/words/bytes from high/low halves of operands. + // The low-order item in the result comes from the lhs, then the next from + // the rhs, and so on. control_ is ignored. + INTERLEAVE_HIGH_8x16, + INTERLEAVE_HIGH_16x8, + INTERLEAVE_HIGH_32x4, + INTERLEAVE_HIGH_64x2, + INTERLEAVE_LOW_8x16, + INTERLEAVE_LOW_16x8, + INTERLEAVE_LOW_32x4, + INTERLEAVE_LOW_64x2, + + // Fully general shuffle+blend. control_ has the shuffle mask. + SHUFFLE_BLEND_8x16, +}; + +// Representation of the result of the shuffle analysis. +struct SimdShuffle { + enum class Operand { + // Both inputs, in the original lhs-rhs order + BOTH, + // Both inputs, but in rhs-lhs order + BOTH_SWAPPED, + // Only the lhs input + LEFT, + // Only the rhs input + RIGHT, + }; + + Operand opd; + SimdConstant control; + mozilla::Maybe<SimdPermuteOp> permuteOp; // Single operands + mozilla::Maybe<SimdShuffleOp> shuffleOp; // Double operands + + static SimdShuffle permute(Operand opd, SimdConstant control, + SimdPermuteOp op) { + MOZ_ASSERT(opd == Operand::LEFT || opd == Operand::RIGHT); + SimdShuffle s{opd, control, mozilla::Some(op), mozilla::Nothing()}; + return s; + } + + static SimdShuffle shuffle(Operand opd, SimdConstant control, + SimdShuffleOp op) { + MOZ_ASSERT(opd == Operand::BOTH || opd == Operand::BOTH_SWAPPED); + SimdShuffle s{opd, control, mozilla::Nothing(), mozilla::Some(op)}; + return s; + } +}; + +#ifdef ENABLE_WASM_SIMD + +SimdShuffle AnalyzeSimdShuffle(SimdConstant control, MDefinition* lhs, + MDefinition* rhs); + +#endif + +} // namespace jit +} // namespace js + +#endif // jit_ShuffleAnalysis_h diff --git a/js/src/jit/arm64/CodeGenerator-arm64.cpp b/js/src/jit/arm64/CodeGenerator-arm64.cpp index 
46d3a8ef39929..6cda7a0a6ea81 100644 --- a/js/src/jit/arm64/CodeGenerator-arm64.cpp +++ b/js/src/jit/arm64/CodeGenerator-arm64.cpp @@ -3507,55 +3507,55 @@ void CodeGenerator::visitWasmShuffleSimd128(LWasmShuffleSimd128* ins) { MOZ_ASSERT(ins->temp()->isBogusTemp()); SimdConstant control = ins->control(); switch (ins->op()) { - case LWasmShuffleSimd128::BLEND_8x16: { + case SimdShuffleOp::BLEND_8x16: { masm.blendInt8x16(reinterpret_cast<const uint8_t*>(control.asInt8x16()), lhs, rhs, dest); break; } - case LWasmShuffleSimd128::BLEND_16x8: { + case SimdShuffleOp::BLEND_16x8: { masm.blendInt16x8(reinterpret_cast<const uint16_t*>(control.asInt16x8()), lhs, rhs, dest); break; } - case LWasmShuffleSimd128::CONCAT_RIGHT_SHIFT_8x16: { + case SimdShuffleOp::CONCAT_RIGHT_SHIFT_8x16: { int8_t count = 16 - control.asInt8x16()[0]; MOZ_ASSERT(count > 0, "Should have been a MOVE operation"); masm.concatAndRightShiftSimd128(lhs, rhs, dest, count); break; } - case LWasmShuffleSimd128::INTERLEAVE_HIGH_8x16: { + case SimdShuffleOp::INTERLEAVE_HIGH_8x16: { masm.interleaveHighInt8x16(lhs, rhs, dest); break; } - case LWasmShuffleSimd128::INTERLEAVE_HIGH_16x8: { + case SimdShuffleOp::INTERLEAVE_HIGH_16x8: { masm.interleaveHighInt16x8(lhs, rhs, dest); break; } - case LWasmShuffleSimd128::INTERLEAVE_HIGH_32x4: { + case SimdShuffleOp::INTERLEAVE_HIGH_32x4: { masm.interleaveHighInt32x4(lhs, rhs, dest); break; } - case LWasmShuffleSimd128::INTERLEAVE_HIGH_64x2: { + case SimdShuffleOp::INTERLEAVE_HIGH_64x2: { masm.interleaveHighInt64x2(lhs, rhs, dest); break; } - case LWasmShuffleSimd128::INTERLEAVE_LOW_8x16: { + case SimdShuffleOp::INTERLEAVE_LOW_8x16: { masm.interleaveLowInt8x16(lhs, rhs, dest); break; } - case LWasmShuffleSimd128::INTERLEAVE_LOW_16x8: { + case SimdShuffleOp::INTERLEAVE_LOW_16x8: { masm.interleaveLowInt16x8(lhs, rhs, dest); break; } - case LWasmShuffleSimd128::INTERLEAVE_LOW_32x4: { + case SimdShuffleOp::INTERLEAVE_LOW_32x4: { masm.interleaveLowInt32x4(lhs, rhs, dest); break; 
} - case LWasmShuffleSimd128::INTERLEAVE_LOW_64x2: { + case SimdShuffleOp::INTERLEAVE_LOW_64x2: { masm.interleaveLowInt64x2(lhs, rhs, dest); break; } - case LWasmShuffleSimd128::SHUFFLE_BLEND_8x16: { + case SimdShuffleOp::SHUFFLE_BLEND_8x16: { masm.shuffleInt8x16(reinterpret_cast<const uint8_t*>(control.asInt8x16()), lhs, rhs, dest); break; @@ -3575,23 +3575,23 @@ void CodeGenerator::visitWasmPermuteSimd128(LWasmPermuteSimd128* ins) { FloatRegister dest = ToFloatRegister(ins->output()); SimdConstant control = ins->control(); switch (ins->op()) { - case LWasmPermuteSimd128::BROADCAST_8x16: { + case SimdPermuteOp::BROADCAST_8x16: { const SimdConstant::I8x16& mask = control.asInt8x16(); int8_t source = mask[0]; masm.splatX16(source, src, dest); break; } - case LWasmPermuteSimd128::BROADCAST_16x8: { + case SimdPermuteOp::BROADCAST_16x8: { const SimdConstant::I16x8& mask = control.asInt16x8(); int16_t source = mask[0]; masm.splatX8(source, src, dest); break; } - case LWasmPermuteSimd128::MOVE: { + case SimdPermuteOp::MOVE: { masm.moveSimd128(src, dest); break; } - case LWasmPermuteSimd128::PERMUTE_8x16: { + case SimdPermuteOp::PERMUTE_8x16: { const SimdConstant::I8x16& mask = control.asInt8x16(); # ifdef DEBUG mozilla::DebugOnly<int> i; @@ -3602,7 +3602,7 @@ void CodeGenerator::visitWasmPermuteSimd128(LWasmPermuteSimd128* ins) { masm.permuteInt8x16(reinterpret_cast<const uint8_t*>(mask), src, dest); break; } - case LWasmPermuteSimd128::PERMUTE_16x8: { + case SimdPermuteOp::PERMUTE_16x8: { const SimdConstant::I16x8& mask = control.asInt16x8(); # ifdef DEBUG mozilla::DebugOnly<int> i; @@ -3613,7 +3613,7 @@ void CodeGenerator::visitWasmPermuteSimd128(LWasmPermuteSimd128* ins) { masm.permuteInt16x8(reinterpret_cast<const uint16_t*>(mask), src, dest); break; } - case LWasmPermuteSimd128::PERMUTE_32x4: { + case SimdPermuteOp::PERMUTE_32x4: { const SimdConstant::I32x4& mask = control.asInt32x4(); # ifdef DEBUG mozilla::DebugOnly<int> i; @@ -3624,19 +3624,19 @@ void 
CodeGenerator::visitWasmPermuteSimd128(LWasmPermuteSimd128* ins) { masm.permuteInt32x4(reinterpret_cast<const uint32_t*>(mask), src, dest); break; } - case LWasmPermuteSimd128::ROTATE_RIGHT_8x16: { + case SimdPermuteOp::ROTATE_RIGHT_8x16: { int8_t count = control.asInt8x16()[0]; MOZ_ASSERT(count > 0, "Should have been a MOVE operation"); masm.rotateRightSimd128(src, dest, count); break; } - case LWasmPermuteSimd128::SHIFT_LEFT_8x16: { + case SimdPermuteOp::SHIFT_LEFT_8x16: { int8_t count = control.asInt8x16()[0]; MOZ_ASSERT(count > 0, "Should have been a MOVE operation"); masm.leftShiftSimd128(Imm32(count), src, dest); break; } - case LWasmPermuteSimd128::SHIFT_RIGHT_8x16: { + case SimdPermuteOp::SHIFT_RIGHT_8x16: { int8_t count = control.asInt8x16()[0]; MOZ_ASSERT(count > 0, "Should have been a MOVE operation"); masm.rightShiftSimd128(Imm32(count), src, dest); diff --git a/js/src/jit/arm64/Lowering-arm64.cpp b/js/src/jit/arm64/Lowering-arm64.cpp index 2c9c3689070db..212ea8aaf85bf 100644 --- a/js/src/jit/arm64/Lowering-arm64.cpp +++ b/js/src/jit/arm64/Lowering-arm64.cpp @@ -1120,29 +1120,26 @@ void LIRGenerator::visitWasmShuffleSimd128(MWasmShuffleSimd128* ins) { MOZ_ASSERT(ins->rhs()->type() == MIRType::Simd128); MOZ_ASSERT(ins->type() == MIRType::Simd128); - Shuffle s = AnalyzeShuffle(ins); -# ifdef DEBUG - ReportShuffleSpecialization(s); -# endif + SimdShuffle s = ins->shuffle(); switch (s.opd) { - case Shuffle::Operand::LEFT: - case Shuffle::Operand::RIGHT: { + case SimdShuffle::Operand::LEFT: + case SimdShuffle::Operand::RIGHT: { LAllocation src; switch (*s.permuteOp) { - case LWasmPermuteSimd128::MOVE: - case LWasmPermuteSimd128::BROADCAST_8x16: - case LWasmPermuteSimd128::BROADCAST_16x8: - case LWasmPermuteSimd128::PERMUTE_8x16: - case LWasmPermuteSimd128::PERMUTE_16x8: - case LWasmPermuteSimd128::PERMUTE_32x4: - case LWasmPermuteSimd128::ROTATE_RIGHT_8x16: - case LWasmPermuteSimd128::SHIFT_LEFT_8x16: - case LWasmPermuteSimd128::SHIFT_RIGHT_8x16: + case 
SimdPermuteOp::MOVE: + case SimdPermuteOp::BROADCAST_8x16: + case SimdPermuteOp::BROADCAST_16x8: + case SimdPermuteOp::PERMUTE_8x16: + case SimdPermuteOp::PERMUTE_16x8: + case SimdPermuteOp::PERMUTE_32x4: + case SimdPermuteOp::ROTATE_RIGHT_8x16: + case SimdPermuteOp::SHIFT_LEFT_8x16: + case SimdPermuteOp::SHIFT_RIGHT_8x16: break; default: MOZ_CRASH("Unexpected operator"); } - if (s.opd == Shuffle::Operand::LEFT) { + if (s.opd == SimdShuffle::Operand::LEFT) { src = useRegisterAtStart(ins->lhs()); } else { src = useRegisterAtStart(ins->rhs()); @@ -1152,12 +1149,12 @@ void LIRGenerator::visitWasmShuffleSimd128(MWasmShuffleSimd128* ins) { define(lir, ins); break; } - case Shuffle::Operand::BOTH: - case Shuffle::Operand::BOTH_SWAPPED: { + case SimdShuffle::Operand::BOTH: + case SimdShuffle::Operand::BOTH_SWAPPED: { LDefinition temp = LDefinition::BogusTemp(); LAllocation lhs; LAllocation rhs; - if (s.opd == Shuffle::Operand::BOTH) { + if (s.opd == SimdShuffle::Operand::BOTH) { lhs = useRegisterAtStart(ins->lhs()); rhs = useRegisterAtStart(ins->rhs()); } else { diff --git a/js/src/jit/moz.build b/js/src/jit/moz.build index 85c2aa191db01..d811aa17c9987 100644 --- a/js/src/jit/moz.build +++ b/js/src/jit/moz.build @@ -78,6 +78,7 @@ UNIFIED_SOURCES += [ "shared/CodeGenerator-shared.cpp", "shared/Disassembler-shared.cpp", "shared/Lowering-shared.cpp", + "ShuffleAnalysis.cpp", "Sink.cpp", "Snapshots.cpp", "TrialInlining.cpp", diff --git a/js/src/jit/shared/LIR-shared.h b/js/src/jit/shared/LIR-shared.h index 2a0e488355694..eed4d0f9b4f3f 100644 --- a/js/src/jit/shared/LIR-shared.h +++ b/js/src/jit/shared/LIR-shared.h @@ -3614,40 +3614,8 @@ class LWasmSignReplicationSimd128 : public LInstructionHelper<1, 1, 0> { // (v128, v128, imm_simd) -> v128 effect-free operation. // temp is FPR (and always in use). class LWasmShuffleSimd128 : public LInstructionHelper<1, 2, 1> { - public: - // Shuffle operations. 
NOTE: these may still be x86-centric, but the set can - // accomodate operations from other architectures. - enum Op { - // Blend bytes. control_ has the blend mask as an I8x16: 0 to select from - // the lhs, -1 to select from the rhs. - BLEND_8x16, - - // Blend words. control_ has the blend mask as an I16x8: 0 to select from - // the lhs, -1 to select from the rhs. - BLEND_16x8, - - // Concat the lhs in front of the rhs and shift right by bytes, extracting - // the low 16 bytes; control_[0] has the shift count. - CONCAT_RIGHT_SHIFT_8x16, - - // Interleave qwords/dwords/words/bytes from high/low halves of operands. - // The low-order item in the result comes from the lhs, then the next from - // the rhs, and so on. control_ is ignored. - INTERLEAVE_HIGH_8x16, - INTERLEAVE_HIGH_16x8, - INTERLEAVE_HIGH_32x4, - INTERLEAVE_HIGH_64x2, - INTERLEAVE_LOW_8x16, - INTERLEAVE_LOW_16x8, - INTERLEAVE_LOW_32x4, - INTERLEAVE_LOW_64x2, - - // Fully general shuffle+blend. control_ has the shuffle mask. - SHUFFLE_BLEND_8x16, - }; - private: - Op op_; + SimdShuffleOp op_; SimdConstant control_; public: @@ -3658,7 +3626,8 @@ class LWasmShuffleSimd128 : public LInstructionHelper<1, 2, 1> { static constexpr uint32_t Rhs = 1; LWasmShuffleSimd128(const LAllocation& lhs, const LAllocation& rhs, - const LDefinition& temp, Op op, SimdConstant control) + const LDefinition& temp, SimdShuffleOp op, + SimdConstant control) : LInstructionHelper(classOpcode), op_(op), control_(control) { setOperand(Lhs, lhs); setOperand(Rhs, rhs); @@ -3669,58 +3638,14 @@ class LWasmShuffleSimd128 : public LInstructionHelper<1, 2, 1> { const LAllocation* lhsDest() { return getOperand(LhsDest); } const LAllocation* rhs() { return getOperand(Rhs); } const LDefinition* temp() { return getTemp(0); } - Op op() { return op_; } + SimdShuffleOp op() { return op_; } SimdConstant control() { return control_; } }; // (v128, imm_simd) -> v128 effect-free operation. 
class LWasmPermuteSimd128 : public LInstructionHelper<1, 1, 0> { - public: - // Permutation operations. NOTE: these may still be x86-centric, but the set - // can accomodate operations from other architectures. - // - // The "low-order" byte is in lane 0 of an 8x16 datum, the "high-order" byte - // in lane 15. The low-order byte is also the "rightmost". In wasm, the - // constant (v128.const i8x16 0 1 2 ... 15) has 0 in the low-order byte and 15 - // in the high-order byte. - enum Op { - // A single byte lane is copied into all the other byte lanes. control_[0] - // has the source lane. - BROADCAST_8x16, - - // A single word lane is copied into all the other word lanes. control_[0] - // has the source lane. - BROADCAST_16x8, - - // Copy input to output. - MOVE, - - // control_ has bytes in range 0..15 s.t. control_[i] holds the source lane - // for output lane i. - PERMUTE_8x16, - - // control_ has int16s in range 0..7, as for 8x16. In addition, the high - // byte of control_[0] has flags detailing the operation, values taken - // from the Perm16x8Action enum below. - PERMUTE_16x8, - - // control_ has int32s in range 0..3, as for 8x16. - PERMUTE_32x4, - - // control_[0] has the number of places to rotate by. - ROTATE_RIGHT_8x16, - - // Zeroes are shifted into high-order bytes and low-order bytes are lost. - // control_[0] has the number of places to shift by. - SHIFT_RIGHT_8x16, - - // Zeroes are shifted into low-order bytes and high-order bytes are lost. - // control_[0] has the number of places to shift by. 
- SHIFT_LEFT_8x16, - }; - private: - Op op_; + SimdPermuteOp op_; SimdConstant control_; public: @@ -3728,13 +3653,14 @@ class LWasmPermuteSimd128 : public LInstructionHelper<1, 1, 0> { static constexpr uint32_t Src = 0; - LWasmPermuteSimd128(const LAllocation& src, Op op, SimdConstant control) + LWasmPermuteSimd128(const LAllocation& src, SimdPermuteOp op, + SimdConstant control) : LInstructionHelper(classOpcode), op_(op), control_(control) { setOperand(Src, src); } const LAllocation* src() { return getOperand(Src); } - Op op() { return op_; } + SimdPermuteOp op() { return op_; } SimdConstant control() { return control_; } }; diff --git a/js/src/jit/shared/Lowering-shared.cpp b/js/src/jit/shared/Lowering-shared.cpp index 18bfe0cdb23ab..b363f6096fa20 100644 --- a/js/src/jit/shared/Lowering-shared.cpp +++ b/js/src/jit/shared/Lowering-shared.cpp @@ -338,701 +338,3 @@ void LIRGeneratorShared::lowerWasmCompareAndSelect(MWasmSelect* ins, useRegisterAtStart(ins->trueExpr()), useAny(ins->falseExpr())); defineReuseInput(lir, ins, LWasmCompareAndSelect::IfTrueExprIndex); } - -#ifdef ENABLE_WASM_SIMD - -// Specialization analysis for SIMD operations. This is still x86-centric but -// generalizes fairly easily to other architectures. - -// Optimization of v8x16.shuffle. The general byte shuffle+blend is very -// expensive (equivalent to at least a dozen instructions), and we want to avoid -// that if we can. So look for special cases - there are many. 
-// -// The strategy is to sort the operation into one of three buckets depending -// on the shuffle pattern and inputs: -// -// - single operand; shuffles on these values are rotations, reversals, -// transpositions, and general permutations -// - single-operand-with-interesting-constant (especially zero); shuffles on -// these values are often byte shift or scatter operations -// - dual operand; shuffles on these operations are blends, catenated -// shifts, and (in the worst case) general shuffle+blends -// -// We're not trying to solve the general problem, only to lower reasonably -// expressed patterns that express common operations. Producers that produce -// dense and convoluted patterns will end up with the general byte shuffle. -// Producers that produce simpler patterns that easily map to hardware will -// get faster code. -// -// In particular, these matchers do not try to combine transformations, so a -// shuffle that optimally is lowered to rotate + permute32x4 + rotate, say, is -// usually going to end up as a general byte shuffle. - -// Reduce a 0..31 byte mask to a 0..15 word mask if possible and if so return -// true, updating *control. -static bool ByteMaskToWordMask(SimdConstant* control) { - const SimdConstant::I8x16& lanes = control->asInt8x16(); - int16_t controlWords[8]; - for (int i = 0; i < 16; i += 2) { - if (!((lanes[i] & 1) == 0 && lanes[i + 1] == lanes[i] + 1)) { - return false; - } - controlWords[i / 2] = lanes[i] / 2; - } - *control = SimdConstant::CreateX8(controlWords); - return true; -} - -// Reduce a 0..31 byte mask to a 0..7 dword mask if possible and if so return -// true, updating *control. 
-static bool ByteMaskToDWordMask(SimdConstant* control) { - const SimdConstant::I8x16& lanes = control->asInt8x16(); - int32_t controlDWords[4]; - for (int i = 0; i < 16; i += 4) { - if (!((lanes[i] & 3) == 0 && lanes[i + 1] == lanes[i] + 1 && - lanes[i + 2] == lanes[i] + 2 && lanes[i + 3] == lanes[i] + 3)) { - return false; - } - controlDWords[i / 4] = lanes[i] / 4; - } - *control = SimdConstant::CreateX4(controlDWords); - return true; -} - -// Reduce a 0..31 byte mask to a 0..3 qword mask if possible and if so return -// true, updating *control. -static bool ByteMaskToQWordMask(SimdConstant* control) { - const SimdConstant::I8x16& lanes = control->asInt8x16(); - int64_t controlQWords[2]; - for (int i = 0; i < 16; i += 8) { - if (!((lanes[i] & 7) == 0 && lanes[i + 1] == lanes[i] + 1 && - lanes[i + 2] == lanes[i] + 2 && lanes[i + 3] == lanes[i] + 3 && - lanes[i + 4] == lanes[i] + 4 && lanes[i + 5] == lanes[i] + 5 && - lanes[i + 6] == lanes[i] + 6 && lanes[i + 7] == lanes[i] + 7)) { - return false; - } - controlQWords[i / 8] = lanes[i] / 8; - } - *control = SimdConstant::CreateX2(controlQWords); - return true; -} - -// Skip across consecutive values in lanes starting at i, returning the index -// after the last element. Lane values must be <= len-1 ("masked"). -// -// Since every element is a 1-element run, the return value is never the same as -// the starting i. -template <typename T> -static int ScanIncreasingMasked(const T* lanes, int i) { - int len = int(16 / sizeof(T)); - MOZ_ASSERT(i < len); - MOZ_ASSERT(lanes[i] <= len - 1); - i++; - while (i < len && lanes[i] == lanes[i - 1] + 1) { - MOZ_ASSERT(lanes[i] <= len - 1); - i++; - } - return i; -} - -// Skip across consecutive values in lanes starting at i, returning the index -// after the last element. Lane values must be <= len*2-1 ("unmasked"); the -// values len-1 and len are not considered consecutive. -// -// Since every element is a 1-element run, the return value is never the same as -// the starting i. 
-template <typename T> -static int ScanIncreasingUnmasked(const T* lanes, int i) { - int len = int(16 / sizeof(T)); - MOZ_ASSERT(i < len); - if (lanes[i] < len) { - i++; - while (i < len && lanes[i] < len && lanes[i - 1] == lanes[i] - 1) { - i++; - } - } else { - i++; - while (i < len && lanes[i] >= len && lanes[i - 1] == lanes[i] - 1) { - i++; - } - } - return i; -} - -// Skip lanes that equal v starting at i, returning the index just beyond the -// last of those. There is no requirement that the initial lanes[i] == v. -template <typename T> -static int ScanConstant(const T* lanes, int v, int i) { - int len = int(16 / sizeof(T)); - MOZ_ASSERT(i <= len); - while (i < len && lanes[i] == v) { - i++; - } - return i; -} - -// Mask lane values denoting rhs elements into lhs elements. -template <typename T> -static void MaskLanes(T* result, const T* input) { - int len = int(16 / sizeof(T)); - for (int i = 0; i < len; i++) { - result[i] = input[i] & (len - 1); - } -} - -// Apply a transformation to each lane value. -template <typename T> -static void MapLanes(T* result, const T* input, int (*f)(int)) { - int len = int(16 / sizeof(T)); - for (int i = 0; i < len; i++) { - result[i] = f(input[i]); - } -} - -// Recognize an identity permutation, assuming lanes is masked. -template <typename T> -static bool IsIdentity(const T* lanes) { - return ScanIncreasingMasked(lanes, 0) == int(16 / sizeof(T)); -} - -// Recognize part of an identity permutation starting at start, with -// the first value of the permutation expected to be bias. -template <typename T> -static bool IsIdentity(const T* lanes, int start, int len, int bias) { - if (lanes[start] != bias) { - return false; - } - for (int i = start + 1; i < start + len; i++) { - if (lanes[i] != lanes[i - 1] + 1) { - return false; - } - } - return true; -} - -// We can permute by dwords if the mask is reducible to a dword mask, and in -// this case a single PSHUFD is enough. 
-static bool TryPermute32x4(SimdConstant* control) { - SimdConstant tmp = *control; - if (!ByteMaskToDWordMask(&tmp)) { - return false; - } - *control = tmp; - return true; -} - -// Can we perform a byte rotate right? We can use PALIGNR. The shift count is -// just lanes[0], and *control is unchanged. -static bool TryRotateRight8x16(SimdConstant* control) { - const SimdConstant::I8x16& lanes = control->asInt8x16(); - // Look for the end of the first run of consecutive bytes. - int i = ScanIncreasingMasked(lanes, 0); - - // First run must start at a value s.t. we have a rotate if all remaining - // bytes are a run. - if (lanes[0] != 16 - i) { - return false; - } - - // If we reached the end of the vector, we're done. - if (i == 16) { - return true; - } - - // Second run must start at source lane zero. - if (lanes[i] != 0) { - return false; - } - - // Second run must end at the end of the lane vector. - return ScanIncreasingMasked(lanes, i) == 16; -} - -// We can permute by words if the mask is reducible to a word mask. -static bool TryPermute16x8(SimdConstant* control) { - SimdConstant tmp = *control; - if (!ByteMaskToWordMask(&tmp)) { - return false; - } - *control = tmp; - return true; -} - -// A single word lane is copied into all the other lanes: PSHUF*W + PSHUFD. -static bool TryBroadcast16x8(SimdConstant* control) { - SimdConstant tmp = *control; - if (!ByteMaskToWordMask(&tmp)) { - return false; - } - const SimdConstant::I16x8& lanes = tmp.asInt16x8(); - if (ScanConstant(lanes, lanes[0], 0) < 8) { - return false; - } - *control = tmp; - return true; -} - -// A single byte lane is copied int all the other lanes: PUNPCK*BW + PSHUF*W + -// PSHUFD. -static bool TryBroadcast8x16(SimdConstant* control) { - const SimdConstant::I8x16& lanes = control->asInt8x16(); - if (ScanConstant(lanes, lanes[0], 0) < 16) { - return false; - } - return true; -} - -// Look for permutations of a single operand. 
-static LWasmPermuteSimd128::Op AnalyzePermute(SimdConstant* control) { - // Lane indices are input-agnostic for single-operand permutations. - SimdConstant::I8x16 controlBytes; - MaskLanes(controlBytes, control->asInt8x16()); - - // Get rid of no-ops immediately, so nobody else needs to check. - if (IsIdentity(controlBytes)) { - return LWasmPermuteSimd128::MOVE; - } - - // Default control is the masked bytes. - *control = SimdConstant::CreateX16(controlBytes); - - // Analysis order matters here and is architecture-dependent or even - // microarchitecture-dependent: ideally the cheapest implementation first. - // The Intel manual says that the cost of a PSHUFB is about five other - // operations, so make that our cutoff. - // - // Word, dword, and qword reversals are handled optimally by general permutes. - // - // Byte reversals are probably best left to PSHUFB, no alternative rendition - // seems to reliably go below five instructions. (Discuss.) - // - // Word swaps within doublewords and dword swaps within quadwords are handled - // optimally by general permutes. - // - // Dword and qword broadcasts are handled by dword permute. - - if (TryPermute32x4(control)) { - return LWasmPermuteSimd128::PERMUTE_32x4; - } - if (TryRotateRight8x16(control)) { - return LWasmPermuteSimd128::ROTATE_RIGHT_8x16; - } - if (TryBroadcast16x8(control)) { - return LWasmPermuteSimd128::BROADCAST_16x8; - } - if (TryPermute16x8(control)) { - return LWasmPermuteSimd128::PERMUTE_16x8; - } - if (TryBroadcast8x16(control)) { - return LWasmPermuteSimd128::BROADCAST_8x16; - } - - // TODO: (From v8) Unzip and transpose generally have renditions that slightly - // beat a general permute (three or four instructions) - // - // TODO: (From MacroAssemblerX86Shared::ShuffleX4): MOVLHPS and MOVHLPS can be - // used when merging two values. - // - // TODO: Byteswap is MOV + PSLLW + PSRLW + POR, a small win over PSHUFB. - - // The default operation is to permute bytes with the default control. 
- return LWasmPermuteSimd128::PERMUTE_8x16; -} - -// Can we shift the bytes left or right by a constant? A shift is a run of -// lanes from the rhs (which is zero) on one end and a run of values from the -// lhs on the other end. -static Maybe<LWasmPermuteSimd128::Op> TryShift8x16(SimdConstant* control) { - const SimdConstant::I8x16& lanes = control->asInt8x16(); - - // Represent all zero lanes by 16 - SimdConstant::I8x16 zeroesMasked; - MapLanes(zeroesMasked, lanes, [](int x) -> int { return x >= 16 ? 16 : x; }); - - int i = ScanConstant(zeroesMasked, 16, 0); - int shiftLeft = i; - if (shiftLeft > 0 && lanes[shiftLeft] != 0) { - return Nothing(); - } - - i = ScanIncreasingUnmasked(zeroesMasked, i); - int shiftRight = 16 - i; - if (shiftRight > 0 && lanes[i - 1] != 15) { - return Nothing(); - } - - i = ScanConstant(zeroesMasked, 16, i); - if (i < 16 || (shiftRight > 0 && shiftLeft > 0) || - (shiftRight == 0 && shiftLeft == 0)) { - return Nothing(); - } - - if (shiftRight) { - *control = SimdConstant::SplatX16(shiftRight); - return Some(LWasmPermuteSimd128::SHIFT_RIGHT_8x16); - } - *control = SimdConstant::SplatX16(shiftLeft); - return Some(LWasmPermuteSimd128::SHIFT_LEFT_8x16); -} - -static Maybe<LWasmPermuteSimd128::Op> AnalyzeShuffleWithZero( - SimdConstant* control) { - Maybe<LWasmPermuteSimd128::Op> op; - op = TryShift8x16(control); - if (op) { - return op; - } - - // TODO: Optimization opportunity? A byte-blend-with-zero is just a CONST; - // PAND. This may beat the general byte blend code below. - return Nothing(); -} - -// Concat: if the result is the suffix (high bytes) of the rhs in front of a -// prefix (low bytes) of the lhs then this is PALIGNR; ditto if the operands are -// swapped. 
-static Maybe<LWasmShuffleSimd128::Op> TryConcatRightShift8x16( - SimdConstant* control, bool* swapOperands) { - const SimdConstant::I8x16& lanes = control->asInt8x16(); - int i = ScanIncreasingUnmasked(lanes, 0); - MOZ_ASSERT(i < 16, "Single-operand run should have been handled elswhere"); - // First run must end with 15 % 16 - if ((lanes[i - 1] & 15) != 15) { - return Nothing(); - } - // Second run must start with 0 % 16 - if ((lanes[i] & 15) != 0) { - return Nothing(); - } - // The two runs must come from different inputs - if ((lanes[i] & 16) == (lanes[i - 1] & 16)) { - return Nothing(); - } - int suffixLength = i; - - i = ScanIncreasingUnmasked(lanes, i); - // Must end at the left end - if (i != 16) { - return Nothing(); - } - - // If the suffix is from the lhs then swap the operands - if (lanes[0] < 16) { - *swapOperands = !*swapOperands; - } - *control = SimdConstant::SplatX16(suffixLength); - return Some(LWasmShuffleSimd128::CONCAT_RIGHT_SHIFT_8x16); -} - -// Blend words: if we pick words from both operands without a pattern but all -// the input words stay in their position then this is PBLENDW (immediate mask); -// this also handles all larger sizes on x64. -static Maybe<LWasmShuffleSimd128::Op> TryBlendInt16x8(SimdConstant* control) { - SimdConstant tmp(*control); - if (!ByteMaskToWordMask(&tmp)) { - return Nothing(); - } - SimdConstant::I16x8 masked; - MaskLanes(masked, tmp.asInt16x8()); - if (!IsIdentity(masked)) { - return Nothing(); - } - SimdConstant::I16x8 mapped; - MapLanes(mapped, tmp.asInt16x8(), - [](int x) -> int { return x < 8 ? 0 : -1; }); - *control = SimdConstant::CreateX8(mapped); - return Some(LWasmShuffleSimd128::BLEND_16x8); -} - -// Blend bytes: if we pick bytes ditto then this is a byte blend, which can be -// handled with a CONST, PAND, PANDNOT, and POR. -// -// TODO: Optimization opportunity? 
If we pick all but one lanes from one with at -// most one from the other then it could be a MOV + PEXRB + PINSRB (also if this -// element is not in its source location). -static Maybe<LWasmShuffleSimd128::Op> TryBlendInt8x16(SimdConstant* control) { - SimdConstant::I8x16 masked; - MaskLanes(masked, control->asInt8x16()); - if (!IsIdentity(masked)) { - return Nothing(); - } - SimdConstant::I8x16 mapped; - MapLanes(mapped, control->asInt8x16(), - [](int x) -> int { return x < 16 ? 0 : -1; }); - *control = SimdConstant::CreateX16(mapped); - return Some(LWasmShuffleSimd128::BLEND_8x16); -} - -template <typename T> -static bool MatchInterleave(const T* lanes, int lhs, int rhs, int len) { - for (int i = 0; i < len; i++) { - if (lanes[i * 2] != lhs + i || lanes[i * 2 + 1] != rhs + i) { - return false; - } - } - return true; -} - -// Unpack/interleave: -// - if we interleave the low (bytes/words/doublewords) of the inputs into -// the output then this is UNPCKL*W (possibly with a swap of operands). 
-// - if we interleave the high ditto then it is UNPCKH*W (ditto) -template <typename T> -static Maybe<LWasmShuffleSimd128::Op> TryInterleave( - const T* lanes, int lhs, int rhs, bool* swapOperands, - LWasmShuffleSimd128::Op lowOp, LWasmShuffleSimd128::Op highOp) { - int len = int(32 / (sizeof(T) * 4)); - if (MatchInterleave(lanes, lhs, rhs, len)) { - return Some(lowOp); - } - if (MatchInterleave(lanes, rhs, lhs, len)) { - *swapOperands = !*swapOperands; - return Some(lowOp); - } - if (MatchInterleave(lanes, lhs + len, rhs + len, len)) { - return Some(highOp); - } - if (MatchInterleave(lanes, rhs + len, lhs + len, len)) { - *swapOperands = !*swapOperands; - return Some(highOp); - } - return Nothing(); -} - -static Maybe<LWasmShuffleSimd128::Op> TryInterleave64x2(SimdConstant* control, - bool* swapOperands) { - SimdConstant tmp = *control; - if (!ByteMaskToQWordMask(&tmp)) { - return Nothing(); - } - const SimdConstant::I64x2& lanes = tmp.asInt64x2(); - return TryInterleave(lanes, 0, 2, swapOperands, - LWasmShuffleSimd128::INTERLEAVE_LOW_64x2, - LWasmShuffleSimd128::INTERLEAVE_HIGH_64x2); -} - -static Maybe<LWasmShuffleSimd128::Op> TryInterleave32x4(SimdConstant* control, - bool* swapOperands) { - SimdConstant tmp = *control; - if (!ByteMaskToDWordMask(&tmp)) { - return Nothing(); - } - const SimdConstant::I32x4& lanes = tmp.asInt32x4(); - return TryInterleave(lanes, 0, 4, swapOperands, - LWasmShuffleSimd128::INTERLEAVE_LOW_32x4, - LWasmShuffleSimd128::INTERLEAVE_HIGH_32x4); -} - -static Maybe<LWasmShuffleSimd128::Op> TryInterleave16x8(SimdConstant* control, - bool* swapOperands) { - SimdConstant tmp = *control; - if (!ByteMaskToWordMask(&tmp)) { - return Nothing(); - } - const SimdConstant::I16x8& lanes = tmp.asInt16x8(); - return TryInterleave(lanes, 0, 8, swapOperands, - LWasmShuffleSimd128::INTERLEAVE_LOW_16x8, - LWasmShuffleSimd128::INTERLEAVE_HIGH_16x8); -} - -static Maybe<LWasmShuffleSimd128::Op> TryInterleave8x16(SimdConstant* control, - bool* swapOperands) 
{ - const SimdConstant::I8x16& lanes = control->asInt8x16(); - return TryInterleave(lanes, 0, 16, swapOperands, - LWasmShuffleSimd128::INTERLEAVE_LOW_8x16, - LWasmShuffleSimd128::INTERLEAVE_HIGH_8x16); -} - -static LWasmShuffleSimd128::Op AnalyzeTwoArgShuffle(SimdConstant* control, - bool* swapOperands) { - Maybe<LWasmShuffleSimd128::Op> op; - op = TryConcatRightShift8x16(control, swapOperands); - if (!op) { - op = TryBlendInt16x8(control); - } - if (!op) { - op = TryBlendInt8x16(control); - } - if (!op) { - op = TryInterleave64x2(control, swapOperands); - } - if (!op) { - op = TryInterleave32x4(control, swapOperands); - } - if (!op) { - op = TryInterleave16x8(control, swapOperands); - } - if (!op) { - op = TryInterleave8x16(control, swapOperands); - } - if (!op) { - op = Some(LWasmShuffleSimd128::SHUFFLE_BLEND_8x16); - } - return *op; -} - -// Reorder the operands if that seems useful, notably, move a constant to the -// right hand side. Rewrites the control to account for any move. -static bool MaybeReorderShuffleOperands(MDefinition** lhs, MDefinition** rhs, - SimdConstant* control) { - if ((*lhs)->isWasmFloatConstant()) { - MDefinition* tmp = *lhs; - *lhs = *rhs; - *rhs = tmp; - - int8_t controlBytes[16]; - const SimdConstant::I8x16& lanes = control->asInt8x16(); - for (unsigned i = 0; i < 16; i++) { - controlBytes[i] = lanes[i] ^ 16; - } - *control = SimdConstant::CreateX16(controlBytes); - - return true; - } - return false; -} - -Shuffle LIRGeneratorShared::AnalyzeShuffle(MWasmShuffleSimd128* ins) { - // Control may be updated, but only once we commit to an operation or when we - // swap operands. - SimdConstant control = ins->control(); - MDefinition* lhs = ins->lhs(); - MDefinition* rhs = ins->rhs(); - - // If only one of the inputs is used, determine which. 
- bool useLeft = true; - bool useRight = true; - if (lhs == rhs) { - useRight = false; - } else { - bool allAbove = true; - bool allBelow = true; - const SimdConstant::I8x16& lanes = control.asInt8x16(); - for (unsigned i = 0; i < 16; i++) { - allAbove = allAbove && lanes[i] >= 16; - allBelow = allBelow && lanes[i] < 16; - } - if (allAbove) { - useLeft = false; - } else if (allBelow) { - useRight = false; - } - } - - // Deal with one-ignored-input. - if (!(useLeft && useRight)) { - LWasmPermuteSimd128::Op op = AnalyzePermute(&control); - return Shuffle::permute( - useLeft ? Shuffle::Operand::LEFT : Shuffle::Operand::RIGHT, control, - op); - } - - // Move constants to rhs. - bool swapOperands = MaybeReorderShuffleOperands(&lhs, &rhs, &control); - - // Deal with constant rhs. - if (rhs->isWasmFloatConstant()) { - SimdConstant rhsConstant = rhs->toWasmFloatConstant()->toSimd128(); - if (rhsConstant.isZeroBits()) { - Maybe<LWasmPermuteSimd128::Op> op = AnalyzeShuffleWithZero(&control); - if (op) { - return Shuffle::permute( - swapOperands ? Shuffle::Operand::RIGHT : Shuffle::Operand::LEFT, - control, *op); - } - } - } - - // Two operands both of which are used. If there's one constant operand it is - // now on the rhs. - LWasmShuffleSimd128::Op op = AnalyzeTwoArgShuffle(&control, &swapOperands); - return Shuffle::shuffle( - swapOperands ? 
Shuffle::Operand::BOTH_SWAPPED : Shuffle::Operand::BOTH, - control, op); -} - -# ifdef DEBUG -void LIRGeneratorShared::ReportShuffleSpecialization(const Shuffle& s) { - switch (s.opd) { - case Shuffle::Operand::BOTH: - case Shuffle::Operand::BOTH_SWAPPED: - switch (*s.shuffleOp) { - case LWasmShuffleSimd128::SHUFFLE_BLEND_8x16: - js::wasm::ReportSimdAnalysis("shuffle -> shuffle+blend 8x16"); - break; - case LWasmShuffleSimd128::BLEND_8x16: - js::wasm::ReportSimdAnalysis("shuffle -> blend 8x16"); - break; - case LWasmShuffleSimd128::BLEND_16x8: - js::wasm::ReportSimdAnalysis("shuffle -> blend 16x8"); - break; - case LWasmShuffleSimd128::CONCAT_RIGHT_SHIFT_8x16: - js::wasm::ReportSimdAnalysis("shuffle -> concat+shift-right 8x16"); - break; - case LWasmShuffleSimd128::INTERLEAVE_HIGH_8x16: - js::wasm::ReportSimdAnalysis("shuffle -> interleave-high 8x16"); - break; - case LWasmShuffleSimd128::INTERLEAVE_HIGH_16x8: - js::wasm::ReportSimdAnalysis("shuffle -> interleave-high 16x8"); - break; - case LWasmShuffleSimd128::INTERLEAVE_HIGH_32x4: - js::wasm::ReportSimdAnalysis("shuffle -> interleave-high 32x4"); - break; - case LWasmShuffleSimd128::INTERLEAVE_HIGH_64x2: - js::wasm::ReportSimdAnalysis("shuffle -> interleave-high 64x2"); - break; - case LWasmShuffleSimd128::INTERLEAVE_LOW_8x16: - js::wasm::ReportSimdAnalysis("shuffle -> interleave-low 8x16"); - break; - case LWasmShuffleSimd128::INTERLEAVE_LOW_16x8: - js::wasm::ReportSimdAnalysis("shuffle -> interleave-low 16x8"); - break; - case LWasmShuffleSimd128::INTERLEAVE_LOW_32x4: - js::wasm::ReportSimdAnalysis("shuffle -> interleave-low 32x4"); - break; - case LWasmShuffleSimd128::INTERLEAVE_LOW_64x2: - js::wasm::ReportSimdAnalysis("shuffle -> interleave-low 64x2"); - break; - default: - MOZ_CRASH("Unexpected shuffle op"); - } - break; - case Shuffle::Operand::LEFT: - case Shuffle::Operand::RIGHT: - switch (*s.permuteOp) { - case LWasmPermuteSimd128::BROADCAST_8x16: - js::wasm::ReportSimdAnalysis("shuffle -> broadcast 
8x16"); - break; - case LWasmPermuteSimd128::BROADCAST_16x8: - js::wasm::ReportSimdAnalysis("shuffle -> broadcast 16x8"); - break; - case LWasmPermuteSimd128::MOVE: - js::wasm::ReportSimdAnalysis("shuffle -> move"); - break; - case LWasmPermuteSimd128::PERMUTE_8x16: - js::wasm::ReportSimdAnalysis("shuffle -> permute 8x16"); - break; - case LWasmPermuteSimd128::PERMUTE_16x8: - js::wasm::ReportSimdAnalysis("shuffle -> permute 16x8"); - break; - case LWasmPermuteSimd128::PERMUTE_32x4: - js::wasm::ReportSimdAnalysis("shuffle -> permute 32x4"); - break; - case LWasmPermuteSimd128::ROTATE_RIGHT_8x16: - js::wasm::ReportSimdAnalysis("shuffle -> rotate-right 8x16"); - break; - case LWasmPermuteSimd128::SHIFT_LEFT_8x16: - js::wasm::ReportSimdAnalysis("shuffle -> shift-left 8x16"); - break; - case LWasmPermuteSimd128::SHIFT_RIGHT_8x16: - js::wasm::ReportSimdAnalysis("shuffle -> shift-right 8x16"); - break; - default: - MOZ_CRASH("Unexpected permute op"); - } - break; - } -} -# endif // DEBUG - -#endif // ENABLE_WASM_SIMD diff --git a/js/src/jit/shared/Lowering-shared.h b/js/src/jit/shared/Lowering-shared.h index 9de7fcb7f5ac4..83adb2efbfe3e 100644 --- a/js/src/jit/shared/Lowering-shared.h +++ b/js/src/jit/shared/Lowering-shared.h @@ -22,45 +22,6 @@ class MDefinition; class MInstruction; class LOsiPoint; -#ifdef ENABLE_WASM_SIMD - -// Representation of the result of the shuffle analysis. See -// Lowering-shared.cpp for more. 
- -struct Shuffle { - enum class Operand { - // Both inputs, in the original lhs-rhs order - BOTH, - // Both inputs, but in rhs-lhs order - BOTH_SWAPPED, - // Only the lhs input - LEFT, - // Only the rhs input - RIGHT, - }; - - Operand opd; - SimdConstant control; - mozilla::Maybe<LWasmPermuteSimd128::Op> permuteOp; // Single operands - mozilla::Maybe<LWasmShuffleSimd128::Op> shuffleOp; // Double operands - - static Shuffle permute(Operand opd, SimdConstant control, - LWasmPermuteSimd128::Op op) { - MOZ_ASSERT(opd == Operand::LEFT || opd == Operand::RIGHT); - Shuffle s{opd, control, mozilla::Some(op), mozilla::Nothing()}; - return s; - } - - static Shuffle shuffle(Operand opd, SimdConstant control, - LWasmShuffleSimd128::Op op) { - MOZ_ASSERT(opd == Operand::BOTH || opd == Operand::BOTH_SWAPPED); - Shuffle s{opd, control, mozilla::Nothing(), mozilla::Some(op)}; - return s; - } -}; - -#endif - class LIRGeneratorShared { protected: MIRGenerator* gen; @@ -111,13 +72,6 @@ class LIRGeneratorShared { static bool ShouldReorderCommutative(MDefinition* lhs, MDefinition* rhs, MInstruction* ins); -#ifdef ENABLE_WASM_SIMD - static Shuffle AnalyzeShuffle(MWasmShuffleSimd128* ins); -# ifdef DEBUG - static void ReportShuffleSpecialization(const Shuffle& s); -# endif -#endif - // A backend can decide that an instruction should be emitted at its uses, // rather than at its definition. To communicate this, set the // instruction's virtual register set to 0. 
When using the instruction, diff --git a/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp b/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp index a6eab3026f547..da0df6c0cf372 100644 --- a/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp +++ b/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp @@ -3043,65 +3043,65 @@ void CodeGenerator::visitWasmShuffleSimd128(LWasmShuffleSimd128* ins) { FloatRegister rhs = ToFloatRegister(ins->rhs()); SimdConstant control = ins->control(); switch (ins->op()) { - case LWasmShuffleSimd128::BLEND_8x16: { + case SimdShuffleOp::BLEND_8x16: { masm.blendInt8x16(reinterpret_cast<const uint8_t*>(control.asInt8x16()), lhsDest, rhs, lhsDest, ToFloatRegister(ins->temp())); break; } - case LWasmShuffleSimd128::BLEND_16x8: { + case SimdShuffleOp::BLEND_16x8: { MOZ_ASSERT(ins->temp()->isBogusTemp()); masm.blendInt16x8(reinterpret_cast<const uint16_t*>(control.asInt16x8()), lhsDest, rhs, lhsDest); break; } - case LWasmShuffleSimd128::CONCAT_RIGHT_SHIFT_8x16: { + case SimdShuffleOp::CONCAT_RIGHT_SHIFT_8x16: { MOZ_ASSERT(ins->temp()->isBogusTemp()); int8_t count = 16 - control.asInt8x16()[0]; MOZ_ASSERT(count > 0, "Should have been a MOVE operation"); masm.concatAndRightShiftSimd128(rhs, lhsDest, count); break; } - case LWasmShuffleSimd128::INTERLEAVE_HIGH_8x16: { + case SimdShuffleOp::INTERLEAVE_HIGH_8x16: { MOZ_ASSERT(ins->temp()->isBogusTemp()); masm.interleaveHighInt8x16(rhs, lhsDest); break; } - case LWasmShuffleSimd128::INTERLEAVE_HIGH_16x8: { + case SimdShuffleOp::INTERLEAVE_HIGH_16x8: { MOZ_ASSERT(ins->temp()->isBogusTemp()); masm.interleaveHighInt16x8(rhs, lhsDest); break; } - case LWasmShuffleSimd128::INTERLEAVE_HIGH_32x4: { + case SimdShuffleOp::INTERLEAVE_HIGH_32x4: { MOZ_ASSERT(ins->temp()->isBogusTemp()); masm.interleaveHighInt32x4(rhs, lhsDest); break; } - case LWasmShuffleSimd128::INTERLEAVE_HIGH_64x2: { + case SimdShuffleOp::INTERLEAVE_HIGH_64x2: { MOZ_ASSERT(ins->temp()->isBogusTemp()); masm.interleaveHighInt64x2(rhs, 
lhsDest); break; } - case LWasmShuffleSimd128::INTERLEAVE_LOW_8x16: { + case SimdShuffleOp::INTERLEAVE_LOW_8x16: { MOZ_ASSERT(ins->temp()->isBogusTemp()); masm.interleaveLowInt8x16(rhs, lhsDest); break; } - case LWasmShuffleSimd128::INTERLEAVE_LOW_16x8: { + case SimdShuffleOp::INTERLEAVE_LOW_16x8: { MOZ_ASSERT(ins->temp()->isBogusTemp()); masm.interleaveLowInt16x8(rhs, lhsDest); break; } - case LWasmShuffleSimd128::INTERLEAVE_LOW_32x4: { + case SimdShuffleOp::INTERLEAVE_LOW_32x4: { MOZ_ASSERT(ins->temp()->isBogusTemp()); masm.interleaveLowInt32x4(rhs, lhsDest); break; } - case LWasmShuffleSimd128::INTERLEAVE_LOW_64x2: { + case SimdShuffleOp::INTERLEAVE_LOW_64x2: { MOZ_ASSERT(ins->temp()->isBogusTemp()); masm.interleaveLowInt64x2(rhs, lhsDest); break; } - case LWasmShuffleSimd128::SHUFFLE_BLEND_8x16: { + case SimdShuffleOp::SHUFFLE_BLEND_8x16: { masm.shuffleInt8x16(reinterpret_cast<const uint8_t*>(control.asInt8x16()), rhs, lhsDest); break; @@ -3204,7 +3204,7 @@ void CodeGenerator::visitWasmPermuteSimd128(LWasmPermuteSimd128* ins) { SimdConstant control = ins->control(); switch (ins->op()) { // For broadcast, would MOVDDUP be better than PSHUFD for the last step? 
- case LWasmPermuteSimd128::BROADCAST_8x16: { + case SimdPermuteOp::BROADCAST_8x16: { const SimdConstant::I8x16& mask = control.asInt8x16(); int8_t source = mask[0]; if (src != dest) { @@ -3229,7 +3229,7 @@ void CodeGenerator::visitWasmPermuteSimd128(LWasmPermuteSimd128* ins) { } break; } - case LWasmPermuteSimd128::BROADCAST_16x8: { + case SimdPermuteOp::BROADCAST_16x8: { const SimdConstant::I16x8& mask = control.asInt16x8(); int16_t source = mask[0]; uint16_t v = uint16_t(source & 3); @@ -3245,11 +3245,11 @@ void CodeGenerator::visitWasmPermuteSimd128(LWasmPermuteSimd128* ins) { } break; } - case LWasmPermuteSimd128::MOVE: { + case SimdPermuteOp::MOVE: { masm.moveSimd128(src, dest); break; } - case LWasmPermuteSimd128::PERMUTE_8x16: { + case SimdPermuteOp::PERMUTE_8x16: { const SimdConstant::I8x16& mask = control.asInt8x16(); # ifdef DEBUG DebugOnly<int> i; @@ -3260,7 +3260,7 @@ void CodeGenerator::visitWasmPermuteSimd128(LWasmPermuteSimd128* ins) { masm.permuteInt8x16(reinterpret_cast<const uint8_t*>(mask), src, dest); break; } - case LWasmPermuteSimd128::PERMUTE_16x8: { + case SimdPermuteOp::PERMUTE_16x8: { # ifdef DEBUG const SimdConstant::I16x8& mask = control.asInt16x8(); DebugOnly<int> i; @@ -3297,7 +3297,7 @@ void CodeGenerator::visitWasmPermuteSimd128(LWasmPermuteSimd128* ins) { } break; } - case LWasmPermuteSimd128::PERMUTE_32x4: { + case SimdPermuteOp::PERMUTE_32x4: { const SimdConstant::I32x4& mask = control.asInt32x4(); # ifdef DEBUG DebugOnly<int> i; @@ -3308,7 +3308,7 @@ void CodeGenerator::visitWasmPermuteSimd128(LWasmPermuteSimd128* ins) { masm.permuteInt32x4(reinterpret_cast<const uint32_t*>(mask), src, dest); break; } - case LWasmPermuteSimd128::ROTATE_RIGHT_8x16: { + case SimdPermuteOp::ROTATE_RIGHT_8x16: { if (src != dest) { masm.moveSimd128(src, dest); } @@ -3317,13 +3317,13 @@ void CodeGenerator::visitWasmPermuteSimd128(LWasmPermuteSimd128* ins) { masm.concatAndRightShiftSimd128(dest, dest, count); break; } - case 
LWasmPermuteSimd128::SHIFT_LEFT_8x16: { + case SimdPermuteOp::SHIFT_LEFT_8x16: { int8_t count = control.asInt8x16()[0]; MOZ_ASSERT(count > 0, "Should have been a MOVE operation"); masm.leftShiftSimd128(Imm32(count), src, dest); break; } - case LWasmPermuteSimd128::SHIFT_RIGHT_8x16: { + case SimdPermuteOp::SHIFT_RIGHT_8x16: { int8_t count = control.asInt8x16()[0]; MOZ_ASSERT(count > 0, "Should have been a MOVE operation"); masm.rightShiftSimd128(Imm32(count), src, dest); diff --git a/js/src/jit/x86-shared/Lowering-x86-shared.cpp b/js/src/jit/x86-shared/Lowering-x86-shared.cpp index 3db6105544735..aae437485b8e7 100644 --- a/js/src/jit/x86-shared/Lowering-x86-shared.cpp +++ b/js/src/jit/x86-shared/Lowering-x86-shared.cpp @@ -1227,13 +1227,10 @@ void LIRGenerator::visitWasmShuffleSimd128(MWasmShuffleSimd128* ins) { MOZ_ASSERT(ins->rhs()->type() == MIRType::Simd128); MOZ_ASSERT(ins->type() == MIRType::Simd128); - Shuffle s = AnalyzeShuffle(ins); -# ifdef DEBUG - ReportShuffleSpecialization(s); -# endif + SimdShuffle s = ins->shuffle(); switch (s.opd) { - case Shuffle::Operand::LEFT: - case Shuffle::Operand::RIGHT: { + case SimdShuffle::Operand::LEFT: + case SimdShuffle::Operand::RIGHT: { LAllocation src; // All permute operators currently favor reusing the input register so // we're not currently exercising code paths below that do not reuse. @@ -1241,21 +1238,21 @@ void LIRGenerator::visitWasmShuffleSimd128(MWasmShuffleSimd128* ins) { // to be correct. 
bool useAtStartAndReuse = false; switch (*s.permuteOp) { - case LWasmPermuteSimd128::MOVE: - case LWasmPermuteSimd128::BROADCAST_8x16: - case LWasmPermuteSimd128::BROADCAST_16x8: - case LWasmPermuteSimd128::PERMUTE_8x16: - case LWasmPermuteSimd128::PERMUTE_16x8: - case LWasmPermuteSimd128::PERMUTE_32x4: - case LWasmPermuteSimd128::ROTATE_RIGHT_8x16: - case LWasmPermuteSimd128::SHIFT_LEFT_8x16: - case LWasmPermuteSimd128::SHIFT_RIGHT_8x16: + case SimdPermuteOp::MOVE: + case SimdPermuteOp::BROADCAST_8x16: + case SimdPermuteOp::BROADCAST_16x8: + case SimdPermuteOp::PERMUTE_8x16: + case SimdPermuteOp::PERMUTE_16x8: + case SimdPermuteOp::PERMUTE_32x4: + case SimdPermuteOp::ROTATE_RIGHT_8x16: + case SimdPermuteOp::SHIFT_LEFT_8x16: + case SimdPermuteOp::SHIFT_RIGHT_8x16: useAtStartAndReuse = true; break; default: MOZ_CRASH("Unexpected operator"); } - if (s.opd == Shuffle::Operand::LEFT) { + if (s.opd == SimdShuffle::Operand::LEFT) { if (useAtStartAndReuse) { src = useRegisterAtStart(ins->lhs()); } else { @@ -1277,11 +1274,11 @@ void LIRGenerator::visitWasmShuffleSimd128(MWasmShuffleSimd128* ins) { } break; } - case Shuffle::Operand::BOTH: - case Shuffle::Operand::BOTH_SWAPPED: { + case SimdShuffle::Operand::BOTH: + case SimdShuffle::Operand::BOTH_SWAPPED: { LDefinition temp = LDefinition::BogusTemp(); switch (*s.shuffleOp) { - case LWasmShuffleSimd128::BLEND_8x16: + case SimdShuffleOp::BLEND_8x16: temp = tempFixed(xmm0); break; default: @@ -1289,7 +1286,7 @@ void LIRGenerator::visitWasmShuffleSimd128(MWasmShuffleSimd128* ins) { } LAllocation lhs; LAllocation rhs; - if (s.opd == Shuffle::Operand::BOTH) { + if (s.opd == SimdShuffle::Operand::BOTH) { lhs = useRegisterAtStart(ins->lhs()); rhs = useRegister(ins->rhs()); } else { diff --git a/js/src/wasm/WasmIonCompile.cpp b/js/src/wasm/WasmIonCompile.cpp index 193ed1944a8f4..667a94672c707 100644 --- a/js/src/wasm/WasmIonCompile.cpp +++ b/js/src/wasm/WasmIonCompile.cpp @@ -27,6 +27,7 @@ #include "jit/CompileInfo.h" #include 
"jit/Ion.h" #include "jit/IonOptimizationLevels.h" +#include "jit/ShuffleAnalysis.h" #include "js/ScalarType.h" // js::Scalar::Type #include "wasm/WasmBaselineCompile.h" #include "wasm/WasmBuiltins.h" @@ -805,9 +806,10 @@ class FunctionCompiler { MOZ_ASSERT(v1->type() == MIRType::Simd128); MOZ_ASSERT(v2->type() == MIRType::Simd128); - auto* ins = MWasmShuffleSimd128::New( - alloc(), v1, v2, - SimdConstant::CreateX16(reinterpret_cast<int8_t*>(control.bytes))); + SimdShuffle s = AnalyzeSimdShuffle( + SimdConstant::CreateX16(reinterpret_cast<int8_t*>(control.bytes)), v1, + v2); + auto* ins = MWasmShuffleSimd128::New(alloc(), v1, v2, s); curBlock_->add(ins); return ins; } -- GitLab