From 341290c2535e2991577344798d8abc7c79ded04a Mon Sep 17 00:00:00 2001
From: Yury Delendik <ydelendik@mozilla.com>
Date: Thu, 9 Sep 2021 16:40:44 +0000
Subject: [PATCH] Bug 1672343 - Move shuffle analysis to the MIR. r=lth

Differential Revision: https://phabricator.services.mozilla.com/D124245
---
 js/src/jit/MIR.cpp                            |  13 +-
 js/src/jit/MIR.h                              |   1 +
 js/src/jit/MIROps.yaml                        |   2 +-
 js/src/jit/ShuffleAnalysis.cpp                | 717 ++++++++++++++++++
 js/src/jit/ShuffleAnalysis.h                  | 133 ++++
 js/src/jit/arm64/CodeGenerator-arm64.cpp      |  42 +-
 js/src/jit/arm64/Lowering-arm64.cpp           |  35 +-
 js/src/jit/moz.build                          |   1 +
 js/src/jit/shared/LIR-shared.h                |  90 +--
 js/src/jit/shared/Lowering-shared.cpp         | 698 -----------------
 js/src/jit/shared/Lowering-shared.h           |  46 --
 .../x86-shared/CodeGenerator-x86-shared.cpp   |  42 +-
 js/src/jit/x86-shared/Lowering-x86-shared.cpp |  37 +-
 js/src/wasm/WasmIonCompile.cpp                |   8 +-
 14 files changed, 949 insertions(+), 916 deletions(-)
 create mode 100644 js/src/jit/ShuffleAnalysis.cpp
 create mode 100644 js/src/jit/ShuffleAnalysis.h

diff --git a/js/src/jit/MIR.cpp b/js/src/jit/MIR.cpp
index 87ace27effce7..25060cd6c8bf4 100644
--- a/js/src/jit/MIR.cpp
+++ b/js/src/jit/MIR.cpp
@@ -4554,8 +4554,9 @@ MDefinition* MWasmTernarySimd128::foldsTo(TempAllocator& alloc) {
       v2()->op() == MDefinition::Opcode::WasmFloatConstant) {
     int8_t shuffle[16];
     if (specializeBitselectConstantMaskAsShuffle(shuffle)) {
-      return MWasmShuffleSimd128::New(alloc, v0(), v1(),
-                                      SimdConstant::CreateX16(shuffle));
+      SimdShuffle s =
+          AnalyzeSimdShuffle(SimdConstant::CreateX16(shuffle), v0(), v1());
+      return MWasmShuffleSimd128::New(alloc, v0(), v1(), s);
     }
   }
   return this;
@@ -4581,8 +4582,9 @@ MDefinition* MWasmBinarySimd128::foldsTo(TempAllocator& alloc) {
       return nullptr;
     }
     block()->insertBefore(this, zero);
-    return MWasmShuffleSimd128::New(alloc, lhs(), zero,
-                                    SimdConstant::CreateX16(shuffleMask));
+    SimdShuffle s =
+        AnalyzeSimdShuffle(SimdConstant::CreateX16(shuffleMask), lhs(), zero);
+    return MWasmShuffleSimd128::New(alloc, lhs(), zero, s);
   }
 
   // Specialize var OP const / const OP var when possible.
@@ -6081,7 +6083,8 @@ bool MWasmShiftSimd128::congruentTo(const MDefinition* ins) const {
 }
 
 bool MWasmShuffleSimd128::congruentTo(const MDefinition* ins) const {
-  return ins->toWasmShuffleSimd128()->control().bitwiseEqual(control_) &&
+  return ins->toWasmShuffleSimd128()->shuffle().control.bitwiseEqual(
+             shuffle_.control) &&
          congruentIfOperandsEqual(ins);
 }
 
diff --git a/js/src/jit/MIR.h b/js/src/jit/MIR.h
index 76599162ac7d3..2618fb076e557 100644
--- a/js/src/jit/MIR.h
+++ b/js/src/jit/MIR.h
@@ -26,6 +26,7 @@
 #include "jit/JitAllocPolicy.h"
 #include "jit/MacroAssembler.h"
 #include "jit/MIROpsGenerated.h"
+#include "jit/ShuffleAnalysis.h"
 #include "jit/TypeData.h"
 #include "jit/TypePolicy.h"
 #include "js/experimental/JitInfo.h"  // JSJit{Getter,Setter}Op, JSJitInfo
diff --git a/js/src/jit/MIROps.yaml b/js/src/jit/MIROps.yaml
index 26093f9c74622..ea5f3b09b9020 100644
--- a/js/src/jit/MIROps.yaml
+++ b/js/src/jit/MIROps.yaml
@@ -2588,7 +2588,7 @@
     lhs: Simd128
     rhs: Simd128
   arguments:
-    control: SimdConstant
+    shuffle: SimdShuffle
   type_policy: none
   result_type: Simd128
   movable: true
diff --git a/js/src/jit/ShuffleAnalysis.cpp b/js/src/jit/ShuffleAnalysis.cpp
new file mode 100644
index 0000000000000..f63d89409d4ee
--- /dev/null
+++ b/js/src/jit/ShuffleAnalysis.cpp
@@ -0,0 +1,717 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "jit/ShuffleAnalysis.h"
+#include "jit/MIR.h"
+
+using namespace js;
+using namespace jit;
+
+using mozilla::Maybe;
+using mozilla::Nothing;
+using mozilla::Some;
+
+#ifdef ENABLE_WASM_SIMD
+
+// Specialization analysis for SIMD operations.  This is still x86-centric but
+// generalizes fairly easily to other architectures.
+
+// Optimization of v8x16.shuffle.  The general byte shuffle+blend is very
+// expensive (equivalent to at least a dozen instructions), and we want to avoid
+// that if we can.  So look for special cases - there are many.
+//
+// The strategy is to sort the operation into one of three buckets depending
+// on the shuffle pattern and inputs:
+//
+//  - single operand; shuffles on these values are rotations, reversals,
+//    transpositions, and general permutations
+//  - single-operand-with-interesting-constant (especially zero); shuffles on
+//    these values are often byte shift or scatter operations
+//  - dual operand; shuffles on these operations are blends, catenated
+//    shifts, and (in the worst case) general shuffle+blends
+//
+// We're not trying to solve the general problem, only to lower reasonably
+// expressed patterns that express common operations.  Producers that produce
+// dense and convoluted patterns will end up with the general byte shuffle.
+// Producers that produce simpler patterns that easily map to hardware will
+// get faster code.
+//
+// In particular, these matchers do not try to combine transformations, so a
+// shuffle that optimally is lowered to rotate + permute32x4 + rotate, say, is
+// usually going to end up as a general byte shuffle.
+
+// Reduce a 0..31 byte mask to a 0..15 word mask if each byte pair names one
+// aligned source word; return true and update *control, else leave it alone.
+static bool ByteMaskToWordMask(SimdConstant* control) {
+  const SimdConstant::I8x16& lanes = control->asInt8x16();
+  int16_t controlWords[8];
+  for (int i = 0; i < 16; i += 2) {
+    if (!((lanes[i] & 1) == 0 && lanes[i + 1] == lanes[i] + 1)) {
+      return false;
+    }
+    controlWords[i / 2] = int16_t(lanes[i] / 2);  // byte index -> word index
+  }
+  *control = SimdConstant::CreateX8(controlWords);
+  return true;
+}
+
+// Reduce a 0..31 byte mask to a 0..7 dword mask if each group of four bytes
+// names one aligned source dword; on success return true, updating *control.
+static bool ByteMaskToDWordMask(SimdConstant* control) {
+  const SimdConstant::I8x16& lanes = control->asInt8x16();
+  int32_t controlDWords[4];
+  for (int i = 0; i < 16; i += 4) {
+    if (!((lanes[i] & 3) == 0 && lanes[i + 1] == lanes[i] + 1 &&
+          lanes[i + 2] == lanes[i] + 2 && lanes[i + 3] == lanes[i] + 3)) {
+      return false;
+    }
+    controlDWords[i / 4] = lanes[i] / 4;  // byte index -> dword index
+  }
+  *control = SimdConstant::CreateX4(controlDWords);
+  return true;
+}
+
+// Reduce a 0..31 byte mask to a 0..3 qword mask if each group of eight bytes
+// names one aligned source qword; on success return true, updating *control.
+static bool ByteMaskToQWordMask(SimdConstant* control) {
+  const SimdConstant::I8x16& lanes = control->asInt8x16();
+  int64_t controlQWords[2];
+  for (int i = 0; i < 16; i += 8) {
+    if (!((lanes[i] & 7) == 0 && lanes[i + 1] == lanes[i] + 1 &&
+          lanes[i + 2] == lanes[i] + 2 && lanes[i + 3] == lanes[i] + 3 &&
+          lanes[i + 4] == lanes[i] + 4 && lanes[i + 5] == lanes[i] + 5 &&
+          lanes[i + 6] == lanes[i] + 6 && lanes[i + 7] == lanes[i] + 7)) {
+      return false;
+    }
+    controlQWords[i / 8] = lanes[i] / 8;  // byte index -> qword index
+  }
+  *control = SimdConstant::CreateX2(controlQWords);
+  return true;
+}
+
+// Skip across a run of values that each increase by one, starting at lanes[i],
+// and return the index just past the run.  Lane values must be <= len-1
+// ("masked"), where len is the number of T-sized lanes in a 16-byte vector.
+// Since every element is a 1-element run, the return value is never the same as
+// the starting i.
+template <typename T>
+static int ScanIncreasingMasked(const T* lanes, int i) {
+  int len = int(16 / sizeof(T));
+  MOZ_ASSERT(i < len);
+  MOZ_ASSERT(lanes[i] <= len - 1);
+  i++;
+  while (i < len && lanes[i] == lanes[i - 1] + 1) {
+    MOZ_ASSERT(lanes[i] <= len - 1);
+    i++;
+  }
+  return i;
+}
+
+// Skip across a run of values that each increase by one, starting at lanes[i],
+// and return the index just past the run.  Lane values must be <= len*2-1
+// ("unmasked"); the values len-1 and len are not considered consecutive, i.e.
+// a run stays entirely below len or entirely at/above len.
+// Since every element is a 1-element run, the return value is never the same as
+// the starting i.
+template <typename T>
+static int ScanIncreasingUnmasked(const T* lanes, int i) {
+  int len = int(16 / sizeof(T));
+  MOZ_ASSERT(i < len);
+  if (lanes[i] < len) {  // run of values below len
+    i++;
+    while (i < len && lanes[i] < len && lanes[i - 1] == lanes[i] - 1) {
+      i++;
+    }
+  } else {  // run of values at or above len
+    i++;
+    while (i < len && lanes[i] >= len && lanes[i - 1] == lanes[i] - 1) {
+      i++;
+    }
+  }
+  return i;
+}
+
+// Skip lanes equal to v starting at i; return the index just past the last of
+// them (i itself if lanes[i] != v or i == len).  lanes[i] need not equal v.
+template <typename T>
+static int ScanConstant(const T* lanes, int v, int i) {
+  int len = int(16 / sizeof(T));  // number of T-sized lanes in 16 bytes
+  MOZ_ASSERT(i <= len);
+  while (i < len && lanes[i] == v) {
+    i++;
+  }
+  return i;
+}
+
+// Map rhs lane indices onto lhs ones: result[i] = input[i] & (len - 1).
+template <typename T>
+static void MaskLanes(T* result, const T* input) {
+  int len = int(16 / sizeof(T));  // number of T-sized lanes in 16 bytes
+  for (int i = 0; i < len; i++) {
+    result[i] = input[i] & (len - 1);
+  }
+}
+
+// Apply f to every lane value of input, storing the results in result.
+template <typename T>
+static void MapLanes(T* result, const T* input, int (*f)(int)) {
+  // Hazard analysis trips on "IndirectCall: f" error.
+  // Suppress the check -- `f` is expected to be trivial here.
+  JS::AutoSuppressGCAnalysis nogc;
+
+  int len = int(16 / sizeof(T));  // number of T-sized lanes in 16 bytes
+  for (int i = 0; i < len; i++) {
+    result[i] = f(input[i]);
+  }
+}
+
+// Recognize the identity permutation 0, 1, ..., len-1; lanes must be masked.
+template <typename T>
+static bool IsIdentity(const T* lanes) {
+  return ScanIncreasingMasked(lanes, 0) == int(16 / sizeof(T));
+}
+
+// Recognize a partial identity: lanes[start .. start+len-1] must hold the
+// consecutive values bias, bias+1, ..., bias+len-1.
+template <typename T>
+static bool IsIdentity(const T* lanes, int start, int len, int bias) {
+  if (lanes[start] != bias) {
+    return false;
+  }
+  for (int i = start + 1; i < start + len; i++) {
+    if (lanes[i] != lanes[i - 1] + 1) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// We can permute by dwords if the mask is reducible to a dword mask (a single
+// PSHUFD suffices); on success *control is replaced by that dword mask.
+static bool TryPermute32x4(SimdConstant* control) {
+  SimdConstant tmp = *control;
+  if (!ByteMaskToDWordMask(&tmp)) {
+    return false;
+  }
+  *control = tmp;
+  return true;
+}
+
+// Can we perform a byte rotate right (PALIGNR)?  Lanes must be masked.  The
+// shift count is just lanes[0], and *control is unchanged.
+static bool TryRotateRight8x16(SimdConstant* control) {
+  const SimdConstant::I8x16& lanes = control->asInt8x16();
+  // Look for the end of the first run of consecutive bytes.
+  int i = ScanIncreasingMasked(lanes, 0);
+
+  // First run must start at a value s.t. we have a rotate if all remaining
+  // bytes are a run.
+  if (lanes[0] != 16 - i) {
+    return false;
+  }
+
+  // If we reached the end of the vector, we're done.
+  if (i == 16) {
+    return true;
+  }
+
+  // Second run must start at source lane zero.
+  if (lanes[i] != 0) {
+    return false;
+  }
+
+  // Second run must end at the end of the lane vector.
+  return ScanIncreasingMasked(lanes, i) == 16;
+}
+
+// Permute by words if the mask reduces to a word mask, stored into *control.
+static bool TryPermute16x8(SimdConstant* control) {
+  SimdConstant tmp = *control;
+  if (!ByteMaskToWordMask(&tmp)) {
+    return false;
+  }
+  *control = tmp;
+  return true;
+}
+
+// A single word lane is copied into all the other lanes: PSHUF*W + PSHUFD.
+static bool TryBroadcast16x8(SimdConstant* control) {
+  SimdConstant tmp = *control;
+  if (!ByteMaskToWordMask(&tmp)) {
+    return false;
+  }
+  const SimdConstant::I16x8& lanes = tmp.asInt16x8();
+  if (ScanConstant(lanes, lanes[0], 0) < 8) {  // some lane differs from lane 0
+    return false;
+  }
+  *control = tmp;  // on success the control becomes the word mask
+  return true;
+}
+
+// A single byte lane is copied into all the other lanes: PUNPCK*BW + PSHUF*W +
+// PSHUFD.  *control is not modified.
+static bool TryBroadcast8x16(SimdConstant* control) {
+  const SimdConstant::I8x16& lanes = control->asInt8x16();
+  return ScanConstant(lanes, lanes[0], 0) >= 16;
+}
+
+// Look for permutations of a single operand; rewrites *control unless MOVE.
+static SimdPermuteOp AnalyzePermute(SimdConstant* control) {
+  // Lane indices are input-agnostic for single-operand permutations.
+  SimdConstant::I8x16 controlBytes;
+  MaskLanes(controlBytes, control->asInt8x16());
+
+  // Get rid of no-ops immediately, so nobody else needs to check.
+  if (IsIdentity(controlBytes)) {
+    return SimdPermuteOp::MOVE;
+  }
+
+  // Default control is the masked bytes.
+  *control = SimdConstant::CreateX16(controlBytes);
+
+  // Analysis order matters here and is architecture-dependent or even
+  // microarchitecture-dependent: ideally the cheapest implementation first.
+  // The Intel manual says that the cost of a PSHUFB is about five other
+  // operations, so make that our cutoff.
+  //
+  // Word, dword, and qword reversals are handled optimally by general permutes.
+  //
+  // Byte reversals are probably best left to PSHUFB, no alternative rendition
+  // seems to reliably go below five instructions.  (Discuss.)
+  //
+  // Word swaps within doublewords and dword swaps within quadwords are handled
+  // optimally by general permutes.
+  //
+  // Dword and qword broadcasts are handled by dword permute.
+
+  if (TryPermute32x4(control)) {
+    return SimdPermuteOp::PERMUTE_32x4;
+  }
+  if (TryRotateRight8x16(control)) {
+    return SimdPermuteOp::ROTATE_RIGHT_8x16;
+  }
+  if (TryBroadcast16x8(control)) {
+    return SimdPermuteOp::BROADCAST_16x8;
+  }
+  if (TryPermute16x8(control)) {
+    return SimdPermuteOp::PERMUTE_16x8;
+  }
+  if (TryBroadcast8x16(control)) {
+    return SimdPermuteOp::BROADCAST_8x16;
+  }
+
+  // TODO: (From v8) Unzip and transpose generally have renditions that slightly
+  // beat a general permute (three or four instructions)
+  //
+  // TODO: (From MacroAssemblerX86Shared::ShuffleX4): MOVLHPS and MOVHLPS can be
+  // used when merging two values.
+  //
+  // TODO: Byteswap is MOV + PSLLW + PSRLW + POR, a small win over PSHUFB.
+
+  // The default operation is to permute bytes with the default control.
+  return SimdPermuteOp::PERMUTE_8x16;
+}
+
+// Can we shift the bytes left or right by a constant?  A shift is a run of
+// lanes from the rhs (which is zero) on one end and a run of values from the
+// lhs on the other end.  On success *control is a splat of the shift count.
+static Maybe<SimdPermuteOp> TryShift8x16(SimdConstant* control) {
+  const SimdConstant::I8x16& lanes = control->asInt8x16();
+
+  // Represent all zero (rhs) lanes by 16
+  SimdConstant::I8x16 zeroesMasked;
+  MapLanes(zeroesMasked, lanes, [](int x) -> int { return x >= 16 ? 16 : x; });
+
+  int i = ScanConstant(zeroesMasked, 16, 0);
+  int shiftLeft = i;  // number of leading zero lanes
+  if (shiftLeft > 0 && lanes[shiftLeft] != 0) {
+    return Nothing();
+  }
+
+  i = ScanIncreasingUnmasked(zeroesMasked, i);
+  int shiftRight = 16 - i;  // number of lanes remaining after the lhs run
+  if (shiftRight > 0 && lanes[i - 1] != 15) {
+    return Nothing();
+  }
+
+  i = ScanConstant(zeroesMasked, 16, i);  // the remainder must be all zeroes
+  if (i < 16 || (shiftRight > 0 && shiftLeft > 0) ||
+      (shiftRight == 0 && shiftLeft == 0)) {
+    return Nothing();
+  }
+
+  if (shiftRight) {
+    *control = SimdConstant::SplatX16((int8_t)shiftRight);
+    return Some(SimdPermuteOp::SHIFT_RIGHT_8x16);
+  }
+  *control = SimdConstant::SplatX16((int8_t)shiftLeft);
+  return Some(SimdPermuteOp::SHIFT_LEFT_8x16);
+}
+
+static Maybe<SimdPermuteOp> AnalyzeShuffleWithZero(SimdConstant* control) {
+  Maybe<SimdPermuteOp> op;
+  op = TryShift8x16(control);  // byte shifts are the only pattern tried here
+  if (op) {
+    return op;
+  }
+
+  // TODO: Optimization opportunity? A byte-blend-with-zero is just a CONST;
+  // PAND.  This may beat the general byte blend code below.
+  return Nothing();
+}
+
+// Concat: if the result is the suffix (high bytes) of the rhs in front of a
+// prefix (low bytes) of the lhs then this is PALIGNR; ditto if the operands are
+// swapped.
+static Maybe<SimdShuffleOp> TryConcatRightShift8x16(SimdConstant* control,
+                                                    bool* swapOperands) {
+  const SimdConstant::I8x16& lanes = control->asInt8x16();
+  int i = ScanIncreasingUnmasked(lanes, 0);
+  MOZ_ASSERT(i < 16, "Single-operand run should have been handled elswhere");
+  // First run must end with 15 % 16
+  if ((lanes[i - 1] & 15) != 15) {
+    return Nothing();
+  }
+  // Second run must start with 0 % 16
+  if ((lanes[i] & 15) != 0) {
+    return Nothing();
+  }
+  // The two runs must come from different inputs
+  if ((lanes[i] & 16) == (lanes[i - 1] & 16)) {
+    return Nothing();
+  }
+  int suffixLength = i;  // length of the first run
+
+  i = ScanIncreasingUnmasked(lanes, i);
+  // Second run must extend to the end of the lane vector
+  if (i != 16) {
+    return Nothing();
+  }
+
+  // If the suffix is from the lhs then swap the operands
+  if (lanes[0] < 16) {
+    *swapOperands = !*swapOperands;
+  }
+  *control = SimdConstant::SplatX16((int8_t)suffixLength);
+  return Some(SimdShuffleOp::CONCAT_RIGHT_SHIFT_8x16);
+}
+
+// Blend words: if we pick words from both operands without a pattern but all
+// the input words stay in their position then this is PBLENDW (immediate mask);
+// this also handles all larger sizes on x64.
+static Maybe<SimdShuffleOp> TryBlendInt16x8(SimdConstant* control) {
+  SimdConstant tmp(*control);
+  if (!ByteMaskToWordMask(&tmp)) {
+    return Nothing();
+  }
+  SimdConstant::I16x8 masked;
+  MaskLanes(masked, tmp.asInt16x8());
+  if (!IsIdentity(masked)) {  // some word would change position
+    return Nothing();
+  }
+  SimdConstant::I16x8 mapped;
+  MapLanes(mapped, tmp.asInt16x8(),
+           [](int x) -> int { return x < 8 ? 0 : -1; });  // 0: lhs, -1: rhs
+  *control = SimdConstant::CreateX8(mapped);
+  return Some(SimdShuffleOp::BLEND_16x8);
+}
+
+// Blend bytes: if we pick bytes ditto then this is a byte blend, which can be
+// handled with a CONST, PAND, PANDNOT, and POR.
+//
+// TODO: Optimization opportunity? If we pick all but one lanes from one with at
+// most one from the other then it could be a MOV + PEXTRB + PINSRB (also if
+// this element is not in its source location).
+static Maybe<SimdShuffleOp> TryBlendInt8x16(SimdConstant* control) {
+  SimdConstant::I8x16 masked;
+  MaskLanes(masked, control->asInt8x16());
+  if (!IsIdentity(masked)) {  // some byte would change position
+    return Nothing();
+  }
+  SimdConstant::I8x16 mapped;
+  MapLanes(mapped, control->asInt8x16(),
+           [](int x) -> int { return x < 16 ? 0 : -1; });  // 0: lhs, -1: rhs
+  *control = SimdConstant::CreateX16(mapped);
+  return Some(SimdShuffleOp::BLEND_8x16);
+}
+
+template <typename T>
+static bool MatchInterleave(const T* lanes, int lhs, int rhs, int len) {
+  for (int i = 0; i < len; i++) {  // even lanes: lhs + i, odd lanes: rhs + i
+    if (lanes[i * 2] != lhs + i || lanes[i * 2 + 1] != rhs + i) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// Unpack/interleave (lhs and rhs are the first lane index of each operand):
+//  - if we interleave the low (bytes/words/doublewords) of the inputs into
+//    the output then this is UNPCKL*W (possibly with a swap of operands).
+//  - if we interleave the high ditto then it is UNPCKH*W (ditto)
+template <typename T>
+static Maybe<SimdShuffleOp> TryInterleave(const T* lanes, int lhs, int rhs,
+                                          bool* swapOperands,
+                                          SimdShuffleOp lowOp,
+                                          SimdShuffleOp highOp) {
+  int len = int(32 / (sizeof(T) * 4));  // half the lanes of a 16-byte vector
+  if (MatchInterleave(lanes, lhs, rhs, len)) {
+    return Some(lowOp);
+  }
+  if (MatchInterleave(lanes, rhs, lhs, len)) {
+    *swapOperands = !*swapOperands;
+    return Some(lowOp);
+  }
+  if (MatchInterleave(lanes, lhs + len, rhs + len, len)) {
+    return Some(highOp);
+  }
+  if (MatchInterleave(lanes, rhs + len, lhs + len, len)) {
+    *swapOperands = !*swapOperands;
+    return Some(highOp);
+  }
+  return Nothing();
+}
+
+static Maybe<SimdShuffleOp> TryInterleave64x2(SimdConstant* control,
+                                              bool* swapOperands) {
+  SimdConstant tmp = *control;  // work on a copy; *control is not modified
+  if (!ByteMaskToQWordMask(&tmp)) {
+    return Nothing();
+  }
+  const SimdConstant::I64x2& lanes = tmp.asInt64x2();
+  return TryInterleave(lanes, 0, 2, swapOperands,  // lhs lanes 0..1, rhs 2..3
+                       SimdShuffleOp::INTERLEAVE_LOW_64x2,
+                       SimdShuffleOp::INTERLEAVE_HIGH_64x2);
+}
+
+static Maybe<SimdShuffleOp> TryInterleave32x4(SimdConstant* control,
+                                              bool* swapOperands) {
+  SimdConstant tmp = *control;  // work on a copy; *control is not modified
+  if (!ByteMaskToDWordMask(&tmp)) {
+    return Nothing();
+  }
+  const SimdConstant::I32x4& lanes = tmp.asInt32x4();
+  return TryInterleave(lanes, 0, 4, swapOperands,  // lhs lanes 0..3, rhs 4..7
+                       SimdShuffleOp::INTERLEAVE_LOW_32x4,
+                       SimdShuffleOp::INTERLEAVE_HIGH_32x4);
+}
+
+static Maybe<SimdShuffleOp> TryInterleave16x8(SimdConstant* control,
+                                              bool* swapOperands) {
+  SimdConstant tmp = *control;  // work on a copy; *control is not modified
+  if (!ByteMaskToWordMask(&tmp)) {
+    return Nothing();
+  }
+  const SimdConstant::I16x8& lanes = tmp.asInt16x8();
+  return TryInterleave(lanes, 0, 8, swapOperands,  // lhs lanes 0..7, rhs 8..15
+                       SimdShuffleOp::INTERLEAVE_LOW_16x8,
+                       SimdShuffleOp::INTERLEAVE_HIGH_16x8);
+}
+
+static Maybe<SimdShuffleOp> TryInterleave8x16(SimdConstant* control,
+                                              bool* swapOperands) {
+  const SimdConstant::I8x16& lanes = control->asInt8x16();
+  return TryInterleave(lanes, 0, 16, swapOperands,  // lhs 0..15, rhs 16..31
+                       SimdShuffleOp::INTERLEAVE_LOW_8x16,
+                       SimdShuffleOp::INTERLEAVE_HIGH_8x16);
+}
+
+static SimdShuffleOp AnalyzeTwoArgShuffle(SimdConstant* control,
+                                          bool* swapOperands) {
+  Maybe<SimdShuffleOp> op;  // first matcher that succeeds wins
+  op = TryConcatRightShift8x16(control, swapOperands);
+  if (!op) {
+    op = TryBlendInt16x8(control);
+  }
+  if (!op) {
+    op = TryBlendInt8x16(control);
+  }
+  if (!op) {
+    op = TryInterleave64x2(control, swapOperands);
+  }
+  if (!op) {
+    op = TryInterleave32x4(control, swapOperands);
+  }
+  if (!op) {
+    op = TryInterleave16x8(control, swapOperands);
+  }
+  if (!op) {
+    op = TryInterleave8x16(control, swapOperands);
+  }
+  if (!op) {
+    op = Some(SimdShuffleOp::SHUFFLE_BLEND_8x16);  // general fallback
+  }
+  return *op;
+}
+
+// Reorder the operands if that seems useful, notably, move a constant to the
+// right hand side.  Rewrites the control to account for any move; returns
+// true iff the operands were swapped.
+static bool MaybeReorderShuffleOperands(MDefinition** lhs, MDefinition** rhs,
+                                        SimdConstant* control) {
+  if ((*lhs)->isWasmFloatConstant()) {
+    MDefinition* tmp = *lhs;
+    *lhs = *rhs;
+    *rhs = tmp;
+
+    int8_t controlBytes[16];
+    const SimdConstant::I8x16& lanes = control->asInt8x16();
+    for (unsigned i = 0; i < 16; i++) {
+      controlBytes[i] = int8_t(lanes[i] ^ 16);  // flip the lhs/rhs selector
+    }
+    *control = SimdConstant::CreateX16(controlBytes);
+
+    return true;
+  }
+  return false;
+}
+
+#  ifdef DEBUG
+static const SimdShuffle& ReportShuffleSpecialization(const SimdShuffle& s) {
+  switch (s.opd) {  // the operand kind tells which op field to report
+    case SimdShuffle::Operand::BOTH:
+    case SimdShuffle::Operand::BOTH_SWAPPED:
+      switch (*s.shuffleOp) {  // two-operand forms: report the shuffleOp
+        case SimdShuffleOp::SHUFFLE_BLEND_8x16:
+          js::wasm::ReportSimdAnalysis("shuffle -> shuffle+blend 8x16");
+          break;
+        case SimdShuffleOp::BLEND_8x16:
+          js::wasm::ReportSimdAnalysis("shuffle -> blend 8x16");
+          break;
+        case SimdShuffleOp::BLEND_16x8:
+          js::wasm::ReportSimdAnalysis("shuffle -> blend 16x8");
+          break;
+        case SimdShuffleOp::CONCAT_RIGHT_SHIFT_8x16:
+          js::wasm::ReportSimdAnalysis("shuffle -> concat+shift-right 8x16");
+          break;
+        case SimdShuffleOp::INTERLEAVE_HIGH_8x16:
+          js::wasm::ReportSimdAnalysis("shuffle -> interleave-high 8x16");
+          break;
+        case SimdShuffleOp::INTERLEAVE_HIGH_16x8:
+          js::wasm::ReportSimdAnalysis("shuffle -> interleave-high 16x8");
+          break;
+        case SimdShuffleOp::INTERLEAVE_HIGH_32x4:
+          js::wasm::ReportSimdAnalysis("shuffle -> interleave-high 32x4");
+          break;
+        case SimdShuffleOp::INTERLEAVE_HIGH_64x2:
+          js::wasm::ReportSimdAnalysis("shuffle -> interleave-high 64x2");
+          break;
+        case SimdShuffleOp::INTERLEAVE_LOW_8x16:
+          js::wasm::ReportSimdAnalysis("shuffle -> interleave-low 8x16");
+          break;
+        case SimdShuffleOp::INTERLEAVE_LOW_16x8:
+          js::wasm::ReportSimdAnalysis("shuffle -> interleave-low 16x8");
+          break;
+        case SimdShuffleOp::INTERLEAVE_LOW_32x4:
+          js::wasm::ReportSimdAnalysis("shuffle -> interleave-low 32x4");
+          break;
+        case SimdShuffleOp::INTERLEAVE_LOW_64x2:
+          js::wasm::ReportSimdAnalysis("shuffle -> interleave-low 64x2");
+          break;
+        default:
+          MOZ_CRASH("Unexpected shuffle op");
+      }
+      break;
+    case SimdShuffle::Operand::LEFT:
+    case SimdShuffle::Operand::RIGHT:
+      switch (*s.permuteOp) {  // single-operand forms: report the permuteOp
+        case SimdPermuteOp::BROADCAST_8x16:
+          js::wasm::ReportSimdAnalysis("shuffle -> broadcast 8x16");
+          break;
+        case SimdPermuteOp::BROADCAST_16x8:
+          js::wasm::ReportSimdAnalysis("shuffle -> broadcast 16x8");
+          break;
+        case SimdPermuteOp::MOVE:
+          js::wasm::ReportSimdAnalysis("shuffle -> move");
+          break;
+        case SimdPermuteOp::PERMUTE_8x16:
+          js::wasm::ReportSimdAnalysis("shuffle -> permute 8x16");
+          break;
+        case SimdPermuteOp::PERMUTE_16x8:
+          js::wasm::ReportSimdAnalysis("shuffle -> permute 16x8");
+          break;
+        case SimdPermuteOp::PERMUTE_32x4:
+          js::wasm::ReportSimdAnalysis("shuffle -> permute 32x4");
+          break;
+        case SimdPermuteOp::ROTATE_RIGHT_8x16:
+          js::wasm::ReportSimdAnalysis("shuffle -> rotate-right 8x16");
+          break;
+        case SimdPermuteOp::SHIFT_LEFT_8x16:
+          js::wasm::ReportSimdAnalysis("shuffle -> shift-left 8x16");
+          break;
+        case SimdPermuteOp::SHIFT_RIGHT_8x16:
+          js::wasm::ReportSimdAnalysis("shuffle -> shift-right 8x16");
+          break;
+        default:
+          MOZ_CRASH("Unexpected permute op");
+      }
+      break;
+  }
+  return s;  // the argument is handed back unchanged
+}
+#  endif  // DEBUG
+
+SimdShuffle jit::AnalyzeSimdShuffle(SimdConstant control, MDefinition* lhs,
+                                    MDefinition* rhs) {
+#  ifdef DEBUG
+#    define R(s) ReportShuffleSpecialization(s)
+#  else
+#    define R(s) (s)
+#  endif
+
+  // If only one of the inputs is used, determine which.
+  bool useLeft = true;
+  bool useRight = true;
+  if (lhs == rhs) {
+    useRight = false;  // same definition on both sides: permute the lhs
+  } else {
+    bool allAbove = true;
+    bool allBelow = true;
+    const SimdConstant::I8x16& lanes = control.asInt8x16();
+    for (int8_t i : lanes) {
+      allAbove = allAbove && i >= 16;  // every lane selects from the rhs
+      allBelow = allBelow && i < 16;   // every lane selects from the lhs
+    }
+    if (allAbove) {
+      useLeft = false;
+    } else if (allBelow) {
+      useRight = false;
+    }
+  }
+
+  // Deal with one-ignored-input.
+  if (!(useLeft && useRight)) {
+    SimdPermuteOp op = AnalyzePermute(&control);
+    return R(SimdShuffle::permute(
+        useLeft ? SimdShuffle::Operand::LEFT : SimdShuffle::Operand::RIGHT,
+        control, op));
+  }
+
+  // Move constants to rhs (this also rewrites control to match).
+  bool swapOperands = MaybeReorderShuffleOperands(&lhs, &rhs, &control);
+
+  // Deal with constant rhs.
+  if (rhs->isWasmFloatConstant()) {
+    SimdConstant rhsConstant = rhs->toWasmFloatConstant()->toSimd128();
+    if (rhsConstant.isZeroBits()) {
+      Maybe<SimdPermuteOp> op = AnalyzeShuffleWithZero(&control);
+      if (op) {
+        return R(SimdShuffle::permute(swapOperands ? SimdShuffle::Operand::RIGHT
+                                                   : SimdShuffle::Operand::LEFT,
+                                      control, *op));
+      }
+    }
+  }
+
+  // Two operands both of which are used.  If there's one constant operand it is
+  // now on the rhs.
+  SimdShuffleOp op = AnalyzeTwoArgShuffle(&control, &swapOperands);
+  return R(SimdShuffle::shuffle(swapOperands
+                                    ? SimdShuffle::Operand::BOTH_SWAPPED
+                                    : SimdShuffle::Operand::BOTH,
+                                control, op));
+#  undef R
+}
+
+#endif  // ENABLE_WASM_SIMD
diff --git a/js/src/jit/ShuffleAnalysis.h b/js/src/jit/ShuffleAnalysis.h
new file mode 100644
index 0000000000000..c2863b5f8a5ee
--- /dev/null
+++ b/js/src/jit/ShuffleAnalysis.h
@@ -0,0 +1,133 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef jit_ShuffleAnalysis_h
+#define jit_ShuffleAnalysis_h
+
+#include "jit/IonTypes.h"
+
+namespace js {
+namespace jit {
+
+class MDefinition;
+
+// Permutation operations.  NOTE: these may still be x86-centric, but the set
+// can accommodate operations from other architectures.
+//
+// The "low-order" byte is in lane 0 of an 8x16 datum, the "high-order" byte
+// in lane 15.  The low-order byte is also the "rightmost".  In wasm, the
+// constant (v128.const i8x16 0 1 2 ... 15) has 0 in the low-order byte and 15
+// in the high-order byte.
+enum class SimdPermuteOp {
+  // A single byte lane is copied into all the other byte lanes.  control_[0]
+  // has the source lane.
+  BROADCAST_8x16,
+
+  // A single word lane is copied into all the other word lanes.  control_[0]
+  // has the source lane.
+  BROADCAST_16x8,
+
+  // Copy input to output.
+  MOVE,
+
+  // control_ has bytes in range 0..15 s.t. control_[i] holds the source lane
+  // for output lane i.
+  PERMUTE_8x16,
+
+  // control_ has int16s in range 0..7, as for 8x16.  In addition, the high
+  // byte of control_[0] has flags detailing the operation, values taken
+  // from the Perm16x8Action enum (declared outside this header).
+  PERMUTE_16x8,
+
+  // control_ has int32s in range 0..3, as for 8x16.
+  PERMUTE_32x4,
+
+  // control_[0] has the number of places to rotate by.
+  ROTATE_RIGHT_8x16,
+
+  // Zeroes are shifted into high-order bytes and low-order bytes are lost.
+  // control_[0] has the number of places to shift by.
+  SHIFT_RIGHT_8x16,
+
+  // Zeroes are shifted into low-order bytes and high-order bytes are lost.
+  // control_[0] has the number of places to shift by.
+  SHIFT_LEFT_8x16,
+};
+
+// Shuffle operations.  NOTE: these may still be x86-centric, but the set can
+// accommodate operations from other architectures.
+enum class SimdShuffleOp {
+  // Blend bytes.  control_ has the blend mask as an I8x16: 0 to select from
+  // the lhs, -1 to select from the rhs.
+  BLEND_8x16,
+
+  // Blend words.  control_ has the blend mask as an I16x8: 0 to select from
+  // the lhs, -1 to select from the rhs.
+  BLEND_16x8,
+
+  // Concat the lhs in front of the rhs and shift right by bytes, extracting
+  // the low 16 bytes; control_[0] has the shift count.
+  CONCAT_RIGHT_SHIFT_8x16,
+
+  // Interleave qwords/dwords/words/bytes from high/low halves of operands.
+  // The low-order item in the result comes from the lhs, then the next from
+  // the rhs, and so on.  control_ is ignored.
+  INTERLEAVE_HIGH_8x16,
+  INTERLEAVE_HIGH_16x8,
+  INTERLEAVE_HIGH_32x4,
+  INTERLEAVE_HIGH_64x2,
+  INTERLEAVE_LOW_8x16,
+  INTERLEAVE_LOW_16x8,
+  INTERLEAVE_LOW_32x4,
+  INTERLEAVE_LOW_64x2,
+
+  // Fully general shuffle+blend.  control_ has the shuffle mask.
+  SHUFFLE_BLEND_8x16,
+};
+
+// Representation of the result of the shuffle analysis.
+struct SimdShuffle {
+  enum class Operand {
+    // Both inputs, in the original lhs-rhs order
+    BOTH,
+    // Both inputs, but in rhs-lhs order
+    BOTH_SWAPPED,
+    // Only the lhs input
+    LEFT,
+    // Only the rhs input
+    RIGHT,
+  };
+
+  Operand opd;           // which input(s) the selected operation reads
+  SimdConstant control;  // op-specific control data; see the op enums
+  mozilla::Maybe<SimdPermuteOp> permuteOp;  // Single operands
+  mozilla::Maybe<SimdShuffleOp> shuffleOp;  // Double operands
+
+  static SimdShuffle permute(Operand opd, SimdConstant control,
+                             SimdPermuteOp op) {
+    MOZ_ASSERT(opd == Operand::LEFT || opd == Operand::RIGHT);
+    SimdShuffle s{opd, control, mozilla::Some(op), mozilla::Nothing()};
+    return s;  // permuteOp is set, shuffleOp is Nothing
+  }
+
+  static SimdShuffle shuffle(Operand opd, SimdConstant control,
+                             SimdShuffleOp op) {
+    MOZ_ASSERT(opd == Operand::BOTH || opd == Operand::BOTH_SWAPPED);
+    SimdShuffle s{opd, control, mozilla::Nothing(), mozilla::Some(op)};
+    return s;  // shuffleOp is set, permuteOp is Nothing
+  }
+};
+
+#ifdef ENABLE_WASM_SIMD
+
+SimdShuffle AnalyzeSimdShuffle(SimdConstant control, MDefinition* lhs,
+                               MDefinition* rhs);
+
+#endif
+
+}  // namespace jit
+}  // namespace js
+
+#endif  // jit_ShuffleAnalysis_h
diff --git a/js/src/jit/arm64/CodeGenerator-arm64.cpp b/js/src/jit/arm64/CodeGenerator-arm64.cpp
index 46d3a8ef39929..6cda7a0a6ea81 100644
--- a/js/src/jit/arm64/CodeGenerator-arm64.cpp
+++ b/js/src/jit/arm64/CodeGenerator-arm64.cpp
@@ -3507,55 +3507,55 @@ void CodeGenerator::visitWasmShuffleSimd128(LWasmShuffleSimd128* ins) {
   MOZ_ASSERT(ins->temp()->isBogusTemp());
   SimdConstant control = ins->control();
   switch (ins->op()) {
-    case LWasmShuffleSimd128::BLEND_8x16: {
+    case SimdShuffleOp::BLEND_8x16: {
       masm.blendInt8x16(reinterpret_cast<const uint8_t*>(control.asInt8x16()),
                         lhs, rhs, dest);
       break;
     }
-    case LWasmShuffleSimd128::BLEND_16x8: {
+    case SimdShuffleOp::BLEND_16x8: {
       masm.blendInt16x8(reinterpret_cast<const uint16_t*>(control.asInt16x8()),
                         lhs, rhs, dest);
       break;
     }
-    case LWasmShuffleSimd128::CONCAT_RIGHT_SHIFT_8x16: {
+    case SimdShuffleOp::CONCAT_RIGHT_SHIFT_8x16: {
       int8_t count = 16 - control.asInt8x16()[0];
       MOZ_ASSERT(count > 0, "Should have been a MOVE operation");
       masm.concatAndRightShiftSimd128(lhs, rhs, dest, count);
       break;
     }
-    case LWasmShuffleSimd128::INTERLEAVE_HIGH_8x16: {
+    case SimdShuffleOp::INTERLEAVE_HIGH_8x16: {
       masm.interleaveHighInt8x16(lhs, rhs, dest);
       break;
     }
-    case LWasmShuffleSimd128::INTERLEAVE_HIGH_16x8: {
+    case SimdShuffleOp::INTERLEAVE_HIGH_16x8: {
       masm.interleaveHighInt16x8(lhs, rhs, dest);
       break;
     }
-    case LWasmShuffleSimd128::INTERLEAVE_HIGH_32x4: {
+    case SimdShuffleOp::INTERLEAVE_HIGH_32x4: {
       masm.interleaveHighInt32x4(lhs, rhs, dest);
       break;
     }
-    case LWasmShuffleSimd128::INTERLEAVE_HIGH_64x2: {
+    case SimdShuffleOp::INTERLEAVE_HIGH_64x2: {
       masm.interleaveHighInt64x2(lhs, rhs, dest);
       break;
     }
-    case LWasmShuffleSimd128::INTERLEAVE_LOW_8x16: {
+    case SimdShuffleOp::INTERLEAVE_LOW_8x16: {
       masm.interleaveLowInt8x16(lhs, rhs, dest);
       break;
     }
-    case LWasmShuffleSimd128::INTERLEAVE_LOW_16x8: {
+    case SimdShuffleOp::INTERLEAVE_LOW_16x8: {
       masm.interleaveLowInt16x8(lhs, rhs, dest);
       break;
     }
-    case LWasmShuffleSimd128::INTERLEAVE_LOW_32x4: {
+    case SimdShuffleOp::INTERLEAVE_LOW_32x4: {
       masm.interleaveLowInt32x4(lhs, rhs, dest);
       break;
     }
-    case LWasmShuffleSimd128::INTERLEAVE_LOW_64x2: {
+    case SimdShuffleOp::INTERLEAVE_LOW_64x2: {
       masm.interleaveLowInt64x2(lhs, rhs, dest);
       break;
     }
-    case LWasmShuffleSimd128::SHUFFLE_BLEND_8x16: {
+    case SimdShuffleOp::SHUFFLE_BLEND_8x16: {
       masm.shuffleInt8x16(reinterpret_cast<const uint8_t*>(control.asInt8x16()),
                           lhs, rhs, dest);
       break;
@@ -3575,23 +3575,23 @@ void CodeGenerator::visitWasmPermuteSimd128(LWasmPermuteSimd128* ins) {
   FloatRegister dest = ToFloatRegister(ins->output());
   SimdConstant control = ins->control();
   switch (ins->op()) {
-    case LWasmPermuteSimd128::BROADCAST_8x16: {
+    case SimdPermuteOp::BROADCAST_8x16: {
       const SimdConstant::I8x16& mask = control.asInt8x16();
       int8_t source = mask[0];
       masm.splatX16(source, src, dest);
       break;
     }
-    case LWasmPermuteSimd128::BROADCAST_16x8: {
+    case SimdPermuteOp::BROADCAST_16x8: {
       const SimdConstant::I16x8& mask = control.asInt16x8();
       int16_t source = mask[0];
       masm.splatX8(source, src, dest);
       break;
     }
-    case LWasmPermuteSimd128::MOVE: {
+    case SimdPermuteOp::MOVE: {
       masm.moveSimd128(src, dest);
       break;
     }
-    case LWasmPermuteSimd128::PERMUTE_8x16: {
+    case SimdPermuteOp::PERMUTE_8x16: {
       const SimdConstant::I8x16& mask = control.asInt8x16();
 #  ifdef DEBUG
       mozilla::DebugOnly<int> i;
@@ -3602,7 +3602,7 @@ void CodeGenerator::visitWasmPermuteSimd128(LWasmPermuteSimd128* ins) {
       masm.permuteInt8x16(reinterpret_cast<const uint8_t*>(mask), src, dest);
       break;
     }
-    case LWasmPermuteSimd128::PERMUTE_16x8: {
+    case SimdPermuteOp::PERMUTE_16x8: {
       const SimdConstant::I16x8& mask = control.asInt16x8();
 #  ifdef DEBUG
       mozilla::DebugOnly<int> i;
@@ -3613,7 +3613,7 @@ void CodeGenerator::visitWasmPermuteSimd128(LWasmPermuteSimd128* ins) {
       masm.permuteInt16x8(reinterpret_cast<const uint16_t*>(mask), src, dest);
       break;
     }
-    case LWasmPermuteSimd128::PERMUTE_32x4: {
+    case SimdPermuteOp::PERMUTE_32x4: {
       const SimdConstant::I32x4& mask = control.asInt32x4();
 #  ifdef DEBUG
       mozilla::DebugOnly<int> i;
@@ -3624,19 +3624,19 @@ void CodeGenerator::visitWasmPermuteSimd128(LWasmPermuteSimd128* ins) {
       masm.permuteInt32x4(reinterpret_cast<const uint32_t*>(mask), src, dest);
       break;
     }
-    case LWasmPermuteSimd128::ROTATE_RIGHT_8x16: {
+    case SimdPermuteOp::ROTATE_RIGHT_8x16: {
       int8_t count = control.asInt8x16()[0];
       MOZ_ASSERT(count > 0, "Should have been a MOVE operation");
       masm.rotateRightSimd128(src, dest, count);
       break;
     }
-    case LWasmPermuteSimd128::SHIFT_LEFT_8x16: {
+    case SimdPermuteOp::SHIFT_LEFT_8x16: {
       int8_t count = control.asInt8x16()[0];
       MOZ_ASSERT(count > 0, "Should have been a MOVE operation");
       masm.leftShiftSimd128(Imm32(count), src, dest);
       break;
     }
-    case LWasmPermuteSimd128::SHIFT_RIGHT_8x16: {
+    case SimdPermuteOp::SHIFT_RIGHT_8x16: {
       int8_t count = control.asInt8x16()[0];
       MOZ_ASSERT(count > 0, "Should have been a MOVE operation");
       masm.rightShiftSimd128(Imm32(count), src, dest);
diff --git a/js/src/jit/arm64/Lowering-arm64.cpp b/js/src/jit/arm64/Lowering-arm64.cpp
index 2c9c3689070db..212ea8aaf85bf 100644
--- a/js/src/jit/arm64/Lowering-arm64.cpp
+++ b/js/src/jit/arm64/Lowering-arm64.cpp
@@ -1120,29 +1120,26 @@ void LIRGenerator::visitWasmShuffleSimd128(MWasmShuffleSimd128* ins) {
   MOZ_ASSERT(ins->rhs()->type() == MIRType::Simd128);
   MOZ_ASSERT(ins->type() == MIRType::Simd128);
 
-  Shuffle s = AnalyzeShuffle(ins);
-#  ifdef DEBUG
-  ReportShuffleSpecialization(s);
-#  endif
+  SimdShuffle s = ins->shuffle();
   switch (s.opd) {
-    case Shuffle::Operand::LEFT:
-    case Shuffle::Operand::RIGHT: {
+    case SimdShuffle::Operand::LEFT:
+    case SimdShuffle::Operand::RIGHT: {
       LAllocation src;
       switch (*s.permuteOp) {
-        case LWasmPermuteSimd128::MOVE:
-        case LWasmPermuteSimd128::BROADCAST_8x16:
-        case LWasmPermuteSimd128::BROADCAST_16x8:
-        case LWasmPermuteSimd128::PERMUTE_8x16:
-        case LWasmPermuteSimd128::PERMUTE_16x8:
-        case LWasmPermuteSimd128::PERMUTE_32x4:
-        case LWasmPermuteSimd128::ROTATE_RIGHT_8x16:
-        case LWasmPermuteSimd128::SHIFT_LEFT_8x16:
-        case LWasmPermuteSimd128::SHIFT_RIGHT_8x16:
+        case SimdPermuteOp::MOVE:
+        case SimdPermuteOp::BROADCAST_8x16:
+        case SimdPermuteOp::BROADCAST_16x8:
+        case SimdPermuteOp::PERMUTE_8x16:
+        case SimdPermuteOp::PERMUTE_16x8:
+        case SimdPermuteOp::PERMUTE_32x4:
+        case SimdPermuteOp::ROTATE_RIGHT_8x16:
+        case SimdPermuteOp::SHIFT_LEFT_8x16:
+        case SimdPermuteOp::SHIFT_RIGHT_8x16:
           break;
         default:
           MOZ_CRASH("Unexpected operator");
       }
-      if (s.opd == Shuffle::Operand::LEFT) {
+      if (s.opd == SimdShuffle::Operand::LEFT) {
         src = useRegisterAtStart(ins->lhs());
       } else {
         src = useRegisterAtStart(ins->rhs());
@@ -1152,12 +1149,12 @@ void LIRGenerator::visitWasmShuffleSimd128(MWasmShuffleSimd128* ins) {
       define(lir, ins);
       break;
     }
-    case Shuffle::Operand::BOTH:
-    case Shuffle::Operand::BOTH_SWAPPED: {
+    case SimdShuffle::Operand::BOTH:
+    case SimdShuffle::Operand::BOTH_SWAPPED: {
       LDefinition temp = LDefinition::BogusTemp();
       LAllocation lhs;
       LAllocation rhs;
-      if (s.opd == Shuffle::Operand::BOTH) {
+      if (s.opd == SimdShuffle::Operand::BOTH) {
         lhs = useRegisterAtStart(ins->lhs());
         rhs = useRegisterAtStart(ins->rhs());
       } else {
diff --git a/js/src/jit/moz.build b/js/src/jit/moz.build
index 85c2aa191db01..d811aa17c9987 100644
--- a/js/src/jit/moz.build
+++ b/js/src/jit/moz.build
@@ -78,6 +78,7 @@ UNIFIED_SOURCES += [
     "shared/CodeGenerator-shared.cpp",
     "shared/Disassembler-shared.cpp",
     "shared/Lowering-shared.cpp",
+    "ShuffleAnalysis.cpp",
     "Sink.cpp",
     "Snapshots.cpp",
     "TrialInlining.cpp",
diff --git a/js/src/jit/shared/LIR-shared.h b/js/src/jit/shared/LIR-shared.h
index 2a0e488355694..eed4d0f9b4f3f 100644
--- a/js/src/jit/shared/LIR-shared.h
+++ b/js/src/jit/shared/LIR-shared.h
@@ -3614,40 +3614,8 @@ class LWasmSignReplicationSimd128 : public LInstructionHelper<1, 1, 0> {
 // (v128, v128, imm_simd) -> v128 effect-free operation.
 // temp is FPR (and always in use).
 class LWasmShuffleSimd128 : public LInstructionHelper<1, 2, 1> {
- public:
-  // Shuffle operations.  NOTE: these may still be x86-centric, but the set can
-  // accomodate operations from other architectures.
-  enum Op {
-    // Blend bytes.  control_ has the blend mask as an I8x16: 0 to select from
-    // the lhs, -1 to select from the rhs.
-    BLEND_8x16,
-
-    // Blend words.  control_ has the blend mask as an I16x8: 0 to select from
-    // the lhs, -1 to select from the rhs.
-    BLEND_16x8,
-
-    // Concat the lhs in front of the rhs and shift right by bytes, extracting
-    // the low 16 bytes; control_[0] has the shift count.
-    CONCAT_RIGHT_SHIFT_8x16,
-
-    // Interleave qwords/dwords/words/bytes from high/low halves of operands.
-    // The low-order item in the result comes from the lhs, then the next from
-    // the rhs, and so on.  control_ is ignored.
-    INTERLEAVE_HIGH_8x16,
-    INTERLEAVE_HIGH_16x8,
-    INTERLEAVE_HIGH_32x4,
-    INTERLEAVE_HIGH_64x2,
-    INTERLEAVE_LOW_8x16,
-    INTERLEAVE_LOW_16x8,
-    INTERLEAVE_LOW_32x4,
-    INTERLEAVE_LOW_64x2,
-
-    // Fully general shuffle+blend.  control_ has the shuffle mask.
-    SHUFFLE_BLEND_8x16,
-  };
-
  private:
-  Op op_;
+  SimdShuffleOp op_;
   SimdConstant control_;
 
  public:
@@ -3658,7 +3626,8 @@ class LWasmShuffleSimd128 : public LInstructionHelper<1, 2, 1> {
   static constexpr uint32_t Rhs = 1;
 
   LWasmShuffleSimd128(const LAllocation& lhs, const LAllocation& rhs,
-                      const LDefinition& temp, Op op, SimdConstant control)
+                      const LDefinition& temp, SimdShuffleOp op,
+                      SimdConstant control)
       : LInstructionHelper(classOpcode), op_(op), control_(control) {
     setOperand(Lhs, lhs);
     setOperand(Rhs, rhs);
@@ -3669,58 +3638,14 @@ class LWasmShuffleSimd128 : public LInstructionHelper<1, 2, 1> {
   const LAllocation* lhsDest() { return getOperand(LhsDest); }
   const LAllocation* rhs() { return getOperand(Rhs); }
   const LDefinition* temp() { return getTemp(0); }
-  Op op() { return op_; }
+  SimdShuffleOp op() { return op_; }
   SimdConstant control() { return control_; }
 };
 
 // (v128, imm_simd) -> v128 effect-free operation.
 class LWasmPermuteSimd128 : public LInstructionHelper<1, 1, 0> {
- public:
-  // Permutation operations.  NOTE: these may still be x86-centric, but the set
-  // can accomodate operations from other architectures.
-  //
-  // The "low-order" byte is in lane 0 of an 8x16 datum, the "high-order" byte
-  // in lane 15.  The low-order byte is also the "rightmost".  In wasm, the
-  // constant (v128.const i8x16 0 1 2 ... 15) has 0 in the low-order byte and 15
-  // in the high-order byte.
-  enum Op {
-    // A single byte lane is copied into all the other byte lanes.  control_[0]
-    // has the source lane.
-    BROADCAST_8x16,
-
-    // A single word lane is copied into all the other word lanes.  control_[0]
-    // has the source lane.
-    BROADCAST_16x8,
-
-    // Copy input to output.
-    MOVE,
-
-    // control_ has bytes in range 0..15 s.t. control_[i] holds the source lane
-    // for output lane i.
-    PERMUTE_8x16,
-
-    // control_ has int16s in range 0..7, as for 8x16.  In addition, the high
-    // byte of control_[0] has flags detailing the operation, values taken
-    // from the Perm16x8Action enum below.
-    PERMUTE_16x8,
-
-    // control_ has int32s in range 0..3, as for 8x16.
-    PERMUTE_32x4,
-
-    // control_[0] has the number of places to rotate by.
-    ROTATE_RIGHT_8x16,
-
-    // Zeroes are shifted into high-order bytes and low-order bytes are lost.
-    // control_[0] has the number of places to shift by.
-    SHIFT_RIGHT_8x16,
-
-    // Zeroes are shifted into low-order bytes and high-order bytes are lost.
-    // control_[0] has the number of places to shift by.
-    SHIFT_LEFT_8x16,
-  };
-
  private:
-  Op op_;
+  SimdPermuteOp op_;
   SimdConstant control_;
 
  public:
@@ -3728,13 +3653,14 @@ class LWasmPermuteSimd128 : public LInstructionHelper<1, 1, 0> {
 
   static constexpr uint32_t Src = 0;
 
-  LWasmPermuteSimd128(const LAllocation& src, Op op, SimdConstant control)
+  LWasmPermuteSimd128(const LAllocation& src, SimdPermuteOp op,
+                      SimdConstant control)
       : LInstructionHelper(classOpcode), op_(op), control_(control) {
     setOperand(Src, src);
   }
 
   const LAllocation* src() { return getOperand(Src); }
-  Op op() { return op_; }
+  SimdPermuteOp op() { return op_; }
   SimdConstant control() { return control_; }
 };
 
diff --git a/js/src/jit/shared/Lowering-shared.cpp b/js/src/jit/shared/Lowering-shared.cpp
index 18bfe0cdb23ab..b363f6096fa20 100644
--- a/js/src/jit/shared/Lowering-shared.cpp
+++ b/js/src/jit/shared/Lowering-shared.cpp
@@ -338,701 +338,3 @@ void LIRGeneratorShared::lowerWasmCompareAndSelect(MWasmSelect* ins,
       useRegisterAtStart(ins->trueExpr()), useAny(ins->falseExpr()));
   defineReuseInput(lir, ins, LWasmCompareAndSelect::IfTrueExprIndex);
 }
-
-#ifdef ENABLE_WASM_SIMD
-
-// Specialization analysis for SIMD operations.  This is still x86-centric but
-// generalizes fairly easily to other architectures.
-
-// Optimization of v8x16.shuffle.  The general byte shuffle+blend is very
-// expensive (equivalent to at least a dozen instructions), and we want to avoid
-// that if we can.  So look for special cases - there are many.
-//
-// The strategy is to sort the operation into one of three buckets depending
-// on the shuffle pattern and inputs:
-//
-//  - single operand; shuffles on these values are rotations, reversals,
-//    transpositions, and general permutations
-//  - single-operand-with-interesting-constant (especially zero); shuffles on
-//    these values are often byte shift or scatter operations
-//  - dual operand; shuffles on these operations are blends, catenated
-//    shifts, and (in the worst case) general shuffle+blends
-//
-// We're not trying to solve the general problem, only to lower reasonably
-// expressed patterns that express common operations.  Producers that produce
-// dense and convoluted patterns will end up with the general byte shuffle.
-// Producers that produce simpler patterns that easily map to hardware will
-// get faster code.
-//
-// In particular, these matchers do not try to combine transformations, so a
-// shuffle that optimally is lowered to rotate + permute32x4 + rotate, say, is
-// usually going to end up as a general byte shuffle.
-
-// Reduce a 0..31 byte mask to a 0..15 word mask if possible and if so return
-// true, updating *control.
-static bool ByteMaskToWordMask(SimdConstant* control) {
-  const SimdConstant::I8x16& lanes = control->asInt8x16();
-  int16_t controlWords[8];
-  for (int i = 0; i < 16; i += 2) {
-    if (!((lanes[i] & 1) == 0 && lanes[i + 1] == lanes[i] + 1)) {
-      return false;
-    }
-    controlWords[i / 2] = lanes[i] / 2;
-  }
-  *control = SimdConstant::CreateX8(controlWords);
-  return true;
-}
-
-// Reduce a 0..31 byte mask to a 0..7 dword mask if possible and if so return
-// true, updating *control.
-static bool ByteMaskToDWordMask(SimdConstant* control) {
-  const SimdConstant::I8x16& lanes = control->asInt8x16();
-  int32_t controlDWords[4];
-  for (int i = 0; i < 16; i += 4) {
-    if (!((lanes[i] & 3) == 0 && lanes[i + 1] == lanes[i] + 1 &&
-          lanes[i + 2] == lanes[i] + 2 && lanes[i + 3] == lanes[i] + 3)) {
-      return false;
-    }
-    controlDWords[i / 4] = lanes[i] / 4;
-  }
-  *control = SimdConstant::CreateX4(controlDWords);
-  return true;
-}
-
-// Reduce a 0..31 byte mask to a 0..3 qword mask if possible and if so return
-// true, updating *control.
-static bool ByteMaskToQWordMask(SimdConstant* control) {
-  const SimdConstant::I8x16& lanes = control->asInt8x16();
-  int64_t controlQWords[2];
-  for (int i = 0; i < 16; i += 8) {
-    if (!((lanes[i] & 7) == 0 && lanes[i + 1] == lanes[i] + 1 &&
-          lanes[i + 2] == lanes[i] + 2 && lanes[i + 3] == lanes[i] + 3 &&
-          lanes[i + 4] == lanes[i] + 4 && lanes[i + 5] == lanes[i] + 5 &&
-          lanes[i + 6] == lanes[i] + 6 && lanes[i + 7] == lanes[i] + 7)) {
-      return false;
-    }
-    controlQWords[i / 8] = lanes[i] / 8;
-  }
-  *control = SimdConstant::CreateX2(controlQWords);
-  return true;
-}
-
-// Skip across consecutive values in lanes starting at i, returning the index
-// after the last element.  Lane values must be <= len-1 ("masked").
-//
-// Since every element is a 1-element run, the return value is never the same as
-// the starting i.
-template <typename T>
-static int ScanIncreasingMasked(const T* lanes, int i) {
-  int len = int(16 / sizeof(T));
-  MOZ_ASSERT(i < len);
-  MOZ_ASSERT(lanes[i] <= len - 1);
-  i++;
-  while (i < len && lanes[i] == lanes[i - 1] + 1) {
-    MOZ_ASSERT(lanes[i] <= len - 1);
-    i++;
-  }
-  return i;
-}
-
-// Skip across consecutive values in lanes starting at i, returning the index
-// after the last element.  Lane values must be <= len*2-1 ("unmasked"); the
-// values len-1 and len are not considered consecutive.
-//
-// Since every element is a 1-element run, the return value is never the same as
-// the starting i.
-template <typename T>
-static int ScanIncreasingUnmasked(const T* lanes, int i) {
-  int len = int(16 / sizeof(T));
-  MOZ_ASSERT(i < len);
-  if (lanes[i] < len) {
-    i++;
-    while (i < len && lanes[i] < len && lanes[i - 1] == lanes[i] - 1) {
-      i++;
-    }
-  } else {
-    i++;
-    while (i < len && lanes[i] >= len && lanes[i - 1] == lanes[i] - 1) {
-      i++;
-    }
-  }
-  return i;
-}
-
-// Skip lanes that equal v starting at i, returning the index just beyond the
-// last of those.  There is no requirement that the initial lanes[i] == v.
-template <typename T>
-static int ScanConstant(const T* lanes, int v, int i) {
-  int len = int(16 / sizeof(T));
-  MOZ_ASSERT(i <= len);
-  while (i < len && lanes[i] == v) {
-    i++;
-  }
-  return i;
-}
-
-// Mask lane values denoting rhs elements into lhs elements.
-template <typename T>
-static void MaskLanes(T* result, const T* input) {
-  int len = int(16 / sizeof(T));
-  for (int i = 0; i < len; i++) {
-    result[i] = input[i] & (len - 1);
-  }
-}
-
-// Apply a transformation to each lane value.
-template <typename T>
-static void MapLanes(T* result, const T* input, int (*f)(int)) {
-  int len = int(16 / sizeof(T));
-  for (int i = 0; i < len; i++) {
-    result[i] = f(input[i]);
-  }
-}
-
-// Recognize an identity permutation, assuming lanes is masked.
-template <typename T>
-static bool IsIdentity(const T* lanes) {
-  return ScanIncreasingMasked(lanes, 0) == int(16 / sizeof(T));
-}
-
-// Recognize part of an identity permutation starting at start, with
-// the first value of the permutation expected to be bias.
-template <typename T>
-static bool IsIdentity(const T* lanes, int start, int len, int bias) {
-  if (lanes[start] != bias) {
-    return false;
-  }
-  for (int i = start + 1; i < start + len; i++) {
-    if (lanes[i] != lanes[i - 1] + 1) {
-      return false;
-    }
-  }
-  return true;
-}
-
-// We can permute by dwords if the mask is reducible to a dword mask, and in
-// this case a single PSHUFD is enough.
-static bool TryPermute32x4(SimdConstant* control) {
-  SimdConstant tmp = *control;
-  if (!ByteMaskToDWordMask(&tmp)) {
-    return false;
-  }
-  *control = tmp;
-  return true;
-}
-
-// Can we perform a byte rotate right?  We can use PALIGNR.  The shift count is
-// just lanes[0], and *control is unchanged.
-static bool TryRotateRight8x16(SimdConstant* control) {
-  const SimdConstant::I8x16& lanes = control->asInt8x16();
-  // Look for the end of the first run of consecutive bytes.
-  int i = ScanIncreasingMasked(lanes, 0);
-
-  // First run must start at a value s.t. we have a rotate if all remaining
-  // bytes are a run.
-  if (lanes[0] != 16 - i) {
-    return false;
-  }
-
-  // If we reached the end of the vector, we're done.
-  if (i == 16) {
-    return true;
-  }
-
-  // Second run must start at source lane zero.
-  if (lanes[i] != 0) {
-    return false;
-  }
-
-  // Second run must end at the end of the lane vector.
-  return ScanIncreasingMasked(lanes, i) == 16;
-}
-
-// We can permute by words if the mask is reducible to a word mask.
-static bool TryPermute16x8(SimdConstant* control) {
-  SimdConstant tmp = *control;
-  if (!ByteMaskToWordMask(&tmp)) {
-    return false;
-  }
-  *control = tmp;
-  return true;
-}
-
-// A single word lane is copied into all the other lanes: PSHUF*W + PSHUFD.
-static bool TryBroadcast16x8(SimdConstant* control) {
-  SimdConstant tmp = *control;
-  if (!ByteMaskToWordMask(&tmp)) {
-    return false;
-  }
-  const SimdConstant::I16x8& lanes = tmp.asInt16x8();
-  if (ScanConstant(lanes, lanes[0], 0) < 8) {
-    return false;
-  }
-  *control = tmp;
-  return true;
-}
-
-// A single byte lane is copied int all the other lanes: PUNPCK*BW + PSHUF*W +
-// PSHUFD.
-static bool TryBroadcast8x16(SimdConstant* control) {
-  const SimdConstant::I8x16& lanes = control->asInt8x16();
-  if (ScanConstant(lanes, lanes[0], 0) < 16) {
-    return false;
-  }
-  return true;
-}
-
-// Look for permutations of a single operand.
-static LWasmPermuteSimd128::Op AnalyzePermute(SimdConstant* control) {
-  // Lane indices are input-agnostic for single-operand permutations.
-  SimdConstant::I8x16 controlBytes;
-  MaskLanes(controlBytes, control->asInt8x16());
-
-  // Get rid of no-ops immediately, so nobody else needs to check.
-  if (IsIdentity(controlBytes)) {
-    return LWasmPermuteSimd128::MOVE;
-  }
-
-  // Default control is the masked bytes.
-  *control = SimdConstant::CreateX16(controlBytes);
-
-  // Analysis order matters here and is architecture-dependent or even
-  // microarchitecture-dependent: ideally the cheapest implementation first.
-  // The Intel manual says that the cost of a PSHUFB is about five other
-  // operations, so make that our cutoff.
-  //
-  // Word, dword, and qword reversals are handled optimally by general permutes.
-  //
-  // Byte reversals are probably best left to PSHUFB, no alternative rendition
-  // seems to reliably go below five instructions.  (Discuss.)
-  //
-  // Word swaps within doublewords and dword swaps within quadwords are handled
-  // optimally by general permutes.
-  //
-  // Dword and qword broadcasts are handled by dword permute.
-
-  if (TryPermute32x4(control)) {
-    return LWasmPermuteSimd128::PERMUTE_32x4;
-  }
-  if (TryRotateRight8x16(control)) {
-    return LWasmPermuteSimd128::ROTATE_RIGHT_8x16;
-  }
-  if (TryBroadcast16x8(control)) {
-    return LWasmPermuteSimd128::BROADCAST_16x8;
-  }
-  if (TryPermute16x8(control)) {
-    return LWasmPermuteSimd128::PERMUTE_16x8;
-  }
-  if (TryBroadcast8x16(control)) {
-    return LWasmPermuteSimd128::BROADCAST_8x16;
-  }
-
-  // TODO: (From v8) Unzip and transpose generally have renditions that slightly
-  // beat a general permute (three or four instructions)
-  //
-  // TODO: (From MacroAssemblerX86Shared::ShuffleX4): MOVLHPS and MOVHLPS can be
-  // used when merging two values.
-  //
-  // TODO: Byteswap is MOV + PSLLW + PSRLW + POR, a small win over PSHUFB.
-
-  // The default operation is to permute bytes with the default control.
-  return LWasmPermuteSimd128::PERMUTE_8x16;
-}
-
-// Can we shift the bytes left or right by a constant?  A shift is a run of
-// lanes from the rhs (which is zero) on one end and a run of values from the
-// lhs on the other end.
-static Maybe<LWasmPermuteSimd128::Op> TryShift8x16(SimdConstant* control) {
-  const SimdConstant::I8x16& lanes = control->asInt8x16();
-
-  // Represent all zero lanes by 16
-  SimdConstant::I8x16 zeroesMasked;
-  MapLanes(zeroesMasked, lanes, [](int x) -> int { return x >= 16 ? 16 : x; });
-
-  int i = ScanConstant(zeroesMasked, 16, 0);
-  int shiftLeft = i;
-  if (shiftLeft > 0 && lanes[shiftLeft] != 0) {
-    return Nothing();
-  }
-
-  i = ScanIncreasingUnmasked(zeroesMasked, i);
-  int shiftRight = 16 - i;
-  if (shiftRight > 0 && lanes[i - 1] != 15) {
-    return Nothing();
-  }
-
-  i = ScanConstant(zeroesMasked, 16, i);
-  if (i < 16 || (shiftRight > 0 && shiftLeft > 0) ||
-      (shiftRight == 0 && shiftLeft == 0)) {
-    return Nothing();
-  }
-
-  if (shiftRight) {
-    *control = SimdConstant::SplatX16(shiftRight);
-    return Some(LWasmPermuteSimd128::SHIFT_RIGHT_8x16);
-  }
-  *control = SimdConstant::SplatX16(shiftLeft);
-  return Some(LWasmPermuteSimd128::SHIFT_LEFT_8x16);
-}
-
-static Maybe<LWasmPermuteSimd128::Op> AnalyzeShuffleWithZero(
-    SimdConstant* control) {
-  Maybe<LWasmPermuteSimd128::Op> op;
-  op = TryShift8x16(control);
-  if (op) {
-    return op;
-  }
-
-  // TODO: Optimization opportunity? A byte-blend-with-zero is just a CONST;
-  // PAND.  This may beat the general byte blend code below.
-  return Nothing();
-}
-
-// Concat: if the result is the suffix (high bytes) of the rhs in front of a
-// prefix (low bytes) of the lhs then this is PALIGNR; ditto if the operands are
-// swapped.
-static Maybe<LWasmShuffleSimd128::Op> TryConcatRightShift8x16(
-    SimdConstant* control, bool* swapOperands) {
-  const SimdConstant::I8x16& lanes = control->asInt8x16();
-  int i = ScanIncreasingUnmasked(lanes, 0);
-  MOZ_ASSERT(i < 16, "Single-operand run should have been handled elswhere");
-  // First run must end with 15 % 16
-  if ((lanes[i - 1] & 15) != 15) {
-    return Nothing();
-  }
-  // Second run must start with 0 % 16
-  if ((lanes[i] & 15) != 0) {
-    return Nothing();
-  }
-  // The two runs must come from different inputs
-  if ((lanes[i] & 16) == (lanes[i - 1] & 16)) {
-    return Nothing();
-  }
-  int suffixLength = i;
-
-  i = ScanIncreasingUnmasked(lanes, i);
-  // Must end at the left end
-  if (i != 16) {
-    return Nothing();
-  }
-
-  // If the suffix is from the lhs then swap the operands
-  if (lanes[0] < 16) {
-    *swapOperands = !*swapOperands;
-  }
-  *control = SimdConstant::SplatX16(suffixLength);
-  return Some(LWasmShuffleSimd128::CONCAT_RIGHT_SHIFT_8x16);
-}
-
-// Blend words: if we pick words from both operands without a pattern but all
-// the input words stay in their position then this is PBLENDW (immediate mask);
-// this also handles all larger sizes on x64.
-static Maybe<LWasmShuffleSimd128::Op> TryBlendInt16x8(SimdConstant* control) {
-  SimdConstant tmp(*control);
-  if (!ByteMaskToWordMask(&tmp)) {
-    return Nothing();
-  }
-  SimdConstant::I16x8 masked;
-  MaskLanes(masked, tmp.asInt16x8());
-  if (!IsIdentity(masked)) {
-    return Nothing();
-  }
-  SimdConstant::I16x8 mapped;
-  MapLanes(mapped, tmp.asInt16x8(),
-           [](int x) -> int { return x < 8 ? 0 : -1; });
-  *control = SimdConstant::CreateX8(mapped);
-  return Some(LWasmShuffleSimd128::BLEND_16x8);
-}
-
-// Blend bytes: if we pick bytes ditto then this is a byte blend, which can be
-// handled with a CONST, PAND, PANDNOT, and POR.
-//
-// TODO: Optimization opportunity? If we pick all but one lanes from one with at
-// most one from the other then it could be a MOV + PEXRB + PINSRB (also if this
-// element is not in its source location).
-static Maybe<LWasmShuffleSimd128::Op> TryBlendInt8x16(SimdConstant* control) {
-  SimdConstant::I8x16 masked;
-  MaskLanes(masked, control->asInt8x16());
-  if (!IsIdentity(masked)) {
-    return Nothing();
-  }
-  SimdConstant::I8x16 mapped;
-  MapLanes(mapped, control->asInt8x16(),
-           [](int x) -> int { return x < 16 ? 0 : -1; });
-  *control = SimdConstant::CreateX16(mapped);
-  return Some(LWasmShuffleSimd128::BLEND_8x16);
-}
-
-template <typename T>
-static bool MatchInterleave(const T* lanes, int lhs, int rhs, int len) {
-  for (int i = 0; i < len; i++) {
-    if (lanes[i * 2] != lhs + i || lanes[i * 2 + 1] != rhs + i) {
-      return false;
-    }
-  }
-  return true;
-}
-
-// Unpack/interleave:
-//  - if we interleave the low (bytes/words/doublewords) of the inputs into
-//    the output then this is UNPCKL*W (possibly with a swap of operands).
-//  - if we interleave the high ditto then it is UNPCKH*W (ditto)
-template <typename T>
-static Maybe<LWasmShuffleSimd128::Op> TryInterleave(
-    const T* lanes, int lhs, int rhs, bool* swapOperands,
-    LWasmShuffleSimd128::Op lowOp, LWasmShuffleSimd128::Op highOp) {
-  int len = int(32 / (sizeof(T) * 4));
-  if (MatchInterleave(lanes, lhs, rhs, len)) {
-    return Some(lowOp);
-  }
-  if (MatchInterleave(lanes, rhs, lhs, len)) {
-    *swapOperands = !*swapOperands;
-    return Some(lowOp);
-  }
-  if (MatchInterleave(lanes, lhs + len, rhs + len, len)) {
-    return Some(highOp);
-  }
-  if (MatchInterleave(lanes, rhs + len, lhs + len, len)) {
-    *swapOperands = !*swapOperands;
-    return Some(highOp);
-  }
-  return Nothing();
-}
-
-static Maybe<LWasmShuffleSimd128::Op> TryInterleave64x2(SimdConstant* control,
-                                                        bool* swapOperands) {
-  SimdConstant tmp = *control;
-  if (!ByteMaskToQWordMask(&tmp)) {
-    return Nothing();
-  }
-  const SimdConstant::I64x2& lanes = tmp.asInt64x2();
-  return TryInterleave(lanes, 0, 2, swapOperands,
-                       LWasmShuffleSimd128::INTERLEAVE_LOW_64x2,
-                       LWasmShuffleSimd128::INTERLEAVE_HIGH_64x2);
-}
-
-static Maybe<LWasmShuffleSimd128::Op> TryInterleave32x4(SimdConstant* control,
-                                                        bool* swapOperands) {
-  SimdConstant tmp = *control;
-  if (!ByteMaskToDWordMask(&tmp)) {
-    return Nothing();
-  }
-  const SimdConstant::I32x4& lanes = tmp.asInt32x4();
-  return TryInterleave(lanes, 0, 4, swapOperands,
-                       LWasmShuffleSimd128::INTERLEAVE_LOW_32x4,
-                       LWasmShuffleSimd128::INTERLEAVE_HIGH_32x4);
-}
-
-static Maybe<LWasmShuffleSimd128::Op> TryInterleave16x8(SimdConstant* control,
-                                                        bool* swapOperands) {
-  SimdConstant tmp = *control;
-  if (!ByteMaskToWordMask(&tmp)) {
-    return Nothing();
-  }
-  const SimdConstant::I16x8& lanes = tmp.asInt16x8();
-  return TryInterleave(lanes, 0, 8, swapOperands,
-                       LWasmShuffleSimd128::INTERLEAVE_LOW_16x8,
-                       LWasmShuffleSimd128::INTERLEAVE_HIGH_16x8);
-}
-
-static Maybe<LWasmShuffleSimd128::Op> TryInterleave8x16(SimdConstant* control,
-                                                        bool* swapOperands) {
-  const SimdConstant::I8x16& lanes = control->asInt8x16();
-  return TryInterleave(lanes, 0, 16, swapOperands,
-                       LWasmShuffleSimd128::INTERLEAVE_LOW_8x16,
-                       LWasmShuffleSimd128::INTERLEAVE_HIGH_8x16);
-}
-
-static LWasmShuffleSimd128::Op AnalyzeTwoArgShuffle(SimdConstant* control,
-                                                    bool* swapOperands) {
-  Maybe<LWasmShuffleSimd128::Op> op;
-  op = TryConcatRightShift8x16(control, swapOperands);
-  if (!op) {
-    op = TryBlendInt16x8(control);
-  }
-  if (!op) {
-    op = TryBlendInt8x16(control);
-  }
-  if (!op) {
-    op = TryInterleave64x2(control, swapOperands);
-  }
-  if (!op) {
-    op = TryInterleave32x4(control, swapOperands);
-  }
-  if (!op) {
-    op = TryInterleave16x8(control, swapOperands);
-  }
-  if (!op) {
-    op = TryInterleave8x16(control, swapOperands);
-  }
-  if (!op) {
-    op = Some(LWasmShuffleSimd128::SHUFFLE_BLEND_8x16);
-  }
-  return *op;
-}
-
-// Reorder the operands if that seems useful, notably, move a constant to the
-// right hand side.  Rewrites the control to account for any move.
-static bool MaybeReorderShuffleOperands(MDefinition** lhs, MDefinition** rhs,
-                                        SimdConstant* control) {
-  if ((*lhs)->isWasmFloatConstant()) {
-    MDefinition* tmp = *lhs;
-    *lhs = *rhs;
-    *rhs = tmp;
-
-    int8_t controlBytes[16];
-    const SimdConstant::I8x16& lanes = control->asInt8x16();
-    for (unsigned i = 0; i < 16; i++) {
-      controlBytes[i] = lanes[i] ^ 16;
-    }
-    *control = SimdConstant::CreateX16(controlBytes);
-
-    return true;
-  }
-  return false;
-}
-
-Shuffle LIRGeneratorShared::AnalyzeShuffle(MWasmShuffleSimd128* ins) {
-  // Control may be updated, but only once we commit to an operation or when we
-  // swap operands.
-  SimdConstant control = ins->control();
-  MDefinition* lhs = ins->lhs();
-  MDefinition* rhs = ins->rhs();
-
-  // If only one of the inputs is used, determine which.
-  bool useLeft = true;
-  bool useRight = true;
-  if (lhs == rhs) {
-    useRight = false;
-  } else {
-    bool allAbove = true;
-    bool allBelow = true;
-    const SimdConstant::I8x16& lanes = control.asInt8x16();
-    for (unsigned i = 0; i < 16; i++) {
-      allAbove = allAbove && lanes[i] >= 16;
-      allBelow = allBelow && lanes[i] < 16;
-    }
-    if (allAbove) {
-      useLeft = false;
-    } else if (allBelow) {
-      useRight = false;
-    }
-  }
-
-  // Deal with one-ignored-input.
-  if (!(useLeft && useRight)) {
-    LWasmPermuteSimd128::Op op = AnalyzePermute(&control);
-    return Shuffle::permute(
-        useLeft ? Shuffle::Operand::LEFT : Shuffle::Operand::RIGHT, control,
-        op);
-  }
-
-  // Move constants to rhs.
-  bool swapOperands = MaybeReorderShuffleOperands(&lhs, &rhs, &control);
-
-  // Deal with constant rhs.
-  if (rhs->isWasmFloatConstant()) {
-    SimdConstant rhsConstant = rhs->toWasmFloatConstant()->toSimd128();
-    if (rhsConstant.isZeroBits()) {
-      Maybe<LWasmPermuteSimd128::Op> op = AnalyzeShuffleWithZero(&control);
-      if (op) {
-        return Shuffle::permute(
-            swapOperands ? Shuffle::Operand::RIGHT : Shuffle::Operand::LEFT,
-            control, *op);
-      }
-    }
-  }
-
-  // Two operands both of which are used.  If there's one constant operand it is
-  // now on the rhs.
-  LWasmShuffleSimd128::Op op = AnalyzeTwoArgShuffle(&control, &swapOperands);
-  return Shuffle::shuffle(
-      swapOperands ? Shuffle::Operand::BOTH_SWAPPED : Shuffle::Operand::BOTH,
-      control, op);
-}
-
-#  ifdef DEBUG
-void LIRGeneratorShared::ReportShuffleSpecialization(const Shuffle& s) {
-  switch (s.opd) {
-    case Shuffle::Operand::BOTH:
-    case Shuffle::Operand::BOTH_SWAPPED:
-      switch (*s.shuffleOp) {
-        case LWasmShuffleSimd128::SHUFFLE_BLEND_8x16:
-          js::wasm::ReportSimdAnalysis("shuffle -> shuffle+blend 8x16");
-          break;
-        case LWasmShuffleSimd128::BLEND_8x16:
-          js::wasm::ReportSimdAnalysis("shuffle -> blend 8x16");
-          break;
-        case LWasmShuffleSimd128::BLEND_16x8:
-          js::wasm::ReportSimdAnalysis("shuffle -> blend 16x8");
-          break;
-        case LWasmShuffleSimd128::CONCAT_RIGHT_SHIFT_8x16:
-          js::wasm::ReportSimdAnalysis("shuffle -> concat+shift-right 8x16");
-          break;
-        case LWasmShuffleSimd128::INTERLEAVE_HIGH_8x16:
-          js::wasm::ReportSimdAnalysis("shuffle -> interleave-high 8x16");
-          break;
-        case LWasmShuffleSimd128::INTERLEAVE_HIGH_16x8:
-          js::wasm::ReportSimdAnalysis("shuffle -> interleave-high 16x8");
-          break;
-        case LWasmShuffleSimd128::INTERLEAVE_HIGH_32x4:
-          js::wasm::ReportSimdAnalysis("shuffle -> interleave-high 32x4");
-          break;
-        case LWasmShuffleSimd128::INTERLEAVE_HIGH_64x2:
-          js::wasm::ReportSimdAnalysis("shuffle -> interleave-high 64x2");
-          break;
-        case LWasmShuffleSimd128::INTERLEAVE_LOW_8x16:
-          js::wasm::ReportSimdAnalysis("shuffle -> interleave-low 8x16");
-          break;
-        case LWasmShuffleSimd128::INTERLEAVE_LOW_16x8:
-          js::wasm::ReportSimdAnalysis("shuffle -> interleave-low 16x8");
-          break;
-        case LWasmShuffleSimd128::INTERLEAVE_LOW_32x4:
-          js::wasm::ReportSimdAnalysis("shuffle -> interleave-low 32x4");
-          break;
-        case LWasmShuffleSimd128::INTERLEAVE_LOW_64x2:
-          js::wasm::ReportSimdAnalysis("shuffle -> interleave-low 64x2");
-          break;
-        default:
-          MOZ_CRASH("Unexpected shuffle op");
-      }
-      break;
-    case Shuffle::Operand::LEFT:
-    case Shuffle::Operand::RIGHT:
-      switch (*s.permuteOp) {
-        case LWasmPermuteSimd128::BROADCAST_8x16:
-          js::wasm::ReportSimdAnalysis("shuffle -> broadcast 8x16");
-          break;
-        case LWasmPermuteSimd128::BROADCAST_16x8:
-          js::wasm::ReportSimdAnalysis("shuffle -> broadcast 16x8");
-          break;
-        case LWasmPermuteSimd128::MOVE:
-          js::wasm::ReportSimdAnalysis("shuffle -> move");
-          break;
-        case LWasmPermuteSimd128::PERMUTE_8x16:
-          js::wasm::ReportSimdAnalysis("shuffle -> permute 8x16");
-          break;
-        case LWasmPermuteSimd128::PERMUTE_16x8:
-          js::wasm::ReportSimdAnalysis("shuffle -> permute 16x8");
-          break;
-        case LWasmPermuteSimd128::PERMUTE_32x4:
-          js::wasm::ReportSimdAnalysis("shuffle -> permute 32x4");
-          break;
-        case LWasmPermuteSimd128::ROTATE_RIGHT_8x16:
-          js::wasm::ReportSimdAnalysis("shuffle -> rotate-right 8x16");
-          break;
-        case LWasmPermuteSimd128::SHIFT_LEFT_8x16:
-          js::wasm::ReportSimdAnalysis("shuffle -> shift-left 8x16");
-          break;
-        case LWasmPermuteSimd128::SHIFT_RIGHT_8x16:
-          js::wasm::ReportSimdAnalysis("shuffle -> shift-right 8x16");
-          break;
-        default:
-          MOZ_CRASH("Unexpected permute op");
-      }
-      break;
-  }
-}
-#  endif  // DEBUG
-
-#endif  // ENABLE_WASM_SIMD
diff --git a/js/src/jit/shared/Lowering-shared.h b/js/src/jit/shared/Lowering-shared.h
index 9de7fcb7f5ac4..83adb2efbfe3e 100644
--- a/js/src/jit/shared/Lowering-shared.h
+++ b/js/src/jit/shared/Lowering-shared.h
@@ -22,45 +22,6 @@ class MDefinition;
 class MInstruction;
 class LOsiPoint;
 
-#ifdef ENABLE_WASM_SIMD
-
-// Representation of the result of the shuffle analysis.  See
-// Lowering-shared.cpp for more.
-
-struct Shuffle {
-  enum class Operand {
-    // Both inputs, in the original lhs-rhs order
-    BOTH,
-    // Both inputs, but in rhs-lhs order
-    BOTH_SWAPPED,
-    // Only the lhs input
-    LEFT,
-    // Only the rhs input
-    RIGHT,
-  };
-
-  Operand opd;
-  SimdConstant control;
-  mozilla::Maybe<LWasmPermuteSimd128::Op> permuteOp;  // Single operands
-  mozilla::Maybe<LWasmShuffleSimd128::Op> shuffleOp;  // Double operands
-
-  static Shuffle permute(Operand opd, SimdConstant control,
-                         LWasmPermuteSimd128::Op op) {
-    MOZ_ASSERT(opd == Operand::LEFT || opd == Operand::RIGHT);
-    Shuffle s{opd, control, mozilla::Some(op), mozilla::Nothing()};
-    return s;
-  }
-
-  static Shuffle shuffle(Operand opd, SimdConstant control,
-                         LWasmShuffleSimd128::Op op) {
-    MOZ_ASSERT(opd == Operand::BOTH || opd == Operand::BOTH_SWAPPED);
-    Shuffle s{opd, control, mozilla::Nothing(), mozilla::Some(op)};
-    return s;
-  }
-};
-
-#endif
-
 class LIRGeneratorShared {
  protected:
   MIRGenerator* gen;
@@ -111,13 +72,6 @@ class LIRGeneratorShared {
   static bool ShouldReorderCommutative(MDefinition* lhs, MDefinition* rhs,
                                        MInstruction* ins);
 
-#ifdef ENABLE_WASM_SIMD
-  static Shuffle AnalyzeShuffle(MWasmShuffleSimd128* ins);
-#  ifdef DEBUG
-  static void ReportShuffleSpecialization(const Shuffle& s);
-#  endif
-#endif
-
   // A backend can decide that an instruction should be emitted at its uses,
   // rather than at its definition. To communicate this, set the
   // instruction's virtual register set to 0. When using the instruction,
diff --git a/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp b/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp
index a6eab3026f547..da0df6c0cf372 100644
--- a/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp
+++ b/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp
@@ -3043,65 +3043,65 @@ void CodeGenerator::visitWasmShuffleSimd128(LWasmShuffleSimd128* ins) {
   FloatRegister rhs = ToFloatRegister(ins->rhs());
   SimdConstant control = ins->control();
   switch (ins->op()) {
-    case LWasmShuffleSimd128::BLEND_8x16: {
+    case SimdShuffleOp::BLEND_8x16: {
       masm.blendInt8x16(reinterpret_cast<const uint8_t*>(control.asInt8x16()),
                         lhsDest, rhs, lhsDest, ToFloatRegister(ins->temp()));
       break;
     }
-    case LWasmShuffleSimd128::BLEND_16x8: {
+    case SimdShuffleOp::BLEND_16x8: {
       MOZ_ASSERT(ins->temp()->isBogusTemp());
       masm.blendInt16x8(reinterpret_cast<const uint16_t*>(control.asInt16x8()),
                         lhsDest, rhs, lhsDest);
       break;
     }
-    case LWasmShuffleSimd128::CONCAT_RIGHT_SHIFT_8x16: {
+    case SimdShuffleOp::CONCAT_RIGHT_SHIFT_8x16: {
       MOZ_ASSERT(ins->temp()->isBogusTemp());
       int8_t count = 16 - control.asInt8x16()[0];
       MOZ_ASSERT(count > 0, "Should have been a MOVE operation");
       masm.concatAndRightShiftSimd128(rhs, lhsDest, count);
       break;
     }
-    case LWasmShuffleSimd128::INTERLEAVE_HIGH_8x16: {
+    case SimdShuffleOp::INTERLEAVE_HIGH_8x16: {
       MOZ_ASSERT(ins->temp()->isBogusTemp());
       masm.interleaveHighInt8x16(rhs, lhsDest);
       break;
     }
-    case LWasmShuffleSimd128::INTERLEAVE_HIGH_16x8: {
+    case SimdShuffleOp::INTERLEAVE_HIGH_16x8: {
       MOZ_ASSERT(ins->temp()->isBogusTemp());
       masm.interleaveHighInt16x8(rhs, lhsDest);
       break;
     }
-    case LWasmShuffleSimd128::INTERLEAVE_HIGH_32x4: {
+    case SimdShuffleOp::INTERLEAVE_HIGH_32x4: {
       MOZ_ASSERT(ins->temp()->isBogusTemp());
       masm.interleaveHighInt32x4(rhs, lhsDest);
       break;
     }
-    case LWasmShuffleSimd128::INTERLEAVE_HIGH_64x2: {
+    case SimdShuffleOp::INTERLEAVE_HIGH_64x2: {
       MOZ_ASSERT(ins->temp()->isBogusTemp());
       masm.interleaveHighInt64x2(rhs, lhsDest);
       break;
     }
-    case LWasmShuffleSimd128::INTERLEAVE_LOW_8x16: {
+    case SimdShuffleOp::INTERLEAVE_LOW_8x16: {
       MOZ_ASSERT(ins->temp()->isBogusTemp());
       masm.interleaveLowInt8x16(rhs, lhsDest);
       break;
     }
-    case LWasmShuffleSimd128::INTERLEAVE_LOW_16x8: {
+    case SimdShuffleOp::INTERLEAVE_LOW_16x8: {
       MOZ_ASSERT(ins->temp()->isBogusTemp());
       masm.interleaveLowInt16x8(rhs, lhsDest);
       break;
     }
-    case LWasmShuffleSimd128::INTERLEAVE_LOW_32x4: {
+    case SimdShuffleOp::INTERLEAVE_LOW_32x4: {
       MOZ_ASSERT(ins->temp()->isBogusTemp());
       masm.interleaveLowInt32x4(rhs, lhsDest);
       break;
     }
-    case LWasmShuffleSimd128::INTERLEAVE_LOW_64x2: {
+    case SimdShuffleOp::INTERLEAVE_LOW_64x2: {
       MOZ_ASSERT(ins->temp()->isBogusTemp());
       masm.interleaveLowInt64x2(rhs, lhsDest);
       break;
     }
-    case LWasmShuffleSimd128::SHUFFLE_BLEND_8x16: {
+    case SimdShuffleOp::SHUFFLE_BLEND_8x16: {
       masm.shuffleInt8x16(reinterpret_cast<const uint8_t*>(control.asInt8x16()),
                           rhs, lhsDest);
       break;
@@ -3204,7 +3204,7 @@ void CodeGenerator::visitWasmPermuteSimd128(LWasmPermuteSimd128* ins) {
   SimdConstant control = ins->control();
   switch (ins->op()) {
     // For broadcast, would MOVDDUP be better than PSHUFD for the last step?
-    case LWasmPermuteSimd128::BROADCAST_8x16: {
+    case SimdPermuteOp::BROADCAST_8x16: {
       const SimdConstant::I8x16& mask = control.asInt8x16();
       int8_t source = mask[0];
       if (src != dest) {
@@ -3229,7 +3229,7 @@ void CodeGenerator::visitWasmPermuteSimd128(LWasmPermuteSimd128* ins) {
       }
       break;
     }
-    case LWasmPermuteSimd128::BROADCAST_16x8: {
+    case SimdPermuteOp::BROADCAST_16x8: {
       const SimdConstant::I16x8& mask = control.asInt16x8();
       int16_t source = mask[0];
       uint16_t v = uint16_t(source & 3);
@@ -3245,11 +3245,11 @@ void CodeGenerator::visitWasmPermuteSimd128(LWasmPermuteSimd128* ins) {
       }
       break;
     }
-    case LWasmPermuteSimd128::MOVE: {
+    case SimdPermuteOp::MOVE: {
       masm.moveSimd128(src, dest);
       break;
     }
-    case LWasmPermuteSimd128::PERMUTE_8x16: {
+    case SimdPermuteOp::PERMUTE_8x16: {
       const SimdConstant::I8x16& mask = control.asInt8x16();
 #  ifdef DEBUG
       DebugOnly<int> i;
@@ -3260,7 +3260,7 @@ void CodeGenerator::visitWasmPermuteSimd128(LWasmPermuteSimd128* ins) {
       masm.permuteInt8x16(reinterpret_cast<const uint8_t*>(mask), src, dest);
       break;
     }
-    case LWasmPermuteSimd128::PERMUTE_16x8: {
+    case SimdPermuteOp::PERMUTE_16x8: {
 #  ifdef DEBUG
       const SimdConstant::I16x8& mask = control.asInt16x8();
       DebugOnly<int> i;
@@ -3297,7 +3297,7 @@ void CodeGenerator::visitWasmPermuteSimd128(LWasmPermuteSimd128* ins) {
       }
       break;
     }
-    case LWasmPermuteSimd128::PERMUTE_32x4: {
+    case SimdPermuteOp::PERMUTE_32x4: {
       const SimdConstant::I32x4& mask = control.asInt32x4();
 #  ifdef DEBUG
       DebugOnly<int> i;
@@ -3308,7 +3308,7 @@ void CodeGenerator::visitWasmPermuteSimd128(LWasmPermuteSimd128* ins) {
       masm.permuteInt32x4(reinterpret_cast<const uint32_t*>(mask), src, dest);
       break;
     }
-    case LWasmPermuteSimd128::ROTATE_RIGHT_8x16: {
+    case SimdPermuteOp::ROTATE_RIGHT_8x16: {
       if (src != dest) {
         masm.moveSimd128(src, dest);
       }
@@ -3317,13 +3317,13 @@ void CodeGenerator::visitWasmPermuteSimd128(LWasmPermuteSimd128* ins) {
       masm.concatAndRightShiftSimd128(dest, dest, count);
       break;
     }
-    case LWasmPermuteSimd128::SHIFT_LEFT_8x16: {
+    case SimdPermuteOp::SHIFT_LEFT_8x16: {
       int8_t count = control.asInt8x16()[0];
       MOZ_ASSERT(count > 0, "Should have been a MOVE operation");
       masm.leftShiftSimd128(Imm32(count), src, dest);
       break;
     }
-    case LWasmPermuteSimd128::SHIFT_RIGHT_8x16: {
+    case SimdPermuteOp::SHIFT_RIGHT_8x16: {
       int8_t count = control.asInt8x16()[0];
       MOZ_ASSERT(count > 0, "Should have been a MOVE operation");
       masm.rightShiftSimd128(Imm32(count), src, dest);
diff --git a/js/src/jit/x86-shared/Lowering-x86-shared.cpp b/js/src/jit/x86-shared/Lowering-x86-shared.cpp
index 3db6105544735..aae437485b8e7 100644
--- a/js/src/jit/x86-shared/Lowering-x86-shared.cpp
+++ b/js/src/jit/x86-shared/Lowering-x86-shared.cpp
@@ -1227,13 +1227,10 @@ void LIRGenerator::visitWasmShuffleSimd128(MWasmShuffleSimd128* ins) {
   MOZ_ASSERT(ins->rhs()->type() == MIRType::Simd128);
   MOZ_ASSERT(ins->type() == MIRType::Simd128);
 
-  Shuffle s = AnalyzeShuffle(ins);
-#  ifdef DEBUG
-  ReportShuffleSpecialization(s);
-#  endif
+  SimdShuffle s = ins->shuffle();
   switch (s.opd) {
-    case Shuffle::Operand::LEFT:
-    case Shuffle::Operand::RIGHT: {
+    case SimdShuffle::Operand::LEFT:
+    case SimdShuffle::Operand::RIGHT: {
       LAllocation src;
       // All permute operators currently favor reusing the input register so
       // we're not currently exercising code paths below that do not reuse.
@@ -1241,21 +1238,21 @@ void LIRGenerator::visitWasmShuffleSimd128(MWasmShuffleSimd128* ins) {
       // to be correct.
       bool useAtStartAndReuse = false;
       switch (*s.permuteOp) {
-        case LWasmPermuteSimd128::MOVE:
-        case LWasmPermuteSimd128::BROADCAST_8x16:
-        case LWasmPermuteSimd128::BROADCAST_16x8:
-        case LWasmPermuteSimd128::PERMUTE_8x16:
-        case LWasmPermuteSimd128::PERMUTE_16x8:
-        case LWasmPermuteSimd128::PERMUTE_32x4:
-        case LWasmPermuteSimd128::ROTATE_RIGHT_8x16:
-        case LWasmPermuteSimd128::SHIFT_LEFT_8x16:
-        case LWasmPermuteSimd128::SHIFT_RIGHT_8x16:
+        case SimdPermuteOp::MOVE:
+        case SimdPermuteOp::BROADCAST_8x16:
+        case SimdPermuteOp::BROADCAST_16x8:
+        case SimdPermuteOp::PERMUTE_8x16:
+        case SimdPermuteOp::PERMUTE_16x8:
+        case SimdPermuteOp::PERMUTE_32x4:
+        case SimdPermuteOp::ROTATE_RIGHT_8x16:
+        case SimdPermuteOp::SHIFT_LEFT_8x16:
+        case SimdPermuteOp::SHIFT_RIGHT_8x16:
           useAtStartAndReuse = true;
           break;
         default:
           MOZ_CRASH("Unexpected operator");
       }
-      if (s.opd == Shuffle::Operand::LEFT) {
+      if (s.opd == SimdShuffle::Operand::LEFT) {
         if (useAtStartAndReuse) {
           src = useRegisterAtStart(ins->lhs());
         } else {
@@ -1277,11 +1274,11 @@ void LIRGenerator::visitWasmShuffleSimd128(MWasmShuffleSimd128* ins) {
       }
       break;
     }
-    case Shuffle::Operand::BOTH:
-    case Shuffle::Operand::BOTH_SWAPPED: {
+    case SimdShuffle::Operand::BOTH:
+    case SimdShuffle::Operand::BOTH_SWAPPED: {
       LDefinition temp = LDefinition::BogusTemp();
       switch (*s.shuffleOp) {
-        case LWasmShuffleSimd128::BLEND_8x16:
+        case SimdShuffleOp::BLEND_8x16:
           temp = tempFixed(xmm0);
           break;
         default:
@@ -1289,7 +1286,7 @@ void LIRGenerator::visitWasmShuffleSimd128(MWasmShuffleSimd128* ins) {
       }
       LAllocation lhs;
       LAllocation rhs;
-      if (s.opd == Shuffle::Operand::BOTH) {
+      if (s.opd == SimdShuffle::Operand::BOTH) {
         lhs = useRegisterAtStart(ins->lhs());
         rhs = useRegister(ins->rhs());
       } else {
diff --git a/js/src/wasm/WasmIonCompile.cpp b/js/src/wasm/WasmIonCompile.cpp
index 193ed1944a8f4..667a94672c707 100644
--- a/js/src/wasm/WasmIonCompile.cpp
+++ b/js/src/wasm/WasmIonCompile.cpp
@@ -27,6 +27,7 @@
 #include "jit/CompileInfo.h"
 #include "jit/Ion.h"
 #include "jit/IonOptimizationLevels.h"
+#include "jit/ShuffleAnalysis.h"
 #include "js/ScalarType.h"  // js::Scalar::Type
 #include "wasm/WasmBaselineCompile.h"
 #include "wasm/WasmBuiltins.h"
@@ -805,9 +806,10 @@ class FunctionCompiler {
 
     MOZ_ASSERT(v1->type() == MIRType::Simd128);
     MOZ_ASSERT(v2->type() == MIRType::Simd128);
-    auto* ins = MWasmShuffleSimd128::New(
-        alloc(), v1, v2,
-        SimdConstant::CreateX16(reinterpret_cast<int8_t*>(control.bytes)));
+    SimdShuffle s = AnalyzeSimdShuffle(
+        SimdConstant::CreateX16(reinterpret_cast<int8_t*>(control.bytes)), v1,
+        v2);
+    auto* ins = MWasmShuffleSimd128::New(alloc(), v1, v2, s);
     curBlock_->add(ins);
     return ins;
   }
-- 
GitLab