Commit 843c6e8c authored by Julian Seward's avatar Julian Seward Committed by jseward@mozilla.com
Browse files

Bug 1967644 - wasm: Reduce inlining aggressiveness for very large functions. r=rhunt.

When inlining into a very large function, reduce inlining aggressiveness by one
step (on the 1 .. 9 scale) if the function's bytecode size is over 400KB; by 2
steps if over 800KB, and by 3 steps if over 1.2MB.  This somewhat mitigates a
problem where inlining takes an already huge function and makes it even
bigger, leading to long delays in Ion.

The thresholds are so high (more than 400KB bytecode) that almost all functions
will be unaffected.  Above the 400k threshold, we gradually back off on
inlining; it doesn't get disabled completely.

The patch falls logically into two parts: the heuristic itself ..

* InliningHeuristics::isSmallEnoughToInline: the heuristic itself

* struct FuncCompileInput: new convenience function `bytecodeSize`

.. and updates to the stats-counting machinery.

* struct InliningStats: new field `largeFunctionBackoff`

* struct CompileStats: new field `numLargeFunctionBackoffs`

* auditInlineableCallees (calls isSmallEnoughToInline): note in RootCompiler if
  backoff was applied

Differential Revision: https://phabricator.services.mozilla.com/D250402
parent d38f51bc
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -391,6 +391,7 @@ void CompileStats::merge(const CompileStats& other) {
  inlinedDirectCallBytecodeSize += other.inlinedDirectCallBytecodeSize;
  inlinedCallRefBytecodeSize += other.inlinedCallRefBytecodeSize;
  numInliningBudgetOverruns += other.numInliningBudgetOverruns;
  numLargeFunctionBackoffs += other.numLargeFunctionBackoffs;
}

void CompileAndLinkStats::merge(const CompileAndLinkStats& other) {
@@ -418,6 +419,8 @@ void CompileAndLinkStats::print() const {
         inlinedCallRefBytecodeSize);
  JS_LOG(wasmPerf, Info, "    %7zu functions overran inlining budget",
         numInliningBudgetOverruns);
  JS_LOG(wasmPerf, Info, "    %7zu functions needed large-function backoff",
         numLargeFunctionBackoffs);
  JS_LOG(wasmPerf, Info, "    %7zu bytes mmap'd for code storage",
         codeBytesMapped);
  JS_LOG(wasmPerf, Info, "    %7zu bytes actually used for code storage",
+6 −1
Original line number Diff line number Diff line
@@ -1731,6 +1731,9 @@ struct CompileStats {
  size_t inlinedCallRefBytecodeSize;
  // number of funcs for which inlining stopped due to budget overrun
  size_t numInliningBudgetOverruns;
  // number of funcs for which inlining was made less aggressive because the
  // function was already large
  size_t numLargeFunctionBackoffs = 0;

  void clear() {
    numFuncs = 0;
@@ -1740,13 +1743,15 @@ struct CompileStats {
    inlinedDirectCallBytecodeSize = 0;
    inlinedCallRefBytecodeSize = 0;
    numInliningBudgetOverruns = 0;
    numLargeFunctionBackoffs = 0;
  }
  CompileStats() { clear(); }

  bool empty() const {
    return 0 == (numFuncs | bytecodeSize | inlinedDirectCallCount |
                 inlinedCallRefCount | inlinedDirectCallBytecodeSize |
                 inlinedCallRefBytecodeSize | numInliningBudgetOverruns);
                 inlinedCallRefBytecodeSize | numInliningBudgetOverruns |
                 numLargeFunctionBackoffs);
  }

  // Merge in the counts from `other`.  When using this, be careful to avoid
+6 −0
Original line number Diff line number Diff line
@@ -28,6 +28,7 @@
#include "threading/ProtectedData.h"
#include "vm/HelperThreadTask.h"
#include "wasm/WasmCompile.h"
#include "wasm/WasmConstants.h"
#include "wasm/WasmMetadata.h"
#include "wasm/WasmModule.h"

@@ -58,6 +59,11 @@ struct FuncCompileInput {
        index(index),
        lineOrBytecode(lineOrBytecode),
        callSiteLineNums(std::move(callSiteLineNums)) {}

  // Returns the size of this function's bytecode, computed as the distance
  // from `begin` to `end` and narrowed to 32 bits.
  // NOTE(review): presumably measured in bytes; the types of `begin`/`end`
  // are declared outside this view -- confirm.
  uint32_t bytecodeSize() const {
    // The narrowing conversion below relies on the maximum permitted function
    // size fitting in a uint32_t.
    static_assert(wasm::MaxFunctionBytes <= UINT32_MAX);
    const auto length = end - begin;
    return static_cast<uint32_t>(length);
  }
};

using FuncCompileInputVector = Vector<FuncCompileInput, 8, SystemAllocPolicy>;
+35 −1
Original line number Diff line number Diff line
@@ -12,6 +12,7 @@
#include "js/Prefs.h"
#include "threading/ExclusiveData.h"
#include "vm/MutexIDs.h"
#include "wasm/WasmConstants.h"

namespace js {
namespace wasm {
@@ -213,6 +214,10 @@ class InliningHeuristics {
  static constexpr uint32_t MIN_LEVEL = 1;
  static constexpr uint32_t MAX_LEVEL = 9;

  // Root-function bytecode-size thresholds used by the large-function backoff
  // in isSmallEnoughToInline.  For each threshold that the root function's
  // size strictly exceeds, the inlining level is lowered by one step, but
  // never below MIN_LEVEL.
  static constexpr uint32_t LARGE_FUNCTION_THRESH_1 = 400000;
  static constexpr uint32_t LARGE_FUNCTION_THRESH_2 = 800000;
  static constexpr uint32_t LARGE_FUNCTION_THRESH_3 = 1200000;

 public:
  // 1 = no inlining allowed
  // 2 = min (minimal inlining)
@@ -253,11 +258,22 @@ class InliningHeuristics {
  // other words, a value of zero means the query relates to a function which
  // (if approved) would be inlined into the top-level function currently being
  // compiled.
  //
  // `rootFunctionBodyLength` is the bytecode size of the function at the root
  // of this inlining stack.  If that is (very) large, we back off somewhat on
  // inlining.  `*largeFunctionBackoff` indicates whether or not that happened.
  enum class CallKind { Direct, CallRef };
  static bool isSmallEnoughToInline(CallKind callKind, uint32_t inliningDepth,
                                    uint32_t bodyLength) {
                                    uint32_t bodyLength,
                                    uint32_t rootFunctionBodyLength,
                                    bool* largeFunctionBackoff) {
    *largeFunctionBackoff = false;

    // If this fails, something's seriously wrong; bail out.
    MOZ_RELEASE_ASSERT(inliningDepth <= 10);  // because 10 > (320 / 40)
    MOZ_ASSERT(rootFunctionBodyLength > 0 &&
               rootFunctionBodyLength <= wasm::MaxFunctionBytes);

    // Check whether calls of this kind are currently allowed
    if ((callKind == CallKind::Direct && !rawDirectAllowed()) ||
        (callKind == CallKind::CallRef && !rawCallRefAllowed())) {
@@ -277,6 +293,24 @@ class InliningHeuristics {
                                            160,  // default
                                            200, 240, 280, 320};
    uint32_t level = rawLevel();

    // If the root function is large, back off somewhat on inlining, so as to
    // limit its further growth.  The limits are set so high that almost all
    // functions will be unaffected by this.  See bug 1967644.
    if (rootFunctionBodyLength > LARGE_FUNCTION_THRESH_1 && level > MIN_LEVEL) {
      level--;
      *largeFunctionBackoff = true;
    }
    if (rootFunctionBodyLength > LARGE_FUNCTION_THRESH_2 && level > MIN_LEVEL) {
      level--;
      *largeFunctionBackoff = true;
    }
    if (rootFunctionBodyLength > LARGE_FUNCTION_THRESH_3 && level > MIN_LEVEL) {
      level--;
      *largeFunctionBackoff = true;
    }

    // Having established `level`, check whether the callee is small enough.
    MOZ_RELEASE_ASSERT(level >= MIN_LEVEL && level <= MAX_LEVEL);
    int32_t allowedSize = baseSize[level - MIN_LEVEL];
    allowedSize -= int32_t(40 * inliningDepth);
+17 −3
Original line number Diff line number Diff line
@@ -275,6 +275,7 @@ struct InliningStats {
  size_t inlinedDirectFunctions = 0;      // number of inlinees
  size_t inlinedCallRefBytecodeSize = 0;  // sum of sizes of inlinees
  size_t inlinedCallRefFunctions = 0;     // number of inlinees
  bool largeFunctionBackoff = false;      // did large function backoff happen?
};

// Encapsulates the generation of MIR for a wasm function and any functions
@@ -354,12 +355,16 @@ class RootCompiler {
  const CompilerEnvironment& compilerEnv() const { return compilerEnv_; }
  const CodeMetadata& codeMeta() const { return codeMeta_; }
  const CodeTailMetadata* codeTailMeta() const { return codeTailMeta_; }
  const FuncCompileInput& func() const { return func_; }
  TempAllocator& alloc() { return alloc_; }
  MIRGraph& mirGraph() { return mirGraph_; }
  MIRGenerator& mirGen() { return mirGen_; }
  int64_t inliningBudget() const { return localInliningBudget_; }
  FeatureUsage observedFeatures() const { return observedFeatures_; }
  const CompileStats& funcStats() const { return funcStats_; }
  // Records that the large-function inlining backoff was applied while
  // compiling this root function.  This method only ever sets the flag, so
  // calling it repeatedly is harmless.  The flag is read in
  // RootCompiler::generate() to bump funcStats_.numLargeFunctionBackoffs by
  // one.
  void noteLargeFunctionBackoffWasApplied() {
    inliningStats_.largeFunctionBackoff = true;
  }

  uint32_t loopDepth() const { return loopDepth_; }
  void startLoop() { loopDepth_++; }
@@ -2639,8 +2644,15 @@ class FunctionCompiler {
      // Ask the heuristics system if we're allowed to inline a function of
      // this size and kind at the current inlining depth.
      uint32_t inlineeBodySize = codeTailMeta()->funcDefRange(funcIndex).size;
      if (!InliningHeuristics::isSmallEnoughToInline(kind, inliningDepth(),
                                                     inlineeBodySize)) {
      uint32_t rootFunctionBodySize = rootCompiler_.func().bytecodeSize();
      bool largeFunctionBackoff;
      bool smallEnough = InliningHeuristics::isSmallEnoughToInline(
          kind, inliningDepth(), inlineeBodySize, rootFunctionBodySize,
          &largeFunctionBackoff);
      if (largeFunctionBackoff) {
        rootCompiler_.noteLargeFunctionBackoffWasApplied();
      }
      if (!smallEnough) {
        continue;
      }

@@ -10555,13 +10567,15 @@ bool RootCompiler::generate() {
  MOZ_ASSERT(loopDepth_ == 0);

  funcStats_.numFuncs += 1;
  funcStats_.bytecodeSize += func_.end - func_.begin;
  funcStats_.bytecodeSize += func_.bytecodeSize();
  funcStats_.inlinedDirectCallCount += inliningStats_.inlinedDirectFunctions;
  funcStats_.inlinedCallRefCount += inliningStats_.inlinedCallRefFunctions;
  funcStats_.inlinedDirectCallBytecodeSize +=
      inliningStats_.inlinedDirectBytecodeSize;
  funcStats_.inlinedCallRefBytecodeSize +=
      inliningStats_.inlinedCallRefBytecodeSize;
  funcStats_.numLargeFunctionBackoffs +=
      inliningStats_.largeFunctionBackoff ? 1 : 0;

  if (codeTailMeta_) {
    auto guard = codeTailMeta_->inliningBudget.lock();
Loading