Commit 563f25bd authored by Jeff Walden
Browse files

Bug 1476866 - Implement TokenStreamCharsBase::addLineOfContext that does all...

Bug 1476866 - Implement TokenStreamCharsBase::addLineOfContext that does all the work of creating a syntax-error window and works for either CharT.  r=arai
parent a9e2ee73
Loading
Loading
Loading
Loading
+32 −21
Original line number Diff line number Diff line
@@ -443,15 +443,12 @@ TokenStreamCharsBase<CharT>::TokenStreamCharsBase(JSContext* cx, const CharT* ch

template<>
MOZ_MUST_USE bool
TokenStreamCharsBase<char16_t>::fillCharBufferWithTemplateStringContents(const char16_t* cur,
TokenStreamCharsBase<char16_t>::fillCharBufferFromSourceNormalizingAsciiLineBreaks(const char16_t* cur,
                                                                                   const char16_t* end)
{
    MOZ_ASSERT(this->charBuffer.length() == 0);

    while (cur < end) {
        // Template literals normalize only '\r' and "\r\n" to '\n'.  The
        // Unicode separators need no special handling here.
        // https://tc39.github.io/ecma262/#sec-static-semantics-tv-and-trv
        char16_t ch = *cur++;
        if (ch == '\r') {
            ch = '\n';
@@ -881,37 +878,51 @@ TokenStreamSpecific<CharT, AnyCharsAccess>::currentLineAndColumn(uint32_t* line,

template<>
bool
TokenStreamCharsBase<Utf8Unit>::addLineOfContext(JSContext* cx, ErrorMetadata* err,
                                                 uint32_t offset)
TokenStreamCharsBase<Utf8Unit>::addLineOfContext(ErrorMetadata* err, uint32_t offset)
{
    // The specialization below is almost usable if changed to be a definition
    // for any CharT, but it demands certain UTF-8-specific functionality that
    // hasn't been defined yet.  Use a placeholder definition until such
    // functionality is in place.
    // The specialization below is 100% usable if tweaked to be a definition
    // for any CharT, but it demands SourceUnits::findWindow{Start,End} and
    // TokenStreamCharsBase::fillCharBufferFromSourceNormalizingAsciiLineBreaks
    // for UTF-8 that haven't been defined yet.  Use a placeholder definition
    // until those are in place.
    return true;
}

template<>
bool
TokenStreamCharsBase<char16_t>::addLineOfContext(JSContext* cx, ErrorMetadata* err,
                                                 uint32_t offset)
TokenStreamCharsBase<char16_t>::addLineOfContext(ErrorMetadata* err, uint32_t offset)
{
    using CharT = char16_t;
    size_t windowStart = sourceUnits.findWindowStart(offset);
    size_t windowEnd = sourceUnits.findWindowEnd(offset);

    size_t windowLength = windowEnd - windowStart;
    MOZ_ASSERT(windowLength <= SourceUnits::WindowRadius * 2);

    // Create the windowed string, not including the potential line
    // terminator.
    StringBuffer windowBuf(cx);
    if (!windowBuf.append(sourceUnits.codeUnitPtrAt(windowStart), windowLength) ||
        !windowBuf.append('\0'))
    {
        return false;
    // Don't add a useless "line" of context when the window ends up empty
    // because of an invalid encoding at the start of a line.
    if (windowLength == 0) {
        MOZ_ASSERT(err->lineOfContext == nullptr,
                   "ErrorMetadata::lineOfContext must be null so we don't "
                   "have to set the lineLength/tokenOffset fields");
        return true;
    }

    err->lineOfContext.reset(windowBuf.stealChars());
    // We might have hit an error while processing some source code feature
    // that's accumulating text into |this->charBuffer| -- e.g. we could be
    // halfway into a regular expression literal, then encounter invalid UTF-8.
    // Thus we must clear |this->charBuffer| of prior work.
    this->charBuffer.clear();

    const CharT* start = sourceUnits.codeUnitPtrAt(windowStart);
    if (!fillCharBufferFromSourceNormalizingAsciiLineBreaks(start, start + windowLength))
        return false;

    // The windowed string is null-terminated.
    if (!this->charBuffer.append('\0'))
        return false;

    err->lineOfContext.reset(this->charBuffer.extractOrCopyRawBuffer());
    if (!err->lineOfContext)
        return false;

+15 −14
Original line number Diff line number Diff line
@@ -1314,12 +1314,12 @@ class TokenStreamCharsBase
    template<typename T> inline void consumeKnownCodeUnit(T) = delete;

    /**
     * Accumulate the provided range of already-validated (i.e. valid UTF-8, or
     * anything if CharT is char16_t because JS permits lone and mispaired
     * surrogates) raw template literal text (i.e. containing no escapes or
     * substitutions) into |charBuffer|.
     * Accumulate the provided range of already-validated text (valid UTF-8, or
     * anything if CharT is char16_t because JS allows lone surrogates) into
     * |charBuffer|.  Normalize '\r', '\n', and "\r\n" into '\n'.
     */
    MOZ_MUST_USE bool fillCharBufferWithTemplateStringContents(const CharT* cur, const CharT* end);
    MOZ_MUST_USE bool
    fillCharBufferFromSourceNormalizingAsciiLineBreaks(const CharT* cur, const CharT* end);

    /**
     * Add a null-terminated line of context to error information, for the line
@@ -1334,7 +1334,7 @@ class TokenStreamCharsBase
     * This function is quite internal, and you probably should be calling one
     * of its existing callers instead.
     */
    MOZ_MUST_USE bool addLineOfContext(JSContext* cx, ErrorMetadata* err, uint32_t offset);
    MOZ_MUST_USE bool addLineOfContext(ErrorMetadata* err, uint32_t offset);

  protected:
    /** Code units in the source code being tokenized. */
@@ -1474,7 +1474,6 @@ class GeneralTokenStreamChars
    using SpecializedCharsBase = SpecializedTokenStreamCharsBase<CharT>;

  private:
    using CharsBase::addLineOfContext;
    // Deliberately don't |using CharsBase::sourceUnits| because of bug 1472569.  :-(

  private:
@@ -1504,8 +1503,9 @@ class GeneralTokenStreamChars
    uint32_t matchExtendedUnicodeEscape(uint32_t* codePoint);

  protected:
    using CharsBase::addLineOfContext;
    using TokenStreamCharsShared::drainCharBufferIntoAtom;
    using CharsBase::fillCharBufferWithTemplateStringContents;
    using CharsBase::fillCharBufferFromSourceNormalizingAsciiLineBreaks;

    using typename CharsBase::SourceUnits;

@@ -1608,16 +1608,14 @@ class GeneralTokenStreamChars
     * more readable.
     */
    MOZ_MUST_USE bool internalComputeLineOfContext(ErrorMetadata* err, uint32_t offset) {
        TokenStreamAnyChars& anyChars = anyCharsAccess();

        // We only have line-start information for the current line.  If the error
        // is on a different line, we can't easily provide context.  (This means
        // any error in a multi-line token, e.g. an unterminated multiline string
        // literal, won't have context.)
        if (err->lineNumber != anyChars.lineno)
        if (err->lineNumber != anyCharsAccess().lineno)
            return true;

        return addLineOfContext(anyChars.cx, err, offset);
        return addLineOfContext(err, offset);
    }

  public:
@@ -1636,7 +1634,10 @@ class GeneralTokenStreamChars
            end = this->sourceUnits.codeUnitPtrAt(anyChars.currentToken().pos.end - 1);
        }

        if (!fillCharBufferWithTemplateStringContents(cur, end))
        // Template literals normalize only '\r' and "\r\n" to '\n'; Unicode
        // separators don't need special handling.
        // https://tc39.github.io/ecma262/#sec-static-semantics-tv-and-trv
        if (!fillCharBufferFromSourceNormalizingAsciiLineBreaks(cur, end))
            return nullptr;

        return drainCharBufferIntoAtom(anyChars.cx);
@@ -1881,7 +1882,7 @@ class MOZ_STACK_CLASS TokenStreamSpecific
    using SpecializedChars::consumeRestOfSingleLineComment;
    using TokenStreamCharsShared::copyCharBufferTo;
    using TokenStreamCharsShared::drainCharBufferIntoAtom;
    using CharsBase::fillCharBufferWithTemplateStringContents;
    using CharsBase::fillCharBufferFromSourceNormalizingAsciiLineBreaks;
    using SpecializedChars::getCodePoint;
    using GeneralCharsBase::getCodeUnit;
    using SpecializedChars::getFullAsciiCodePoint;