Loading js/src/frontend/TokenStream.cpp +32 −21 Original line number Diff line number Diff line Loading @@ -443,15 +443,12 @@ TokenStreamCharsBase<CharT>::TokenStreamCharsBase(JSContext* cx, const CharT* ch template<> MOZ_MUST_USE bool TokenStreamCharsBase<char16_t>::fillCharBufferWithTemplateStringContents(const char16_t* cur, TokenStreamCharsBase<char16_t>::fillCharBufferFromSourceNormalizingAsciiLineBreaks(const char16_t* cur, const char16_t* end) { MOZ_ASSERT(this->charBuffer.length() == 0); while (cur < end) { // Template literals normalize only '\r' and "\r\n" to '\n'. The // Unicode separators need no special handling here. // https://tc39.github.io/ecma262/#sec-static-semantics-tv-and-trv char16_t ch = *cur++; if (ch == '\r') { ch = '\n'; Loading Loading @@ -881,37 +878,51 @@ TokenStreamSpecific<CharT, AnyCharsAccess>::currentLineAndColumn(uint32_t* line, template<> bool TokenStreamCharsBase<Utf8Unit>::addLineOfContext(JSContext* cx, ErrorMetadata* err, uint32_t offset) TokenStreamCharsBase<Utf8Unit>::addLineOfContext(ErrorMetadata* err, uint32_t offset) { // The specialization below is almost usable if changed to be a definition // for any CharT, but it demands certain UTF-8-specific functionality that // has't been defined yet. Use a placeholder definition until such // functionality is in place. // The specialization below is 100% usable if tweaked to be a definition // for any CharT, but it demands SourceUnits::findWindow{Start,End} and // TokenStreamCharsBase::fillCharBufferFromSourceNormalizingAsciiLineBreaks // for UTF-8 that haven't been defined yet. Use a placeholder definition // til those are place. return true; } template<> bool TokenStreamCharsBase<char16_t>::addLineOfContext(JSContext* cx, ErrorMetadata* err, uint32_t offset) TokenStreamCharsBase<char16_t>::addLineOfContext(ErrorMetadata* err, uint32_t offset) { using CharT = char16_t; size_t windowStart = sourceUnits.findWindowStart(offset); size_t windowEnd = sourceUnits.findWindowEnd(offset); size_t windowLength = windowEnd - windowStart; MOZ_ASSERT(windowLength <= SourceUnits::WindowRadius * 2); // Create the windowed string, not including the potential line // terminator. StringBuffer windowBuf(cx); if (!windowBuf.append(sourceUnits.codeUnitPtrAt(windowStart), windowLength) || !windowBuf.append('\0')) { return false; // Don't add a useless "line" of context when the window ends up empty // because of an invalid encoding at the start of a line. if (windowLength == 0) { MOZ_ASSERT(err->lineOfContext == nullptr, "ErrorMetadata::lineOfContext must be null so we don't " "have to set the lineLength/tokenOffset fields"); return true; } err->lineOfContext.reset(windowBuf.stealChars()); // We might have hit an error while processing some source code feature // that's accumulating text into |this->charBuffer| -- e.g. we could be // halfway into a regular expression literal, then encounter invalid UTF-8. // Thus we must clear |this->charBuffer| of prior work. this->charBuffer.clear(); const CharT* start = sourceUnits.codeUnitPtrAt(windowStart); if (!fillCharBufferFromSourceNormalizingAsciiLineBreaks(start, start + windowLength)) return false; // The windowed string is null-terminated. if (!this->charBuffer.append('\0')) return false; err->lineOfContext.reset(this->charBuffer.extractOrCopyRawBuffer()); if (!err->lineOfContext) return false; Loading js/src/frontend/TokenStream.h +15 −14 Original line number Diff line number Diff line Loading @@ -1314,12 +1314,12 @@ class TokenStreamCharsBase template<typename T> inline void consumeKnownCodeUnit(T) = delete; /** * Accumulate the provided range of already-validated (i.e. valid UTF-8, or * anything if CharT is char16_t because JS permits lone and mispaired * surrogates) raw template literal text (i.e. containing no escapes or * substitutions) into |charBuffer|. * Accumulate the provided range of already-validated text (valid UTF-8, or * anything if CharT is char16_t because JS allows lone surrogates) into * |charBuffer|. Normalize '\r', '\n', and "\r\n" into '\n'. */ MOZ_MUST_USE bool fillCharBufferWithTemplateStringContents(const CharT* cur, const CharT* end); MOZ_MUST_USE bool fillCharBufferFromSourceNormalizingAsciiLineBreaks(const CharT* cur, const CharT* end); /** * Add a null-terminated line of context to error information, for the line Loading @@ -1334,7 +1334,7 @@ class TokenStreamCharsBase * This function is quite internal, and you probably should be calling one * of its existing callers instead. */ MOZ_MUST_USE bool addLineOfContext(JSContext* cx, ErrorMetadata* err, uint32_t offset); MOZ_MUST_USE bool addLineOfContext(ErrorMetadata* err, uint32_t offset); protected: /** Code units in the source code being tokenized. */ Loading Loading @@ -1474,7 +1474,6 @@ class GeneralTokenStreamChars using SpecializedCharsBase = SpecializedTokenStreamCharsBase<CharT>; private: using CharsBase::addLineOfContext; // Deliberately don't |using CharsBase::sourceUnits| because of bug 1472569. :-( private: Loading Loading @@ -1504,8 +1503,9 @@ class GeneralTokenStreamChars uint32_t matchExtendedUnicodeEscape(uint32_t* codePoint); protected: using CharsBase::addLineOfContext; using TokenStreamCharsShared::drainCharBufferIntoAtom; using CharsBase::fillCharBufferWithTemplateStringContents; using CharsBase::fillCharBufferFromSourceNormalizingAsciiLineBreaks; using typename CharsBase::SourceUnits; Loading Loading @@ -1608,16 +1608,14 @@ class GeneralTokenStreamChars * more readable. */ MOZ_MUST_USE bool internalComputeLineOfContext(ErrorMetadata* err, uint32_t offset) { TokenStreamAnyChars& anyChars = anyCharsAccess(); // We only have line-start information for the current line. If the error // is on a different line, we can't easily provide context. (This means // any error in a multi-line token, e.g. an unterminated multiline string // literal, won't have context.) if (err->lineNumber != anyChars.lineno) if (err->lineNumber != anyCharsAccess().lineno) return true; return addLineOfContext(anyChars.cx, err, offset); return addLineOfContext(err, offset); } public: Loading @@ -1636,7 +1634,10 @@ class GeneralTokenStreamChars end = this->sourceUnits.codeUnitPtrAt(anyChars.currentToken().pos.end - 1); } if (!fillCharBufferWithTemplateStringContents(cur, end)) // Template literals normalize only '\r' and "\r\n" to '\n'; Unicode // separators don't need special handling. // https://tc39.github.io/ecma262/#sec-static-semantics-tv-and-trv if (!fillCharBufferFromSourceNormalizingAsciiLineBreaks(cur, end)) return nullptr; return drainCharBufferIntoAtom(anyChars.cx); Loading Loading @@ -1881,7 +1882,7 @@ class MOZ_STACK_CLASS TokenStreamSpecific using SpecializedChars::consumeRestOfSingleLineComment; using TokenStreamCharsShared::copyCharBufferTo; using TokenStreamCharsShared::drainCharBufferIntoAtom; using CharsBase::fillCharBufferWithTemplateStringContents; using CharsBase::fillCharBufferFromSourceNormalizingAsciiLineBreaks; using SpecializedChars::getCodePoint; using GeneralCharsBase::getCodeUnit; using SpecializedChars::getFullAsciiCodePoint; Loading Loading
js/src/frontend/TokenStream.cpp +32 −21 Original line number Diff line number Diff line Loading @@ -443,15 +443,12 @@ TokenStreamCharsBase<CharT>::TokenStreamCharsBase(JSContext* cx, const CharT* ch template<> MOZ_MUST_USE bool TokenStreamCharsBase<char16_t>::fillCharBufferWithTemplateStringContents(const char16_t* cur, TokenStreamCharsBase<char16_t>::fillCharBufferFromSourceNormalizingAsciiLineBreaks(const char16_t* cur, const char16_t* end) { MOZ_ASSERT(this->charBuffer.length() == 0); while (cur < end) { // Template literals normalize only '\r' and "\r\n" to '\n'. The // Unicode separators need no special handling here. // https://tc39.github.io/ecma262/#sec-static-semantics-tv-and-trv char16_t ch = *cur++; if (ch == '\r') { ch = '\n'; Loading Loading @@ -881,37 +878,51 @@ TokenStreamSpecific<CharT, AnyCharsAccess>::currentLineAndColumn(uint32_t* line, template<> bool TokenStreamCharsBase<Utf8Unit>::addLineOfContext(JSContext* cx, ErrorMetadata* err, uint32_t offset) TokenStreamCharsBase<Utf8Unit>::addLineOfContext(ErrorMetadata* err, uint32_t offset) { // The specialization below is almost usable if changed to be a definition // for any CharT, but it demands certain UTF-8-specific functionality that // has't been defined yet. Use a placeholder definition until such // functionality is in place. // The specialization below is 100% usable if tweaked to be a definition // for any CharT, but it demands SourceUnits::findWindow{Start,End} and // TokenStreamCharsBase::fillCharBufferFromSourceNormalizingAsciiLineBreaks // for UTF-8 that haven't been defined yet. Use a placeholder definition // til those are place. return true; } template<> bool TokenStreamCharsBase<char16_t>::addLineOfContext(JSContext* cx, ErrorMetadata* err, uint32_t offset) TokenStreamCharsBase<char16_t>::addLineOfContext(ErrorMetadata* err, uint32_t offset) { using CharT = char16_t; size_t windowStart = sourceUnits.findWindowStart(offset); size_t windowEnd = sourceUnits.findWindowEnd(offset); size_t windowLength = windowEnd - windowStart; MOZ_ASSERT(windowLength <= SourceUnits::WindowRadius * 2); // Create the windowed string, not including the potential line // terminator. StringBuffer windowBuf(cx); if (!windowBuf.append(sourceUnits.codeUnitPtrAt(windowStart), windowLength) || !windowBuf.append('\0')) { return false; // Don't add a useless "line" of context when the window ends up empty // because of an invalid encoding at the start of a line. if (windowLength == 0) { MOZ_ASSERT(err->lineOfContext == nullptr, "ErrorMetadata::lineOfContext must be null so we don't " "have to set the lineLength/tokenOffset fields"); return true; } err->lineOfContext.reset(windowBuf.stealChars()); // We might have hit an error while processing some source code feature // that's accumulating text into |this->charBuffer| -- e.g. we could be // halfway into a regular expression literal, then encounter invalid UTF-8. // Thus we must clear |this->charBuffer| of prior work. this->charBuffer.clear(); const CharT* start = sourceUnits.codeUnitPtrAt(windowStart); if (!fillCharBufferFromSourceNormalizingAsciiLineBreaks(start, start + windowLength)) return false; // The windowed string is null-terminated. if (!this->charBuffer.append('\0')) return false; err->lineOfContext.reset(this->charBuffer.extractOrCopyRawBuffer()); if (!err->lineOfContext) return false; Loading
js/src/frontend/TokenStream.h +15 −14 Original line number Diff line number Diff line Loading @@ -1314,12 +1314,12 @@ class TokenStreamCharsBase template<typename T> inline void consumeKnownCodeUnit(T) = delete; /** * Accumulate the provided range of already-validated (i.e. valid UTF-8, or * anything if CharT is char16_t because JS permits lone and mispaired * surrogates) raw template literal text (i.e. containing no escapes or * substitutions) into |charBuffer|. * Accumulate the provided range of already-validated text (valid UTF-8, or * anything if CharT is char16_t because JS allows lone surrogates) into * |charBuffer|. Normalize '\r', '\n', and "\r\n" into '\n'. */ MOZ_MUST_USE bool fillCharBufferWithTemplateStringContents(const CharT* cur, const CharT* end); MOZ_MUST_USE bool fillCharBufferFromSourceNormalizingAsciiLineBreaks(const CharT* cur, const CharT* end); /** * Add a null-terminated line of context to error information, for the line Loading @@ -1334,7 +1334,7 @@ class TokenStreamCharsBase * This function is quite internal, and you probably should be calling one * of its existing callers instead. */ MOZ_MUST_USE bool addLineOfContext(JSContext* cx, ErrorMetadata* err, uint32_t offset); MOZ_MUST_USE bool addLineOfContext(ErrorMetadata* err, uint32_t offset); protected: /** Code units in the source code being tokenized. */ Loading Loading @@ -1474,7 +1474,6 @@ class GeneralTokenStreamChars using SpecializedCharsBase = SpecializedTokenStreamCharsBase<CharT>; private: using CharsBase::addLineOfContext; // Deliberately don't |using CharsBase::sourceUnits| because of bug 1472569. :-( private: Loading Loading @@ -1504,8 +1503,9 @@ class GeneralTokenStreamChars uint32_t matchExtendedUnicodeEscape(uint32_t* codePoint); protected: using CharsBase::addLineOfContext; using TokenStreamCharsShared::drainCharBufferIntoAtom; using CharsBase::fillCharBufferWithTemplateStringContents; using CharsBase::fillCharBufferFromSourceNormalizingAsciiLineBreaks; using typename CharsBase::SourceUnits; Loading Loading @@ -1608,16 +1608,14 @@ class GeneralTokenStreamChars * more readable. */ MOZ_MUST_USE bool internalComputeLineOfContext(ErrorMetadata* err, uint32_t offset) { TokenStreamAnyChars& anyChars = anyCharsAccess(); // We only have line-start information for the current line. If the error // is on a different line, we can't easily provide context. (This means // any error in a multi-line token, e.g. an unterminated multiline string // literal, won't have context.) if (err->lineNumber != anyChars.lineno) if (err->lineNumber != anyCharsAccess().lineno) return true; return addLineOfContext(anyChars.cx, err, offset); return addLineOfContext(err, offset); } public: Loading @@ -1636,7 +1634,10 @@ class GeneralTokenStreamChars end = this->sourceUnits.codeUnitPtrAt(anyChars.currentToken().pos.end - 1); } if (!fillCharBufferWithTemplateStringContents(cur, end)) // Template literals normalize only '\r' and "\r\n" to '\n'; Unicode // separators don't need special handling. // https://tc39.github.io/ecma262/#sec-static-semantics-tv-and-trv if (!fillCharBufferFromSourceNormalizingAsciiLineBreaks(cur, end)) return nullptr; return drainCharBufferIntoAtom(anyChars.cx); Loading Loading @@ -1881,7 +1882,7 @@ class MOZ_STACK_CLASS TokenStreamSpecific using SpecializedChars::consumeRestOfSingleLineComment; using TokenStreamCharsShared::copyCharBufferTo; using TokenStreamCharsShared::drainCharBufferIntoAtom; using CharsBase::fillCharBufferWithTemplateStringContents; using CharsBase::fillCharBufferFromSourceNormalizingAsciiLineBreaks; using SpecializedChars::getCodePoint; using GeneralCharsBase::getCodeUnit; using SpecializedChars::getFullAsciiCodePoint; Loading