Bug 1476866 - Implement TokenStreamCharsBase::addLineOfContext that does all... (563f25bd) · Commits · The Tor Project / Applications / Mullvad Browser

js/src/frontend/TokenStream.cpp

+32 −21

Original line number	Diff line number	Diff line
		@@ -443,15 +443,12 @@ TokenStreamCharsBase<CharT>::TokenStreamCharsBase(JSContext* cx, const CharT* ch

		template<>
		MOZ_MUST_USE bool
		TokenStreamCharsBase<char16_t>::fillCharBufferWithTemplateStringContents(const char16_t* cur,
		TokenStreamCharsBase<char16_t>::fillCharBufferFromSourceNormalizingAsciiLineBreaks(const char16_t* cur,
		const char16_t* end)
		{
		MOZ_ASSERT(this->charBuffer.length() == 0);

		while (cur < end) {
		// Template literals normalize only '\r' and "\r\n" to '\n'. The
		// Unicode separators need no special handling here.
		// https://tc39.github.io/ecma262/#sec-static-semantics-tv-and-trv
		char16_t ch = *cur++;
		if (ch == '\r') {
		ch = '\n';
		@@ -881,37 +878,51 @@ TokenStreamSpecific<CharT, AnyCharsAccess>::currentLineAndColumn(uint32_t* line,

		template<>
		bool
		TokenStreamCharsBase<Utf8Unit>::addLineOfContext(JSContext* cx, ErrorMetadata* err,
		uint32_t offset)
		TokenStreamCharsBase<Utf8Unit>::addLineOfContext(ErrorMetadata* err, uint32_t offset)
		{
		// The specialization below is almost usable if changed to be a definition
		// for any CharT, but it demands certain UTF-8-specific functionality that
		// hasn't been defined yet. Use a placeholder definition until such
		// functionality is in place.
		// The specialization below is 100% usable if tweaked to be a definition
		// for any CharT, but it demands SourceUnits::findWindow{Start,End} and
		// TokenStreamCharsBase::fillCharBufferFromSourceNormalizingAsciiLineBreaks
		// for UTF-8 that haven't been defined yet. Use a placeholder definition
		// until those are in place.
		return true;
		}

		template<>
		bool
		TokenStreamCharsBase<char16_t>::addLineOfContext(JSContext* cx, ErrorMetadata* err,
		uint32_t offset)
		TokenStreamCharsBase<char16_t>::addLineOfContext(ErrorMetadata* err, uint32_t offset)
		{
		using CharT = char16_t;
		size_t windowStart = sourceUnits.findWindowStart(offset);
		size_t windowEnd = sourceUnits.findWindowEnd(offset);

		size_t windowLength = windowEnd - windowStart;
		MOZ_ASSERT(windowLength <= SourceUnits::WindowRadius * 2);

		// Create the windowed string, not including the potential line
		// terminator.
		StringBuffer windowBuf(cx);
		if (!windowBuf.append(sourceUnits.codeUnitPtrAt(windowStart), windowLength) \|\|
		!windowBuf.append('\0'))
		{
		return false;
		// Don't add a useless "line" of context when the window ends up empty
		// because of an invalid encoding at the start of a line.
		if (windowLength == 0) {
		MOZ_ASSERT(err->lineOfContext == nullptr,
		"ErrorMetadata::lineOfContext must be null so we don't "
		"have to set the lineLength/tokenOffset fields");
		return true;
		}

		err->lineOfContext.reset(windowBuf.stealChars());
		// We might have hit an error while processing some source code feature
		// that's accumulating text into \|this->charBuffer\| -- e.g. we could be
		// halfway into a regular expression literal, then encounter invalid UTF-8.
		// Thus we must clear \|this->charBuffer\| of prior work.
		this->charBuffer.clear();

		const CharT* start = sourceUnits.codeUnitPtrAt(windowStart);
		if (!fillCharBufferFromSourceNormalizingAsciiLineBreaks(start, start + windowLength))
		return false;

		// The windowed string is null-terminated.
		if (!this->charBuffer.append('\0'))
		return false;

		err->lineOfContext.reset(this->charBuffer.extractOrCopyRawBuffer());
		if (!err->lineOfContext)
		return false;

js/src/frontend/TokenStream.h

+15 −14

Original line number	Diff line number	Diff line
		@@ -1314,12 +1314,12 @@ class TokenStreamCharsBase
		template<typename T> inline void consumeKnownCodeUnit(T) = delete;

		/**
		* Accumulate the provided range of already-validated (i.e. valid UTF-8, or
		* anything if CharT is char16_t because JS permits lone and mispaired
		* surrogates) raw template literal text (i.e. containing no escapes or
		* substitutions) into \|charBuffer\|.
		* Accumulate the provided range of already-validated text (valid UTF-8, or
		* anything if CharT is char16_t because JS allows lone surrogates) into
		* \|charBuffer\|. Normalize '\r', '\n', and "\r\n" into '\n'.
		*/
		MOZ_MUST_USE bool fillCharBufferWithTemplateStringContents(const CharT* cur, const CharT* end);
		MOZ_MUST_USE bool
		fillCharBufferFromSourceNormalizingAsciiLineBreaks(const CharT* cur, const CharT* end);

		/**
		* Add a null-terminated line of context to error information, for the line
		@@ -1334,7 +1334,7 @@ class TokenStreamCharsBase
		* This function is quite internal, and you probably should be calling one
		* of its existing callers instead.
		*/
		MOZ_MUST_USE bool addLineOfContext(JSContext* cx, ErrorMetadata* err, uint32_t offset);
		MOZ_MUST_USE bool addLineOfContext(ErrorMetadata* err, uint32_t offset);

		protected:
		/** Code units in the source code being tokenized. */
		@@ -1474,7 +1474,6 @@ class GeneralTokenStreamChars
		using SpecializedCharsBase = SpecializedTokenStreamCharsBase<CharT>;

		private:
		using CharsBase::addLineOfContext;
		// Deliberately don't \|using CharsBase::sourceUnits\| because of bug 1472569. :-(

		private:
		@@ -1504,8 +1503,9 @@ class GeneralTokenStreamChars
		uint32_t matchExtendedUnicodeEscape(uint32_t* codePoint);

		protected:
		using CharsBase::addLineOfContext;
		using TokenStreamCharsShared::drainCharBufferIntoAtom;
		using CharsBase::fillCharBufferWithTemplateStringContents;
		using CharsBase::fillCharBufferFromSourceNormalizingAsciiLineBreaks;

		using typename CharsBase::SourceUnits;

		@@ -1608,16 +1608,14 @@ class GeneralTokenStreamChars
		* more readable.
		*/
		MOZ_MUST_USE bool internalComputeLineOfContext(ErrorMetadata* err, uint32_t offset) {
		TokenStreamAnyChars& anyChars = anyCharsAccess();

		// We only have line-start information for the current line. If the error
		// is on a different line, we can't easily provide context. (This means
		// any error in a multi-line token, e.g. an unterminated multiline string
		// literal, won't have context.)
		if (err->lineNumber != anyChars.lineno)
		if (err->lineNumber != anyCharsAccess().lineno)
		return true;

		return addLineOfContext(anyChars.cx, err, offset);
		return addLineOfContext(err, offset);
		}

		public:
		@@ -1636,7 +1634,10 @@ class GeneralTokenStreamChars
		end = this->sourceUnits.codeUnitPtrAt(anyChars.currentToken().pos.end - 1);
		}

		if (!fillCharBufferWithTemplateStringContents(cur, end))
		// Template literals normalize only '\r' and "\r\n" to '\n'; Unicode
		// separators don't need special handling.
		// https://tc39.github.io/ecma262/#sec-static-semantics-tv-and-trv
		if (!fillCharBufferFromSourceNormalizingAsciiLineBreaks(cur, end))
		return nullptr;

		return drainCharBufferIntoAtom(anyChars.cx);
		@@ -1881,7 +1882,7 @@ class MOZ_STACK_CLASS TokenStreamSpecific
		using SpecializedChars::consumeRestOfSingleLineComment;
		using TokenStreamCharsShared::copyCharBufferTo;
		using TokenStreamCharsShared::drainCharBufferIntoAtom;
		using CharsBase::fillCharBufferWithTemplateStringContents;
		using CharsBase::fillCharBufferFromSourceNormalizingAsciiLineBreaks;
		using SpecializedChars::getCodePoint;
		using GeneralCharsBase::getCodeUnit;
		using SpecializedChars::getFullAsciiCodePoint;