Loading docshell/base/URIFixup.jsm +169 −242 Original line number Diff line number Diff line Loading @@ -7,11 +7,14 @@ /** * This component handles fixing up URIs, by correcting obvious typos and adding * missing schemes. * URI references: * http://www.faqs.org/rfcs/rfc1738.html * http://www.faqs.org/rfcs/rfc2396.html */ // TODO (Bug 1623383): This is a direct conversion from .cpp, as such a few // functions are overly complex and should be cleaned up. /* eslint complexity: ["error", 63] */ // getFixupURIInfo has a complex logic, that likely could be simplified, but // the risk of regressions is high, thus that should be done with care. /* eslint complexity: ["error", 39] */ var EXPORTED_SYMBOLS = ["URIFixup", "URIFixupInfo"]; Loading Loading @@ -74,36 +77,25 @@ const { const COMMON_PROTOCOLS = ["http", "https", "ftp", "file"]; // Regex used to identify user:password tokens in url strings. // This is not a strict character check, because we try to fixup this part of // a the url too. // This is not a strict valid characters check, because we try to fixup this // part of the url too. XPCOMUtils.defineLazyGetter( this, "userPasswordRegex", () => /^([a-z+.-]+:\/{0,3})*[^\/@]+@.+/i ); // Regex used to identify ascii alphabetical characters. XPCOMUtils.defineLazyGetter(this, "asciiAlphaRegex", () => /^[a-z]$/i); // Regex used to look for ascii alphabetical characters. XPCOMUtils.defineLazyGetter(this, "asciiAlphaRegex", () => /[a-z]/i); // Regex used to identify ascii a-f characters. XPCOMUtils.defineLazyGetter(this, "asciiHexRegex", () => /^[a-f0-9]$/i); // Regex used to identify ascii numbers. XPCOMUtils.defineLazyGetter( this, "asciiNumberRegex", () => /^[0-9]+(\.[0-9]+)?$/ ); // Regex used to identify numbers. XPCOMUtils.defineLazyGetter(this, "numberRegex", () => /^[0-9]+(\.[0-9]+)?$/); // Regex used to identify tab separated content (having at least 2 tabs). XPCOMUtils.defineLazyGetter(this, "maxOneTabRegex", () => /^[^\t]*\t?[^\t]*$/); // Regex to test if the protocol might actually be a url without a protocol: // // http://www.faqs.org/rfcs/rfc1738.html // http://www.faqs.org/rfcs/rfc2396.html // // e.g. Anything of the form: // Regex used to test if a string with a protocol might instead be a url // without a protocol but with a port: // // <hostname>:<port> or // <hostname>:<port>/ Loading Loading @@ -138,18 +130,34 @@ XPCOMUtils.defineLazyGetter( () => /^([a-z][a-z0-9.+\t-]*):/i ); // Regex used to match IPs. Note that these are not made to validate IPs, but // just to detect strings that look like an IP. They also skip protocol. // For IPv4 this also accepts a shorthand format with just 2 dots. XPCOMUtils.defineLazyGetter( this, "IPv4LikeRegex", () => /^(?:[a-z+.-]+:\/*(?!\/))?(?:\d{1,3}\.){2,3}\d{1,3}(?::\d+|\/)?/i ); XPCOMUtils.defineLazyGetter( this, "IPv6LikeRegex", () => /^(?:[a-z+.-]+:\/*(?!\/))?\[(?:[0-9a-f]{0,4}:){0,7}[0-9a-f]{0,4}\]?(?::\d+|\/)?/i ); // Cache of whitelisted domains. XPCOMUtils.defineLazyGetter(this, "domainsWhitelist", () => { const branch = "browser.fixup.domainwhitelist."; let domains = new Set( Services.prefs .getChildList("browser.fixup.domainwhitelist.") .getChildList(branch) .filter(p => Services.prefs.getBoolPref(p, false)) .map(p => p.substring(30)) .map(p => p.substring(branch.length)) ); // Hold onto the observer. // Hold onto the observer to avoid it being GC-ed. domains._observer = { observe(subject, topic, data) { let domain = data.substring(30); let domain = data.substring(branch.length); if (Services.prefs.getBoolPref(data, false)) { domains.add(domain); } else { Loading @@ -161,11 +169,7 @@ XPCOMUtils.defineLazyGetter(this, "domainsWhitelist", () => { Ci.nsISupportsWeakReference, ]), }; Services.prefs.addObserver( "browser.fixup.domainwhitelist.", domains._observer, true ); Services.prefs.addObserver(branch, domains._observer, true); return domains; }); Loading Loading @@ -209,40 +213,12 @@ URIFixup.prototype = { let info = new URIFixupInfo(uriString); let scheme = extractScheme(uriString); let isCommonProtocol = COMMON_PROTOCOLS.includes(scheme); // View-source is a pseudo scheme. We're interested in fixing up the stuff // after it. The easiest way to do that is to call this method again with // the "view-source:" lopped off and then prepend it again afterwards. if (scheme == "view-source") { // We disable keyword lookup and alternate URIs so that small typos don't // cause us to look at very different domains. let newFixupFlags = fixupFlags & ~FIXUP_FLAG_ALLOW_KEYWORD_LOOKUP & ~FIXUP_FLAGS_MAKE_ALTERNATE_URI; let innerURIString = uriString.substring(12).trim(); // Prevent recursion. let innerScheme = extractScheme(innerURIString); if (innerScheme == "view-source") { throw new Components.Exception( "Prevent view-source recursion", Cr.NS_ERROR_FAILURE ); } let innerInfo = this.getFixupURIInfo( innerURIString, newFixupFlags, info.preferredURI = info.fixedURI = fixupViewSource( uriString, fixupFlags, postData ); if (!innerInfo.preferredURI) { throw new Components.Exception("No preferredURI", Cr.NS_ERROR_FAILURE); } info.preferredURI = info.fixedURI = Services.io.newURI( "view-source:" + innerInfo.preferredURI.spec ); return info; } Loading @@ -259,13 +235,14 @@ URIFixup.prototype = { // Fix up common scheme typos. // TODO: Use levenshtein distance here? let isCommonProtocol = COMMON_PROTOCOLS.includes(scheme); if ( fixupSchemeTypos && fixupFlags & FIXUP_FLAG_FIX_SCHEME_TYPOS && scheme && !isCommonProtocol ) { for (let [typo, fixed] of [ info.fixupChangedProtocol = [ ["ttp", "http"], ["htp", "http"], ["ttps", "https"], Loading @@ -274,19 +251,18 @@ URIFixup.prototype = { ["htps", "https"], ["ile", "file"], ["le", "file"], ]) { ].some(([typo, fixed]) => { if (uriString.startsWith(typo)) { scheme = fixed; uriString = scheme + uriString.substring(typo.length); info.fixupChangedProtocol = true; isCommonProtocol = true; break; } return true; } return false; }); } // Now we need to check whether "scheme" is something we don't // really know about. // Now check whether "scheme" is something we don't know about. let isDefaultProtocolHandler = !isCommonProtocol && scheme && Loading Loading @@ -329,9 +305,7 @@ URIFixup.prototype = { if (info.fixedURI) { if (!info.preferredURI) { if (fixupFlags & FIXUP_FLAGS_MAKE_ALTERNATE_URI) { info.fixupCreatedAlternateURI = makeAlternateFixedURI(info); } maybeSetAlternateFixedURI(info, fixupFlags); info.preferredURI = info.fixedURI; } return info; Loading @@ -342,15 +316,10 @@ URIFixup.prototype = { // Prune duff protocol schemes: // ://totallybroken.url.com // //shorthand.url.com let inputHadDuffProtocol = false; if (!scheme) { if (uriString.startsWith("://")) { uriString = uriString.substring(3); inputHadDuffProtocol = true; } else if (uriString.startsWith("//")) { uriString = uriString.substring(2); inputHadDuffProtocol = true; } let inputHadDuffProtocol = uriString.startsWith("://") || uriString.startsWith("//"); if (inputHadDuffProtocol) { uriString = uriString.replace(/^:?\/\//, ""); } // Avoid fixing up content that looks like tab-separated values. Loading @@ -361,11 +330,7 @@ URIFixup.prototype = { if (uriWithProtocol) { info.fixedURI = uriWithProtocol; info.fixupChangedProtocol = true; // Did the caller want us to try an alternative URI? // If so, attempt to fixup http://foo into http://www.foo.com if (fixupFlags & FIXUP_FLAGS_MAKE_ALTERNATE_URI) { info.fixupCreatedAlternateURI = makeAlternateFixedURI(info); } maybeSetAlternateFixedURI(info, fixupFlags); info.preferredURI = info.fixedURI; } } Loading @@ -374,12 +339,11 @@ URIFixup.prototype = { if ( keywordEnabled && fixupFlags & FIXUP_FLAG_ALLOW_KEYWORD_LOOKUP && !inputHadDuffProtocol !inputHadDuffProtocol && keywordURIFixup(uriString, info, isPrivateContext, postData) ) { if (keywordURIFixup(uriString, info, isPrivateContext, postData)) { return info; } } if ( info.fixedURI && Loading @@ -400,7 +364,7 @@ URIFixup.prototype = { } if (!info.preferredURI) { // we couldn't salvage anything. // We couldn't salvage anything. throw new Components.Exception( "Couldn't build a valid uri", Cr.NS_ERROR_MALFORMED_URI Loading Loading @@ -575,6 +539,7 @@ URIFixupInfo.prototype = { * Implementation of isDomainWhitelisted, so we don't have to go through the * service. * @param {string} asciiHost * @returns {boolean} whether the domain is whitelisted */ function isDomainWhitelisted(asciiHost) { if (dnsFirstForSingleWords) { Loading @@ -595,8 +560,8 @@ function isDomainWhitelisted(asciiHost) { * If the suffix is unknown due to a typo this will try to fix it up. * @param {URIFixupInfo} info about the uri to check. * @note this may modify the public suffix of info.fixedURI. * @returns false if the public suffix is unknown, true in any other case, * included if it's not present. * @returns {boolean} false if the public suffix is unknown, true in any other * case, included if it's not present. */ function checkAndFixPublicSuffix(info) { let uri = info.fixedURI; Loading Loading @@ -625,7 +590,7 @@ function checkAndFixPublicSuffix(info) { // Suffix is unknown, try to fix most common 3 chars TLDs typos. // .com is the most commonly mistyped tld, so it has more cases. let suffix = Services.eTLD.getPublicSuffix(uri); if (!suffix || asciiNumberRegex.test(suffix)) { if (!suffix || numberRegex.test(suffix)) { return true; } for (let [typo, fixed] of [ Loading Loading @@ -685,12 +650,14 @@ function tryKeywordFixupForURIInfo( * This generates an alternate fixedURI, by adding a prefix and a suffix to * the fixedURI host, if and only if the protocol is http. It should _never_ * modify URIs with other protocols. * @param info an URIInfo object * @returns whether an alternate uri was generated. * @param {URIFixupInfo} info an URIInfo object * @param {integer} fixupFlags the fixup flags * @returns {boolean} Whether an alternate uri was generated */ function makeAlternateFixedURI(info) { function maybeSetAlternateFixedURI(info, fixupFlags) { let uri = info.fixedURI; if ( !(fixupFlags & FIXUP_FLAGS_MAKE_ALTERNATE_URI) || !alternateEnabled || // Code only works for http. Not for any other protocol including https! !uri.schemeIs("http") || Loading @@ -701,14 +668,13 @@ function makeAlternateFixedURI(info) { ) { return false; } let oldHost = uri.host; // Don't create an alternate uri for localhost, because it would be confusing. if (oldHost == "localhost") { return false; } let numDots = (oldHost.match(/\./g) || []).length; // Get the prefix and suffix to stick onto the new hostname. By default these // are www. & .com but they could be any other value, e.g. www. & .org let prefix = Services.prefs.getCharPref( Loading @@ -721,6 +687,7 @@ function makeAlternateFixedURI(info) { ); let newHost = ""; let numDots = (oldHost.match(/\./g) || []).length; if (numDots == 0) { newHost = prefix + oldHost + suffix; } else if (numDots == 1) { Loading @@ -746,12 +713,13 @@ function makeAlternateFixedURI(info) { } return false; } info.fixupCreatedAlternateURI = true; return true; } /** * Try to fixup a file URI. * @param uriString The file URI to fix. * @param {string} uriString The file URI to fix. * @returns {nsIURI} a fixed uri or null. * @note FileURIFixup only returns a URI if it has to add the file: protocol. */ Loading Loading @@ -791,7 +759,7 @@ function fileURIFixup(uriString) { * user:pass@no-scheme.com * * @param {string} uriString The string to fixup. * @returns an nsIURI built adding the default protocol to the string, * @returns {nsIURI} an nsIURI built adding the default protocol to the string, * or null if fixing was not possible. */ function fixupURIProtocol(uriString) { Loading Loading @@ -837,132 +805,41 @@ function keywordURIFixup(uriString, fixupInfo, isPrivateContext, postData) { // "nonQualifiedHost?some args" // "blah.com." // Note: -1 is greater than any actual location // in practice. So if we cast all locations to uint32_t, then a < // b guarantees that either b is kNotFound and a is found, or both // are found and a found before b. // TODO: the cpp code used to assign -1 to an unsigned int, causing overflow. // We should replace most of this with better js code. const NOT_FOUND = Infinity; let firstDotLoc = NOT_FOUND; let lastDotLoc = NOT_FOUND; let firstColonLoc = NOT_FOUND; let firstQuoteLoc = NOT_FOUND; let firstSpaceLoc = NOT_FOUND; let firstQMarkLoc = NOT_FOUND; let lastLSBracketLoc = NOT_FOUND; let lastSlashLoc = NOT_FOUND; let foundDots = 0; let foundColons = 0; let foundDigits = 0; let foundRSBrackets = 0; let looksLikeIpv6 = true; let hasAsciiAlpha = false; for (let i = 0, pos = 0; i < uriString.length; ++i, ++pos) { let iter = uriString[i]; if (pos >= 1 && foundRSBrackets == 0) { if ( !( lastLSBracketLoc == 0 && (iter == ":" || iter == "." || iter == "]" || asciiHexRegex.test(iter)) ) ) { looksLikeIpv6 = false; } } // If we're at the end of the string or this is the first slash, // check if the thing before the slash looks like ipv4: if ( (i == uriString.length - 1 || (lastSlashLoc == NOT_FOUND && iter == "/")) && // Need 2 or 3 dots + only digits (foundDots == 2 || foundDots == 3) && // and they should be all that came before now: (foundDots + foundDigits == pos || // or maybe there was also exactly 1 colon that came after the last // dot, and the digits, dots and colon were all that came before now: (foundColons == 1 && firstColonLoc > lastDotLoc && foundDots + foundDigits + foundColons == pos)) ) { // Hurray, we got ourselves some ipv4! // At this point, there's no way we will do a keyword lookup, so just bail // immediately: // Check for IPs. if (IPv4LikeRegex.test(uriString) || IPv6LikeRegex.test(uriString)) { return false; } if (iter == ".") { foundDots++; lastDotLoc = pos; if (firstDotLoc == NOT_FOUND) { firstDotLoc = pos; } } else if (iter == ":") { foundColons++; if (firstColonLoc == NOT_FOUND) { firstColonLoc = pos; } } else if (iter == " " && firstSpaceLoc == NOT_FOUND) { firstSpaceLoc = pos; } else if (iter == "?" && firstQMarkLoc == NOT_FOUND) { firstQMarkLoc = pos; } else if ((iter == "'" || iter == '"') && firstQuoteLoc == NOT_FOUND) { firstQuoteLoc = pos; } else if (iter == "[") { lastLSBracketLoc = pos; } else if (iter == "]") { foundRSBrackets++; } else if (iter == "/") { lastSlashLoc = pos; } else if (asciiAlphaRegex.test(iter)) { hasAsciiAlpha = true; } else if (asciiNumberRegex.test(iter)) { foundDigits++; } } if (lastLSBracketLoc > 0 || foundRSBrackets != 1) { looksLikeIpv6 = false; // We do keyword lookups if the input starts with a question mark, or if it // contains a space or quote, provided they don't come after a dot, colon or // question mark. if (uriString.startsWith("?") || /^[^.:?]*[\s"']/.test(uriString)) { return tryKeywordFixupForURIInfo( fixupInfo.originalInput, fixupInfo, isPrivateContext, postData ); } // If there are only colons and only hexadecimal characters ([a-z][0-9]) // enclosed in [], then don't do a keyword lookup if (looksLikeIpv6) { // Avoid lookup if we can identify an host and it's whitelisted. // Note that if dnsFirstForSingleWords is true isDomainWhitelisted will always // return true, so we can avoid checking dnsFirstForSingleWords after this. let asciiHost = fixupInfo.fixedURI?.asciiHost; if (asciiHost && isDomainWhitelisted(asciiHost)) { return false; } let asciiHost = fixupInfo.fixedURI && fixupInfo.fixedURI.asciiHost; // Or when the asciiHost is the same as displayHost and there are no // alphabetical characters let displayHost = fixupInfo.fixedURI && fixupInfo.fixedURI.displayHost; // We do keyword lookups if a space or quote preceded the dot, colon // or question mark (or if the latter is not found, or if the input starts // with a question mark) let hasAsciiAlpha = asciiAlphaRegex.test(uriString); if ( ((firstSpaceLoc < firstDotLoc || firstQuoteLoc < firstDotLoc) && (firstSpaceLoc < firstColonLoc || firstQuoteLoc < firstColonLoc) && (firstSpaceLoc < firstQMarkLoc || firstQuoteLoc < firstQMarkLoc)) || firstQMarkLoc == 0 ) { return tryKeywordFixupForURIInfo( fixupInfo.originalInput, fixupInfo, isPrivateContext, postData ); // ... or when the asciiHost is the same as displayHost and there are no // characters from [a-z][A-Z] } else if ( asciiHost && displayHost && !hasAsciiAlpha && asciiHost.toLowerCase() == displayHost.toLowerCase() ) { if (!dnsFirstForSingleWords) { return tryKeywordFixupForURIInfo( fixupInfo.originalInput, fixupInfo, Loading @@ -970,32 +847,33 @@ function keywordURIFixup(uriString, fixupInfo, isPrivateContext, postData) { postData ); } // ... or if there is no question mark or colon, and there is either no // dot, or exactly 1 and it is the first or last character of the input: } else if ( (firstDotLoc == NOT_FOUND || (foundDots == 1 && (firstDotLoc == 0 || firstDotLoc == uriString.length - 1))) && firstColonLoc == NOT_FOUND && firstQMarkLoc == NOT_FOUND ) { if (asciiHost && isDomainWhitelisted(asciiHost)) { // Avoid lookup if we reached this point and there is a question mark or colon. if (uriString.includes(":") || uriString.includes("?")) { return false; } // ... unless there are no dots, and a slash, and alpha characters, and // this is a valid host: // Keyword lookup if there is exactly one dot and it is the first or last // character of the input. let firstDotIndex = uriString.indexOf("."); if ( firstDotLoc == NOT_FOUND && lastSlashLoc != NOT_FOUND && hasAsciiAlpha && asciiHost firstDotIndex == uriString.length - 1 || (firstDotIndex == 0 && firstDotIndex == uriString.lastIndexOf(".")) ) { return false; return tryKeywordFixupForURIInfo( fixupInfo.originalInput, fixupInfo, isPrivateContext, postData ); } // If we get here, we don't have a valid URI, or we did but the // host is not whitelisted, so we do a keyword search *anyway*: // Keyword lookup if there is no dot and the string doesn't include a slash, // or any alphabetical character or an host. if ( firstDotIndex == -1 && (!uriString.includes("/") || !hasAsciiAlpha || !asciiHost) ) { return tryKeywordFixupForURIInfo( fixupInfo.originalInput, fixupInfo, Loading @@ -1003,10 +881,59 @@ function keywordURIFixup(uriString, fixupInfo, isPrivateContext, postData) { postData ); } return false; } /** * Mimics the logic in Services.io.extractScheme, but avoids crossing XPConnect. * @param {string} uriString the string to examine * @returns {string} a scheme or empty string if one could not be identified */ function extractScheme(uriString) { let matches = uriString.match(possibleProtocolRegex); return matches ? matches[1].replace("\t", "").toLowerCase() : ""; } /** * View-source is a pseudo scheme. We're interested in fixing up the stuff * after it. The easiest way to do that is to call this method again with * the "view-source:" lopped off and then prepend it again afterwards. * @param {string} uriString The original string to fixup * @param {integer} fixupFlags The original fixup flags * @param {nsIInputStream} postData Optional POST data for the search * @returns {nsIURI} a fixed URI * @throws if it's not possible to fixup the url */ function fixupViewSource(uriString, fixupFlags, postData) { // We disable keyword lookup and alternate URIs so that small typos don't // cause us to look at very different domains. let newFixupFlags = fixupFlags & ~FIXUP_FLAG_ALLOW_KEYWORD_LOOKUP & ~FIXUP_FLAGS_MAKE_ALTERNATE_URI; let innerURIString = uriString.substring(12).trim(); // Prevent recursion. let innerScheme = extractScheme(innerURIString); if (innerScheme == "view-source") { throw new Components.Exception( "Prevent view-source recursion", Cr.NS_ERROR_FAILURE ); } let info = Services.uriFixup.getFixupURIInfo( innerURIString, newFixupFlags, postData ); if (!info.preferredURI) { throw new Components.Exception( "Couldn't build a valid uri", Cr.NS_ERROR_MALFORMED_URI ); } return Services.io.newURI("view-source:" + info.preferredURI.spec); } Loading
docshell/base/URIFixup.jsm +169 −242 Original line number Diff line number Diff line Loading @@ -7,11 +7,14 @@ /** * This component handles fixing up URIs, by correcting obvious typos and adding * missing schemes. * URI references: * http://www.faqs.org/rfcs/rfc1738.html * http://www.faqs.org/rfcs/rfc2396.html */ // TODO (Bug 1623383): This is a direct conversion from .cpp, as such a few // functions are overly complex and should be cleaned up. /* eslint complexity: ["error", 63] */ // getFixupURIInfo has a complex logic, that likely could be simplified, but // the risk of regressions is high, thus that should be done with care. /* eslint complexity: ["error", 39] */ var EXPORTED_SYMBOLS = ["URIFixup", "URIFixupInfo"]; Loading Loading @@ -74,36 +77,25 @@ const { const COMMON_PROTOCOLS = ["http", "https", "ftp", "file"]; // Regex used to identify user:password tokens in url strings. // This is not a strict character check, because we try to fixup this part of // a the url too. // This is not a strict valid characters check, because we try to fixup this // part of the url too. XPCOMUtils.defineLazyGetter( this, "userPasswordRegex", () => /^([a-z+.-]+:\/{0,3})*[^\/@]+@.+/i ); // Regex used to identify ascii alphabetical characters. XPCOMUtils.defineLazyGetter(this, "asciiAlphaRegex", () => /^[a-z]$/i); // Regex used to look for ascii alphabetical characters. XPCOMUtils.defineLazyGetter(this, "asciiAlphaRegex", () => /[a-z]/i); // Regex used to identify ascii a-f characters. XPCOMUtils.defineLazyGetter(this, "asciiHexRegex", () => /^[a-f0-9]$/i); // Regex used to identify ascii numbers. XPCOMUtils.defineLazyGetter( this, "asciiNumberRegex", () => /^[0-9]+(\.[0-9]+)?$/ ); // Regex used to identify numbers. XPCOMUtils.defineLazyGetter(this, "numberRegex", () => /^[0-9]+(\.[0-9]+)?$/); // Regex used to identify tab separated content (having at least 2 tabs). XPCOMUtils.defineLazyGetter(this, "maxOneTabRegex", () => /^[^\t]*\t?[^\t]*$/); // Regex to test if the protocol might actually be a url without a protocol: // // http://www.faqs.org/rfcs/rfc1738.html // http://www.faqs.org/rfcs/rfc2396.html // // e.g. Anything of the form: // Regex used to test if a string with a protocol might instead be a url // without a protocol but with a port: // // <hostname>:<port> or // <hostname>:<port>/ Loading Loading @@ -138,18 +130,34 @@ XPCOMUtils.defineLazyGetter( () => /^([a-z][a-z0-9.+\t-]*):/i ); // Regex used to match IPs. Note that these are not made to validate IPs, but // just to detect strings that look like an IP. They also skip protocol. // For IPv4 this also accepts a shorthand format with just 2 dots. XPCOMUtils.defineLazyGetter( this, "IPv4LikeRegex", () => /^(?:[a-z+.-]+:\/*(?!\/))?(?:\d{1,3}\.){2,3}\d{1,3}(?::\d+|\/)?/i ); XPCOMUtils.defineLazyGetter( this, "IPv6LikeRegex", () => /^(?:[a-z+.-]+:\/*(?!\/))?\[(?:[0-9a-f]{0,4}:){0,7}[0-9a-f]{0,4}\]?(?::\d+|\/)?/i ); // Cache of whitelisted domains. XPCOMUtils.defineLazyGetter(this, "domainsWhitelist", () => { const branch = "browser.fixup.domainwhitelist."; let domains = new Set( Services.prefs .getChildList("browser.fixup.domainwhitelist.") .getChildList(branch) .filter(p => Services.prefs.getBoolPref(p, false)) .map(p => p.substring(30)) .map(p => p.substring(branch.length)) ); // Hold onto the observer. // Hold onto the observer to avoid it being GC-ed. domains._observer = { observe(subject, topic, data) { let domain = data.substring(30); let domain = data.substring(branch.length); if (Services.prefs.getBoolPref(data, false)) { domains.add(domain); } else { Loading @@ -161,11 +169,7 @@ XPCOMUtils.defineLazyGetter(this, "domainsWhitelist", () => { Ci.nsISupportsWeakReference, ]), }; Services.prefs.addObserver( "browser.fixup.domainwhitelist.", domains._observer, true ); Services.prefs.addObserver(branch, domains._observer, true); return domains; }); Loading Loading @@ -209,40 +213,12 @@ URIFixup.prototype = { let info = new URIFixupInfo(uriString); let scheme = extractScheme(uriString); let isCommonProtocol = COMMON_PROTOCOLS.includes(scheme); // View-source is a pseudo scheme. We're interested in fixing up the stuff // after it. The easiest way to do that is to call this method again with // the "view-source:" lopped off and then prepend it again afterwards. if (scheme == "view-source") { // We disable keyword lookup and alternate URIs so that small typos don't // cause us to look at very different domains. let newFixupFlags = fixupFlags & ~FIXUP_FLAG_ALLOW_KEYWORD_LOOKUP & ~FIXUP_FLAGS_MAKE_ALTERNATE_URI; let innerURIString = uriString.substring(12).trim(); // Prevent recursion. let innerScheme = extractScheme(innerURIString); if (innerScheme == "view-source") { throw new Components.Exception( "Prevent view-source recursion", Cr.NS_ERROR_FAILURE ); } let innerInfo = this.getFixupURIInfo( innerURIString, newFixupFlags, info.preferredURI = info.fixedURI = fixupViewSource( uriString, fixupFlags, postData ); if (!innerInfo.preferredURI) { throw new Components.Exception("No preferredURI", Cr.NS_ERROR_FAILURE); } info.preferredURI = info.fixedURI = Services.io.newURI( "view-source:" + innerInfo.preferredURI.spec ); return info; } Loading @@ -259,13 +235,14 @@ URIFixup.prototype = { // Fix up common scheme typos. // TODO: Use levenshtein distance here? let isCommonProtocol = COMMON_PROTOCOLS.includes(scheme); if ( fixupSchemeTypos && fixupFlags & FIXUP_FLAG_FIX_SCHEME_TYPOS && scheme && !isCommonProtocol ) { for (let [typo, fixed] of [ info.fixupChangedProtocol = [ ["ttp", "http"], ["htp", "http"], ["ttps", "https"], Loading @@ -274,19 +251,18 @@ URIFixup.prototype = { ["htps", "https"], ["ile", "file"], ["le", "file"], ]) { ].some(([typo, fixed]) => { if (uriString.startsWith(typo)) { scheme = fixed; uriString = scheme + uriString.substring(typo.length); info.fixupChangedProtocol = true; isCommonProtocol = true; break; } return true; } return false; }); } // Now we need to check whether "scheme" is something we don't // really know about. // Now check whether "scheme" is something we don't know about. let isDefaultProtocolHandler = !isCommonProtocol && scheme && Loading Loading @@ -329,9 +305,7 @@ URIFixup.prototype = { if (info.fixedURI) { if (!info.preferredURI) { if (fixupFlags & FIXUP_FLAGS_MAKE_ALTERNATE_URI) { info.fixupCreatedAlternateURI = makeAlternateFixedURI(info); } maybeSetAlternateFixedURI(info, fixupFlags); info.preferredURI = info.fixedURI; } return info; Loading @@ -342,15 +316,10 @@ URIFixup.prototype = { // Prune duff protocol schemes: // ://totallybroken.url.com // //shorthand.url.com let inputHadDuffProtocol = false; if (!scheme) { if (uriString.startsWith("://")) { uriString = uriString.substring(3); inputHadDuffProtocol = true; } else if (uriString.startsWith("//")) { uriString = uriString.substring(2); inputHadDuffProtocol = true; } let inputHadDuffProtocol = uriString.startsWith("://") || uriString.startsWith("//"); if (inputHadDuffProtocol) { uriString = uriString.replace(/^:?\/\//, ""); } // Avoid fixing up content that looks like tab-separated values. Loading @@ -361,11 +330,7 @@ URIFixup.prototype = { if (uriWithProtocol) { info.fixedURI = uriWithProtocol; info.fixupChangedProtocol = true; // Did the caller want us to try an alternative URI? // If so, attempt to fixup http://foo into http://www.foo.com if (fixupFlags & FIXUP_FLAGS_MAKE_ALTERNATE_URI) { info.fixupCreatedAlternateURI = makeAlternateFixedURI(info); } maybeSetAlternateFixedURI(info, fixupFlags); info.preferredURI = info.fixedURI; } } Loading @@ -374,12 +339,11 @@ URIFixup.prototype = { if ( keywordEnabled && fixupFlags & FIXUP_FLAG_ALLOW_KEYWORD_LOOKUP && !inputHadDuffProtocol !inputHadDuffProtocol && keywordURIFixup(uriString, info, isPrivateContext, postData) ) { if (keywordURIFixup(uriString, info, isPrivateContext, postData)) { return info; } } if ( info.fixedURI && Loading @@ -400,7 +364,7 @@ URIFixup.prototype = { } if (!info.preferredURI) { // we couldn't salvage anything. // We couldn't salvage anything. throw new Components.Exception( "Couldn't build a valid uri", Cr.NS_ERROR_MALFORMED_URI Loading Loading @@ -575,6 +539,7 @@ URIFixupInfo.prototype = { * Implementation of isDomainWhitelisted, so we don't have to go through the * service. * @param {string} asciiHost * @returns {boolean} whether the domain is whitelisted */ function isDomainWhitelisted(asciiHost) { if (dnsFirstForSingleWords) { Loading @@ -595,8 +560,8 @@ function isDomainWhitelisted(asciiHost) { * If the suffix is unknown due to a typo this will try to fix it up. * @param {URIFixupInfo} info about the uri to check. * @note this may modify the public suffix of info.fixedURI. * @returns false if the public suffix is unknown, true in any other case, * included if it's not present. * @returns {boolean} false if the public suffix is unknown, true in any other * case, included if it's not present. */ function checkAndFixPublicSuffix(info) { let uri = info.fixedURI; Loading Loading @@ -625,7 +590,7 @@ function checkAndFixPublicSuffix(info) { // Suffix is unknown, try to fix most common 3 chars TLDs typos. // .com is the most commonly mistyped tld, so it has more cases. let suffix = Services.eTLD.getPublicSuffix(uri); if (!suffix || asciiNumberRegex.test(suffix)) { if (!suffix || numberRegex.test(suffix)) { return true; } for (let [typo, fixed] of [ Loading Loading @@ -685,12 +650,14 @@ function tryKeywordFixupForURIInfo( * This generates an alternate fixedURI, by adding a prefix and a suffix to * the fixedURI host, if and only if the protocol is http. It should _never_ * modify URIs with other protocols. * @param info an URIInfo object * @returns whether an alternate uri was generated. * @param {URIFixupInfo} info an URIInfo object * @param {integer} fixupFlags the fixup flags * @returns {boolean} Whether an alternate uri was generated */ function makeAlternateFixedURI(info) { function maybeSetAlternateFixedURI(info, fixupFlags) { let uri = info.fixedURI; if ( !(fixupFlags & FIXUP_FLAGS_MAKE_ALTERNATE_URI) || !alternateEnabled || // Code only works for http. Not for any other protocol including https! !uri.schemeIs("http") || Loading @@ -701,14 +668,13 @@ function makeAlternateFixedURI(info) { ) { return false; } let oldHost = uri.host; // Don't create an alternate uri for localhost, because it would be confusing. if (oldHost == "localhost") { return false; } let numDots = (oldHost.match(/\./g) || []).length; // Get the prefix and suffix to stick onto the new hostname. By default these // are www. & .com but they could be any other value, e.g. www. & .org let prefix = Services.prefs.getCharPref( Loading @@ -721,6 +687,7 @@ function makeAlternateFixedURI(info) { ); let newHost = ""; let numDots = (oldHost.match(/\./g) || []).length; if (numDots == 0) { newHost = prefix + oldHost + suffix; } else if (numDots == 1) { Loading @@ -746,12 +713,13 @@ function makeAlternateFixedURI(info) { } return false; } info.fixupCreatedAlternateURI = true; return true; } /** * Try to fixup a file URI. * @param uriString The file URI to fix. * @param {string} uriString The file URI to fix. * @returns {nsIURI} a fixed uri or null. * @note FileURIFixup only returns a URI if it has to add the file: protocol. */ Loading Loading @@ -791,7 +759,7 @@ function fileURIFixup(uriString) { * user:pass@no-scheme.com * * @param {string} uriString The string to fixup. * @returns an nsIURI built adding the default protocol to the string, * @returns {nsIURI} an nsIURI built adding the default protocol to the string, * or null if fixing was not possible. */ function fixupURIProtocol(uriString) { Loading Loading @@ -837,132 +805,41 @@ function keywordURIFixup(uriString, fixupInfo, isPrivateContext, postData) { // "nonQualifiedHost?some args" // "blah.com." // Note: -1 is greater than any actual location // in practice. So if we cast all locations to uint32_t, then a < // b guarantees that either b is kNotFound and a is found, or both // are found and a found before b. // TODO: the cpp code used to assign -1 to an unsigned int, causing overflow. // We should replace most of this with better js code. const NOT_FOUND = Infinity; let firstDotLoc = NOT_FOUND; let lastDotLoc = NOT_FOUND; let firstColonLoc = NOT_FOUND; let firstQuoteLoc = NOT_FOUND; let firstSpaceLoc = NOT_FOUND; let firstQMarkLoc = NOT_FOUND; let lastLSBracketLoc = NOT_FOUND; let lastSlashLoc = NOT_FOUND; let foundDots = 0; let foundColons = 0; let foundDigits = 0; let foundRSBrackets = 0; let looksLikeIpv6 = true; let hasAsciiAlpha = false; for (let i = 0, pos = 0; i < uriString.length; ++i, ++pos) { let iter = uriString[i]; if (pos >= 1 && foundRSBrackets == 0) { if ( !( lastLSBracketLoc == 0 && (iter == ":" || iter == "." || iter == "]" || asciiHexRegex.test(iter)) ) ) { looksLikeIpv6 = false; } } // If we're at the end of the string or this is the first slash, // check if the thing before the slash looks like ipv4: if ( (i == uriString.length - 1 || (lastSlashLoc == NOT_FOUND && iter == "/")) && // Need 2 or 3 dots + only digits (foundDots == 2 || foundDots == 3) && // and they should be all that came before now: (foundDots + foundDigits == pos || // or maybe there was also exactly 1 colon that came after the last // dot, and the digits, dots and colon were all that came before now: (foundColons == 1 && firstColonLoc > lastDotLoc && foundDots + foundDigits + foundColons == pos)) ) { // Hurray, we got ourselves some ipv4! // At this point, there's no way we will do a keyword lookup, so just bail // immediately: // Check for IPs. if (IPv4LikeRegex.test(uriString) || IPv6LikeRegex.test(uriString)) { return false; } if (iter == ".") { foundDots++; lastDotLoc = pos; if (firstDotLoc == NOT_FOUND) { firstDotLoc = pos; } } else if (iter == ":") { foundColons++; if (firstColonLoc == NOT_FOUND) { firstColonLoc = pos; } } else if (iter == " " && firstSpaceLoc == NOT_FOUND) { firstSpaceLoc = pos; } else if (iter == "?" && firstQMarkLoc == NOT_FOUND) { firstQMarkLoc = pos; } else if ((iter == "'" || iter == '"') && firstQuoteLoc == NOT_FOUND) { firstQuoteLoc = pos; } else if (iter == "[") { lastLSBracketLoc = pos; } else if (iter == "]") { foundRSBrackets++; } else if (iter == "/") { lastSlashLoc = pos; } else if (asciiAlphaRegex.test(iter)) { hasAsciiAlpha = true; } else if (asciiNumberRegex.test(iter)) { foundDigits++; } } if (lastLSBracketLoc > 0 || foundRSBrackets != 1) { looksLikeIpv6 = false; // We do keyword lookups if the input starts with a question mark, or if it // contains a space or quote, provided they don't come after a dot, colon or // question mark. if (uriString.startsWith("?") || /^[^.:?]*[\s"']/.test(uriString)) { return tryKeywordFixupForURIInfo( fixupInfo.originalInput, fixupInfo, isPrivateContext, postData ); } // If there are only colons and only hexadecimal characters ([a-z][0-9]) // enclosed in [], then don't do a keyword lookup if (looksLikeIpv6) { // Avoid lookup if we can identify an host and it's whitelisted. // Note that if dnsFirstForSingleWords is true isDomainWhitelisted will always // return true, so we can avoid checking dnsFirstForSingleWords after this. let asciiHost = fixupInfo.fixedURI?.asciiHost; if (asciiHost && isDomainWhitelisted(asciiHost)) { return false; } let asciiHost = fixupInfo.fixedURI && fixupInfo.fixedURI.asciiHost; // Or when the asciiHost is the same as displayHost and there are no // alphabetical characters let displayHost = fixupInfo.fixedURI && fixupInfo.fixedURI.displayHost; // We do keyword lookups if a space or quote preceded the dot, colon // or question mark (or if the latter is not found, or if the input starts // with a question mark) let hasAsciiAlpha = asciiAlphaRegex.test(uriString); if ( ((firstSpaceLoc < firstDotLoc || firstQuoteLoc < firstDotLoc) && (firstSpaceLoc < firstColonLoc || firstQuoteLoc < firstColonLoc) && (firstSpaceLoc < firstQMarkLoc || firstQuoteLoc < firstQMarkLoc)) || firstQMarkLoc == 0 ) { return tryKeywordFixupForURIInfo( fixupInfo.originalInput, fixupInfo, isPrivateContext, postData ); // ... or when the asciiHost is the same as displayHost and there are no // characters from [a-z][A-Z] } else if ( asciiHost && displayHost && !hasAsciiAlpha && asciiHost.toLowerCase() == displayHost.toLowerCase() ) { if (!dnsFirstForSingleWords) { return tryKeywordFixupForURIInfo( fixupInfo.originalInput, fixupInfo, Loading @@ -970,32 +847,33 @@ function keywordURIFixup(uriString, fixupInfo, isPrivateContext, postData) { postData ); } // ... or if there is no question mark or colon, and there is either no // dot, or exactly 1 and it is the first or last character of the input: } else if ( (firstDotLoc == NOT_FOUND || (foundDots == 1 && (firstDotLoc == 0 || firstDotLoc == uriString.length - 1))) && firstColonLoc == NOT_FOUND && firstQMarkLoc == NOT_FOUND ) { if (asciiHost && isDomainWhitelisted(asciiHost)) { // Avoid lookup if we reached this point and there is a question mark or colon. if (uriString.includes(":") || uriString.includes("?")) { return false; } // ... unless there are no dots, and a slash, and alpha characters, and // this is a valid host: // Keyword lookup if there is exactly one dot and it is the first or last // character of the input. let firstDotIndex = uriString.indexOf("."); if ( firstDotLoc == NOT_FOUND && lastSlashLoc != NOT_FOUND && hasAsciiAlpha && asciiHost firstDotIndex == uriString.length - 1 || (firstDotIndex == 0 && firstDotIndex == uriString.lastIndexOf(".")) ) { return false; return tryKeywordFixupForURIInfo( fixupInfo.originalInput, fixupInfo, isPrivateContext, postData ); } // If we get here, we don't have a valid URI, or we did but the // host is not whitelisted, so we do a keyword search *anyway*: // Keyword lookup if there is no dot and the string doesn't include a slash, // or any alphabetical character or an host. if ( firstDotIndex == -1 && (!uriString.includes("/") || !hasAsciiAlpha || !asciiHost) ) { return tryKeywordFixupForURIInfo( fixupInfo.originalInput, fixupInfo, Loading @@ -1003,10 +881,59 @@ function keywordURIFixup(uriString, fixupInfo, isPrivateContext, postData) { postData ); } return false; } /** * Mimics the logic in Services.io.extractScheme, but avoids crossing XPConnect. * @param {string} uriString the string to examine * @returns {string} a scheme or empty string if one could not be identified */ function extractScheme(uriString) { let matches = uriString.match(possibleProtocolRegex); return matches ? matches[1].replace("\t", "").toLowerCase() : ""; } /** * View-source is a pseudo scheme. We're interested in fixing up the stuff * after it. The easiest way to do that is to call this method again with * the "view-source:" lopped off and then prepend it again afterwards. * @param {string} uriString The original string to fixup * @param {integer} fixupFlags The original fixup flags * @param {nsIInputStream} postData Optional POST data for the search * @returns {nsIURI} a fixed URI * @throws if it's not possible to fixup the url */ function fixupViewSource(uriString, fixupFlags, postData) { // We disable keyword lookup and alternate URIs so that small typos don't // cause us to look at very different domains. let newFixupFlags = fixupFlags & ~FIXUP_FLAG_ALLOW_KEYWORD_LOOKUP & ~FIXUP_FLAGS_MAKE_ALTERNATE_URI; let innerURIString = uriString.substring(12).trim(); // Prevent recursion. let innerScheme = extractScheme(innerURIString); if (innerScheme == "view-source") { throw new Components.Exception( "Prevent view-source recursion", Cr.NS_ERROR_FAILURE ); } let info = Services.uriFixup.getFixupURIInfo( innerURIString, newFixupFlags, postData ); if (!info.preferredURI) { throw new Components.Exception( "Couldn't build a valid uri", Cr.NS_ERROR_MALFORMED_URI ); } return Services.io.newURI("view-source:" + info.preferredURI.spec); }