Commit b7663bfe authored by James Teow's avatar James Teow
Browse files

Bug 1835321 - Allow any link on SERPs that don't match an ad expression to be...

Bug 1835321 - Allow any link on SERPs that don't match an ad expression to be considered a non-ad - r=Standard8

Differential Revision: https://phabricator.services.mozilla.com/D179367
parent f31d0a6e
Loading
Loading
Loading
Loading
+0 −10
Original line number Diff line number Diff line
@@ -67,9 +67,6 @@ class SearchProviders {
          extraAdServersRegexps: p.extraAdServersRegexps.map(
            r => new RegExp(r)
          ),
          nonAdsLinkRegexps: p.nonAdsLinkRegexps?.length
            ? p.nonAdsLinkRegexps.map(r => new RegExp(r))
            : [],
        };
      });

@@ -388,13 +385,6 @@ class SearchAdImpression {
        if (result.relatedElements?.length) {
          this.#addEventListenerToElements(result.relatedElements, result.type);
        }
        // If an anchor doesn't match any component, and it doesn't have a non
        // ads link regexp, cache the anchor so the parent process can observe
        // them.
      } else if (!this.#providerInfo.nonAdsLinkRegexps.length) {
        this.#recordElementData(anchor, {
          type: "non_ads_link",
        });
      }
    }
  }
+52 −53
Original line number Diff line number Diff line
@@ -925,9 +925,16 @@ class ContentHandler {
    }

    let wrappedChannel = ChannelWrapper.get(channel);
    // The channel we're observing might be a redirect of a channel we've
    // observed before.
    if (wrappedChannel._adClickRecorded) {
      lazy.logConsole.debug("Ad click already recorded");
      return;
      // When _adClickRecorded is false but _recordedClick is true, it means we
      // recorded a non-ad link click, and it is being re-directed.
    } else if (wrappedChannel._recordedClick) {
      lazy.logConsole.debug("Non ad-click already recorded");
      return;
    }

    Services.tm.dispatchToMainThread(() => {
@@ -953,15 +960,35 @@ class ContentHandler {
        return provider.telemetryId == providerInfo;
      });

      // The SERP "clicked" action is implied if a user loads another page from
      // the context of a SERP. At this point, we don't know if the request is
      // from a SERP but we want to avoid inspecting requests that are not
      // documents, or not a top level load.
      // Some channels re-direct by loading pages that return 200. The result
      // is the channel will have an originURL that changes from the SERP to
      // either a nonAdsRegexp or an extraAdServersRegexps. This is typical
      // for loading a page in a new tab. The channel will have changed so any
      // properties attached to them to record state (e.g. _recordedClick)
      // won't be present.
      if (
        info.nonAdsLinkRegexps.some(r => r.test(originURL)) ||
        info.extraAdServersRegexps.some(r => r.test(originURL))
      ) {
        return;
      }

      // A click event is recorded if a user loads a resource from an
      // originURL that is a SERP.
      //
      // Typically, we only want top level loads containing documents to avoid
      // recording any event on an in-page resource a SERP might load
      // (e.g. CSS files).
      //
      // The exception to this is if a subframe loads a resource that matches
      // a non ad link. Some SERPs encode non ad search results with a URL
      // that gets loaded into an iframe, which then tells the container of
      // the iframe to change the location of the page.
      if (
        lazy.serpEventsEnabled &&
        channel.isDocument &&
        channel.loadInfo.isTopLevelLoad &&
        !wrappedChannel._countedClick
        (channel.loadInfo.isTopLevelLoad ||
          info.nonAdsLinkRegexps.some(r => r.test(URL)))
      ) {
        let start = Cu.now();

@@ -1018,27 +1045,12 @@ class ContentHandler {
              }
            }
          }
          // Check if the href matches a non-ads link. Do this after looking at
          // hrefToComponentMap because a link that looks like a non-ad might
          // have a more specific component type.

          // Default value for URLs that don't match any components categorized
          // on the page.
          if (!type) {
            type = info.nonAdsLinkRegexps.some(r => r.test(URL))
              ? SearchSERPTelemetryUtils.COMPONENTS.NON_ADS_LINK
              : "";
          }
          // The SERP may have moved onto another page that matches a SERP page
          // e.g. Related Search
          if (!type && isSerp) {
            type = SearchSERPTelemetryUtils.COMPONENTS.NON_ADS_LINK;
          }
          // There might be other types of pages on a SERP that don't fall
          // neatly into expected non-ad expressions or SERPs, such as Image
          // Search, Maps, etc.
          if (!type) {
            type = info.extraPageRegexps?.some(r => r.test(URL))
              ? SearchSERPTelemetryUtils.COMPONENTS.NON_ADS_LINK
              : "";
          }

          if (isSerp && isFromNewtab) {
            SearchSERPTelemetry.setBrowserContentSource(
@@ -1047,18 +1059,7 @@ class ContentHandler {
            );
          }

          // Step 3: If we have a type, record an engagement.
          // Exceptions:
          // - Related searches on some SERPs can be encoded with a URL that
          //   match a nonAdsLinkRegexp. This means we'll have seen the link
          //   twice, once with the nonAdsLinkRegexp and again with a SERP URL
          //   matching a searchPageRegexp. We don't want to record the
          //   engagement twice, so if the origin of the request was
          //   nonAdsLinkRegexp, skip the categorization. The reason why we
          //   don't do this check earlier is because if the final URL is a
          //   SERP, we'll want to define the source property of the subsequent
          //   SERP impression.
          if (type && !info.nonAdsLinkRegexps.some(r => r.test(originURL))) {
          // Step 3: Record the engagement.
          impressionIdsWithoutEngagementsSet.delete(
            telemetryState.impressionId
          );
@@ -1072,10 +1073,8 @@ class ContentHandler {
            type,
            URL,
          });
            wrappedChannel._countedClick = true;
          } else if (!type) {
            lazy.logConsole.warn(`Could not find a component type for ${URL}`);
          }
          // Prevent re-directed channels from being examined more than once.
          wrappedChannel._recordedClick = true;
        }
        ChromeUtils.addProfilerMarker(
          "SearchSERPTelemetry._observeActivity",
+18 −1
Original line number Diff line number Diff line
@@ -96,16 +96,33 @@ support-files =
  serp.css
[browser_search_telemetry_engagement_multiple_tabs.js]
tags = search-telemetry
support-files =
  searchTelemetryAd_searchbox_with_content.html
  searchTelemetryAd_searchbox_with_content.html^headers^
[browser_search_telemetry_engagement_non_ad.js]
tags = search-telemetry
support-files =
  searchTelemetryAd_searchbox_with_content.html
  searchTelemetryAd_searchbox_with_content.html^headers^
  serp.css
[browser_search_telemetry_engagement_target.js]
[browser_search_telemetry_engagement_redirect.js]
tags = search-telemetry
support-files =
  redirect_final.sjs
  redirect_once.sjs
  redirect_thrice.sjs
  redirect_twice.sjs
  searchTelemetryAd_adsLink_redirect.html
  searchTelemetryAd_components_text.html
  searchTelemetryAd_nonAdsLink_redirect.html
  searchTelemetryAd_nonAdsLink_redirect.html^headers^
  searchTelemetryAd_nonAdsLink_redirect_nonTopLoaded.html
  searchTelemetryAd_nonAdsLink_redirect_nonTopLoaded.html^headers^
  serp.css
[browser_search_telemetry_engagement_target.js]
tags = search-telemetry
support-files =
  searchTelemetryAd_components_text.html
  searchTelemetryAd_searchbox.html
  searchTelemetryAd_searchbox.html^headers^
  serp.css
+1 −3
Original line number Diff line number Diff line
@@ -19,9 +19,7 @@ const TEST_PROVIDER_INFO = [
    codeParamName: "abc",
    taggedCodes: ["ff"],
    adServerAttributes: ["mozAttr"],
    nonAdsLinkRegexps: [
      /^https:\/\/example.com\/browser\/browser\/components\/search\/test\/browser\//,
    ],
    nonAdsLinkRegexps: [],
    extraAdServersRegexps: [/^https:\/\/example\.com\/ad/],
    components: [
      {
+2 −5
Original line number Diff line number Diff line
@@ -323,11 +323,8 @@ add_task(async function test_click_related_search_in_new_tab() {
  BrowserTestUtils.removeTab(tab2);
});

// We consider regular expressions in nonAdsLinkRegexps and
// searchPageRegexp/extraPageRegexps as valid non ads links when recording
// an engagement event. However, if a nonAdsLinkRegexp leads to a
// searchPageRegexp/extraPageRegexps, than we risk double counting in the case
// of a re-direct occuring in a new tab.
// We consider regular expressions in nonAdsLinkRegexps and searchPageRegexp
// as valid non ads links when recording an engagement event.
add_task(async function test_click_redirect_search_in_newtab() {
  resetTelemetry();
  let url = getSERPUrl("searchTelemetryAd_searchbox_with_content.html");
Loading