Commit 7a3993b8 authored by Barkin Simsek's avatar Barkin Simsek 🐢
Browse files

Add HAR exporting functionality to Chrome browser (#76)

parent 4ad8574b
Pipeline #7297 passed with stages
in 2 minutes and 25 seconds
......@@ -15,5 +15,6 @@ CM_DOCKER_FIREFOX_BROWSER_CONTAINER_PORT=4445
CM_DOCKER_CHROME_BROWSER_CONTAINER_NAME=chrome-browser-container
CM_DOCKER_CHROME_BROWSER_CONTAINER_PORT=4446
CM_ASSET_HAR_EXPORT_EXTENSION_XPI=/src/captchamonitor/assets/har_export_trigger-0.6.1.xpi
CM_ASSET_HAR_EXPORT_EXTENSION_ID=harexporttrigger@getfirebug.com
CM_ASSET_HAR_EXPORT_EXTENSION_XPI_ID=harexporttrigger@getfirebug.com
CM_ASSET_HAR_EXPORT_EXTENSION_CRX=/src/captchamonitor/assets/har_export_trigger-0.6.1.crx
CM_JOB_QUEUE_DELAY=1
\ No newline at end of file
......@@ -10,15 +10,15 @@ from selenium.common.exceptions import WebDriverException
from captchamonitor.utils.config import Config
from captchamonitor.utils.exceptions import (
FetcherURLFetchError,
HarExportExtensionError,
FetcherConnectionInitError,
HarExportExtensionXpiError,
)
from captchamonitor.utils.tor_launcher import TorLauncher
class BaseFetcher:
"""
Base fetcher class that will inherited by the actual fetchers, used to unify
Base fetcher class that will be inherited by the actual fetchers, used to unify
the fetcher interfaces
"""
......@@ -49,7 +49,6 @@ class BaseFetcher:
:type options: Optional[dict], optional
:param use_tor: Should I connect the fetcher to Tor? Has no effect when using Tor Browser, defaults to True
:type use_tor: bool
:raises HarExportExtensionXpiError: If HAR export extension cannot be used
"""
# Public attributes
self.url: str = url
......@@ -73,28 +72,23 @@ class BaseFetcher:
self._num_retries_on_fail: int = 3
self._delay_in_seconds_between_retries: int = 3
# Get the extension path
self._har_export_extension_xpi = self._config["asset_har_export_extension_xpi"]
# Get the extension path for xpi
self._har_export_extension_xpi: str = self._config[
"asset_har_export_extension_xpi"
]
# Get the extension id
self._har_export_extension_xpi_id = self._config[
# Get the extension id for xpi
self._har_export_extension_xpi_id: str = self._config[
"asset_har_export_extension_xpi_id"
]
# Check if the har extension path is a file and a xpi file
if not os.path.isfile(self._har_export_extension_xpi):
self._logger.warning(
"Provided extension file doesn't exist: %s",
self._har_export_extension_xpi,
)
raise HarExportExtensionXpiError
# Get the extension path for crx
self._har_export_extension_crx: str = self._config[
"asset_har_export_extension_crx"
]
if not self._har_export_extension_xpi.endswith(".xpi"):
self._logger.warning(
"Provided extension file is not valid: %s",
self._har_export_extension_xpi,
)
raise HarExportExtensionXpiError
self._check_extension_validity(self._har_export_extension_xpi, ".xpi")
self._check_extension_validity(self._har_export_extension_crx, ".crx")
@staticmethod
def _get_selenium_executor_url(container_host: str, container_port: str) -> str:
......@@ -161,15 +155,39 @@ class BaseFetcher:
# Set driver timeout
self.driver.set_page_load_timeout(self.page_timeout)
# Set timeout for HAR export trigger
# Set timeout for HAR export trigger extension
self.driver.set_script_timeout(self.script_timeout)
# Log the current status
self._logger.debug("Connected to the %s container", container_name)
def _install_har_export_extension(self, directory: str) -> None:
def _check_extension_validity(self, extension: str, endswith: str) -> None:
"""
Checks if given extension file exists and is valid
:param extension: Absolute path to the extension file
:type extension: str
:param endswith: The file extension for the browser extension
:type endswith: str
:raises HarExportExtensionError: If given extension is not valid
"""
if not os.path.isfile(extension):
self._logger.warning(
"Provided extension file doesn't exist: %s",
extension,
)
raise HarExportExtensionError
if not extension.endswith(endswith):
self._logger.warning(
"Provided extension file is not valid: %s",
extension,
)
raise HarExportExtensionError
def _install_har_export_extension_xpi(self, directory: str) -> None:
"""
Installs the HAR Export Trigger extension
Installs the HAR Export Trigger extension to Firefox based browsers
:param directory: Absolute directory path to install the extension
:type directory: str
......@@ -180,6 +198,17 @@ class BaseFetcher:
os.chmod(directory, 0o755)
shutil.copy(self._har_export_extension_xpi, addon_path + ".xpi")
def _install_har_export_extension_crx(
    self, chrome_options: webdriver.ChromeOptions
) -> None:
    """
    Installs the HAR Export Trigger extension to Chromium based browsers

    The configured crx extension path is handed to the given options object
    through its ``add_extension`` method.

    :param chrome_options: webdriver.ChromeOptions from the Selenium driver
    :type chrome_options: webdriver.ChromeOptions
    """
    crx_path = self._har_export_extension_crx
    chrome_options.add_extension(crx_path)
def _fetch_with_selenium_remote_web_driver(self) -> None:
"""
Fetches the given URL with the remote web driver
......@@ -232,5 +261,8 @@ class BaseFetcher:
return self.driver.get_screenshot_as_png()
def __del__(self) -> None:
"""
Clean up before going out of scope
"""
if hasattr(self, "driver"):
self.driver.quit()
import time
from selenium import webdriver
from captchamonitor.fetchers.base_fetcher import BaseFetcher
......@@ -27,9 +29,15 @@ class ChromeBrowser(BaseFetcher):
container_host, container_port
)
self._desired_capabilities = webdriver.DesiredCapabilities.CHROME
self._desired_capabilities = webdriver.DesiredCapabilities.CHROME.copy()
self._selenium_options = webdriver.ChromeOptions()
# Install the extensions
self._install_har_export_extension_crx(self._selenium_options)
# Enable the network monitoring tools to record HAR in Chrome Browser
self._selenium_options.add_argument("--auto-open-devtools-for-tabs")
# Set connections to Tor if we need to use Tor
if self.use_tor:
proxy = f"socks5://{socks_host}:{socks_port}"
......@@ -46,6 +54,11 @@ class ChromeBrowser(BaseFetcher):
options=self._selenium_options,
)
# Allows some time for HAR export trigger extension to initialize
# Don't remove this sleep, otherwise HAR export trigger extension returns
# nothing and causes trouble
time.sleep(1)
def fetch(self) -> None:
"""
Fetches the given URL using Chrome Browser
......
......@@ -23,7 +23,8 @@ ENV_VARS = {
"docker_chrome_browser_container_name": "CM_DOCKER_CHROME_BROWSER_CONTAINER_NAME",
"docker_chrome_browser_container_port": "CM_DOCKER_CHROME_BROWSER_CONTAINER_PORT",
"asset_har_export_extension_xpi": "CM_ASSET_HAR_EXPORT_EXTENSION_XPI",
"asset_har_export_extension_xpi_id": "CM_ASSET_HAR_EXPORT_EXTENSION_ID",
"asset_har_export_extension_xpi_id": "CM_ASSET_HAR_EXPORT_EXTENSION_XPI_ID",
"asset_har_export_extension_crx": "CM_ASSET_HAR_EXPORT_EXTENSION_CRX",
"job_queue_delay": "CM_JOB_QUEUE_DELAY",
}
......
......@@ -64,9 +64,9 @@ class WorkerInitError(Error):
return "WorkerInitError: Worker initialization error"
class HarExportExtensionXpiError(Error):
class HarExportExtensionError(Error):
def __str__(self) -> str:
return "HarExportExtensionXpiError: Provided Har Export Trigger extension is not valid"
return "HarExportExtensionError: Provided Har Export Trigger extension is not valid"
class FetcherConnectionInitError(Error):
......
......@@ -13,7 +13,6 @@ class TestChromeBrowser(unittest.TestCase):
self.tor_launcher = TorLauncher(self.config)
self.target_url = "https://check.torproject.org/"
@pytest.mark.skip(reason="Need to implement HAR exporting in Chrome Browser")
def test_chrome_browser_without_tor(self):
chrome_browser = ChromeBrowser(
config=self.config,
......@@ -29,7 +28,6 @@ class TestChromeBrowser(unittest.TestCase):
self.assertIn("Sorry. You are not using Tor.", chrome_browser.page_source)
@pytest.mark.skip(reason="Need to implement HAR exporting in Chrome Browser")
def test_chrome_browser_with_tor(self):
chrome_browser = ChromeBrowser(
config=self.config,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment