Commit 77219fe4 authored by Barkin Simsek 🐢
Browse files

Add Opera Browser fetcher

parent 8cd87d0a
Pipeline #7337 passed with stages
in 2 minutes and 25 seconds
......@@ -14,6 +14,10 @@ CM_DOCKER_FIREFOX_BROWSER_CONTAINER_NAME=firefox-browser-container
CM_DOCKER_FIREFOX_BROWSER_CONTAINER_PORT=4445
CM_DOCKER_CHROME_BROWSER_CONTAINER_NAME=chrome-browser-container
CM_DOCKER_CHROME_BROWSER_CONTAINER_PORT=4446
CM_DOCKER_BRAVE_BROWSER_CONTAINER_NAME=brave-browser-container
CM_DOCKER_BRAVE_BROWSER_CONTAINER_PORT=4447
CM_DOCKER_OPERA_BROWSER_CONTAINER_NAME=opera-browser-container
CM_DOCKER_OPERA_BROWSER_CONTAINER_PORT=4448
CM_ASSET_HAR_EXPORT_EXTENSION_XPI=/src/captchamonitor/assets/har_export_trigger-0.6.1.xpi
CM_ASSET_HAR_EXPORT_EXTENSION_XPI_ID=harexporttrigger@getfirebug.com
CM_ASSET_HAR_EXPORT_EXTENSION_CRX=/src/captchamonitor/assets/har_export_trigger-0.6.1.crx
......
......@@ -10,6 +10,7 @@ from captchamonitor.utils.models import FetchQueue, FetchFailed, FetchCompleted
from captchamonitor.utils.exceptions import FetcherNotFound
from captchamonitor.utils.tor_launcher import TorLauncher
from captchamonitor.fetchers.tor_browser import TorBrowser
from captchamonitor.fetchers.opera_browser import OperaBrowser
from captchamonitor.fetchers.chrome_browser import ChromeBrowser
from captchamonitor.fetchers.firefox_browser import FirefoxBrowser
......@@ -47,7 +48,7 @@ class Worker:
self.__worker_id: str = worker_id
self.__tor_launcher: TorLauncher = TorLauncher(self.__config)
self.__job_queue_delay: float = float(self.__config["job_queue_delay"])
self.__fetcher: Union[TorBrowser, FirefoxBrowser, ChromeBrowser]
self.__fetcher: Union[TorBrowser, FirefoxBrowser, ChromeBrowser, OperaBrowser]
# Loop over the jobs
while loop:
......@@ -64,6 +65,7 @@ class Worker:
:raises FetcherNotFound: If requested fetcher is not available
"""
# pylint: disable=R0912
# Get claimed jobs by this worker
db_job = self.__db_session.query(FetchQueue).filter(
FetchQueue.claimed_by == self.__worker_id
......@@ -121,6 +123,15 @@ class Worker:
use_tor=job.ref_fetcher.uses_tor,
)
elif job.ref_fetcher.method == OperaBrowser.method_name_in_db:
self.__fetcher = OperaBrowser(
config=self.__config,
url=job.ref_url.url,
tor_launcher=self.__tor_launcher,
options=job.options,
use_tor=job.ref_fetcher.uses_tor,
)
else:
raise FetcherNotFound
......
import time
from selenium import webdriver
from captchamonitor.fetchers.base_fetcher import BaseFetcher
class OperaBrowser(BaseFetcher):
    """
    Fetcher that extends BaseFetcher to retrieve URLs with Opera Browser

    :param BaseFetcher: Inherits the BaseFetcher class
    :type BaseFetcher: BaseFetcher class
    """

    # Value compared against a job's fetcher method to select this fetcher
    method_name_in_db = "opera_browser"

    def setup(self) -> None:
        """
        Builds the Selenium executor URL, capabilities and options needed to
        drive the Opera Browser container
        """
        # The Tor SOCKS endpoint is read up front, it is only used further
        # below when the fetcher is asked to go through Tor
        tor_host = self._tor_launcher.ip_address
        tor_port = self._tor_launcher.socks_port

        # Address of the Selenium endpoint exposed by the Opera container
        self._selenium_executor_url = self._get_selenium_executor_url(
            self._config["docker_opera_browser_container_name"],
            self._config["docker_opera_browser_container_port"],
        )

        # Opera capabilities are paired with a ChromeOptions object here
        self._desired_capabilities = webdriver.DesiredCapabilities.OPERA.copy()
        self._selenium_options = webdriver.ChromeOptions()
        browser_options = self._selenium_options

        # Install the HAR export extension (crx) into Opera Browser
        self._install_har_export_extension_crx(browser_options)

        # Open devtools automatically so that network traffic gets recorded
        # for the HAR export in Opera Browser
        browser_options.add_argument("--auto-open-devtools-for-tabs")

        # Nothing else to configure unless traffic has to go through Tor
        if not self.use_tor:
            return

        # Route Opera Browser's connections through the Tor SOCKS proxy
        tor_proxy = f"socks5://{tor_host}:{tor_port}"
        browser_options.add_argument(f"--proxy-server={tor_proxy}")

    def connect(self) -> None:
        """
        Attaches the Selenium remote web driver to the Opera Browser container
        """
        self._connect_to_selenium_remote_web_driver(
            container_name="Opera Browser",
            desired_capabilities=self._desired_capabilities,
            command_executor=self._selenium_executor_url,
            options=self._selenium_options,
        )

        # Give the HAR export trigger extension a moment to initialize.
        # Keep this sleep in place: without it the extension returns nothing
        # and causes trouble with Opera Browser
        time.sleep(1)

    def fetch(self) -> None:
        """
        Retrieves the given URL through the connected Opera Browser
        """
        self._fetch_with_selenium_remote_web_driver()
......@@ -22,6 +22,10 @@ ENV_VARS = {
"docker_firefox_browser_container_port": "CM_DOCKER_FIREFOX_BROWSER_CONTAINER_PORT",
"docker_chrome_browser_container_name": "CM_DOCKER_CHROME_BROWSER_CONTAINER_NAME",
"docker_chrome_browser_container_port": "CM_DOCKER_CHROME_BROWSER_CONTAINER_PORT",
"docker_brave_browser_container_name": "CM_DOCKER_BRAVE_BROWSER_CONTAINER_NAME",
"docker_brave_browser_container_port": "CM_DOCKER_BRAVE_BROWSER_CONTAINER_PORT",
"docker_opera_browser_container_name": "CM_DOCKER_OPERA_BROWSER_CONTAINER_NAME",
"docker_opera_browser_container_port": "CM_DOCKER_OPERA_BROWSER_CONTAINER_PORT",
"asset_har_export_extension_xpi": "CM_ASSET_HAR_EXPORT_EXTENSION_XPI",
"asset_har_export_extension_xpi_id": "CM_ASSET_HAR_EXPORT_EXTENSION_XPI_ID",
"asset_har_export_extension_crx": "CM_ASSET_HAR_EXPORT_EXTENSION_CRX",
......
import unittest
import pytest
from captchamonitor.utils.config import Config
from captchamonitor.utils.tor_launcher import TorLauncher
from captchamonitor.fetchers.opera_browser import OperaBrowser
class TestOperaBrowser(unittest.TestCase):
    """
    Exercises the OperaBrowser fetcher against check.torproject.org, once
    without Tor and once with Tor
    """

    def setUp(self):
        """
        Creates the config, the Tor launcher and the target URL shared by
        the tests
        """
        self.config = Config()
        self.tor_launcher = TorLauncher(self.config)
        self.target_url = "https://check.torproject.org/"

    def _run_fetcher(self, use_tor):
        """
        Builds an OperaBrowser for the target URL and runs its full
        setup / connect / fetch cycle

        :param use_tor: Passed through to OperaBrowser as its use_tor flag
        :type use_tor: bool
        :return: The fetcher after the fetch has been performed
        :rtype: OperaBrowser
        """
        fetcher = OperaBrowser(
            config=self.config,
            url=self.target_url,
            tor_launcher=self.tor_launcher,
            options={},
            use_tor=use_tor,
        )

        # Run each stage of the fetcher in order
        for stage in (fetcher.setup, fetcher.connect, fetcher.fetch):
            stage()

        return fetcher

    def test_opera_browser_without_tor(self):
        """
        The fetched page should contain the "not using Tor" message when the
        fetcher is created with use_tor=False
        """
        fetcher = self._run_fetcher(use_tor=False)

        expected_text = "Sorry. You are not using Tor."
        self.assertIn(expected_text, fetcher.page_source)

    def test_opera_browser_with_tor(self):
        """
        The fetched page should contain the "configured to use Tor" message
        when the fetcher is created with use_tor=True
        """
        fetcher = self._run_fetcher(use_tor=True)

        expected_text = "Congratulations. This browser is configured to use Tor."
        self.assertIn(expected_text, fetcher.page_source)
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment