Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Barkin Simsek
CAPTCHA-Monitor
Commits
7a3993b8
Commit
7a3993b8
authored
Jun 07, 2021
by
Barkin Simsek
🐢
Browse files
Add HAR exporting functionality to Chrome browser (
#76
)
parent
4ad8574b
Pipeline
#7297
passed with stages
in 2 minutes and 25 seconds
Changes
6
Pipelines
2
Hide whitespace changes
Inline
Side-by-side
.env.example
View file @
7a3993b8
...
...
@@ -15,5 +15,6 @@ CM_DOCKER_FIREFOX_BROWSER_CONTAINER_PORT=4445
CM_DOCKER_CHROME_BROWSER_CONTAINER_NAME=chrome-browser-container
CM_DOCKER_CHROME_BROWSER_CONTAINER_PORT=4446
CM_ASSET_HAR_EXPORT_EXTENSION_XPI=/src/captchamonitor/assets/har_export_trigger-0.6.1.xpi
CM_ASSET_HAR_EXPORT_EXTENSION_ID=harexporttrigger@getfirebug.com
CM_ASSET_HAR_EXPORT_EXTENSION_XPI_ID=harexporttrigger@getfirebug.com
CM_ASSET_HAR_EXPORT_EXTENSION_CRX=/src/captchamonitor/assets/har_export_trigger-0.6.1.crx
CM_JOB_QUEUE_DELAY=1
\ No newline at end of file
src/captchamonitor/fetchers/base_fetcher.py
View file @
7a3993b8
...
...
@@ -10,15 +10,15 @@ from selenium.common.exceptions import WebDriverException
from
captchamonitor.utils.config
import
Config
from
captchamonitor.utils.exceptions
import
(
FetcherURLFetchError
,
HarExportExtensionError
,
FetcherConnectionInitError
,
HarExportExtensionXpiError
,
)
from
captchamonitor.utils.tor_launcher
import
TorLauncher
class
BaseFetcher
:
"""
Base fetcher class that will inherited by the actual fetchers, used to unify
Base fetcher class that will
be
inherited by the actual fetchers, used to unify
the fetcher interfaces
"""
...
...
@@ -49,7 +49,6 @@ class BaseFetcher:
:type options: Optional[dict], optional
:param use_tor: Should I connect the fetcher to Tor? Has no effect when using Tor Browser, defaults to True
:type use_tor: bool
:raises HarExportExtensionXpiError: If HAR export extension cannot be used
"""
# Public attributes
self
.
url
:
str
=
url
...
...
@@ -73,28 +72,23 @@ class BaseFetcher:
self
.
_num_retries_on_fail
:
int
=
3
self
.
_delay_in_seconds_between_retries
:
int
=
3
# Get the extension path
self
.
_har_export_extension_xpi
=
self
.
_config
[
"asset_har_export_extension_xpi"
]
# Get the extension path for xpi
self
.
_har_export_extension_xpi
:
str
=
self
.
_config
[
"asset_har_export_extension_xpi"
]
# Get the extension id
self
.
_har_export_extension_xpi_id
=
self
.
_config
[
# Get the extension id
for xpi
self
.
_har_export_extension_xpi_id
:
str
=
self
.
_config
[
"asset_har_export_extension_xpi_id"
]
# Check if the har extension path is a file and a xpi file
if
not
os
.
path
.
isfile
(
self
.
_har_export_extension_xpi
):
self
.
_logger
.
warning
(
"Provided extension file doesn't exist: %s"
,
self
.
_har_export_extension_xpi
,
)
raise
HarExportExtensionXpiError
# Get the extension path for crx
self
.
_har_export_extension_crx
:
str
=
self
.
_config
[
"asset_har_export_extension_crx"
]
if
not
self
.
_har_export_extension_xpi
.
endswith
(
".xpi"
):
self
.
_logger
.
warning
(
"Provided extension file is not valid: %s"
,
self
.
_har_export_extension_xpi
,
)
raise
HarExportExtensionXpiError
self
.
_check_extension_validity
(
self
.
_har_export_extension_xpi
,
".xpi"
)
self
.
_check_extension_validity
(
self
.
_har_export_extension_crx
,
".crx"
)
@
staticmethod
def
_get_selenium_executor_url
(
container_host
:
str
,
container_port
:
str
)
->
str
:
...
...
@@ -161,15 +155,39 @@ class BaseFetcher:
# Set driver timeout
self
.
driver
.
set_page_load_timeout
(
self
.
page_timeout
)
# Set timeout for HAR export trigger
# Set timeout for HAR export trigger
extension
self
.
driver
.
set_script_timeout
(
self
.
script_timeout
)
# Log the current status
self
.
_logger
.
debug
(
"Connected to the %s container"
,
container_name
)
def
_install_har_export_extension
(
self
,
directory
:
str
)
->
None
:
def
_check_extension_validity
(
self
,
extension
:
str
,
endswith
:
str
)
->
None
:
"""
Checks if given extension file exists and is valid
:param extension: Absolute path to the extension file
:type extension: str
:param endswith: The file extension for the browser extension
:type endswith: str
:raises HarExportExtensionError: If given extension is not valid
"""
if
not
os
.
path
.
isfile
(
extension
):
self
.
_logger
.
warning
(
"Provided extension file doesn't exist: %s"
,
extension
,
)
raise
HarExportExtensionError
if
not
extension
.
endswith
(
endswith
):
self
.
_logger
.
warning
(
"Provided extension file is not valid: %s"
,
extension
,
)
raise
HarExportExtensionError
def
_install_har_export_extension_xpi
(
self
,
directory
:
str
)
->
None
:
"""
Installs the HAR Export Trigger extension
Installs the HAR Export Trigger extension
to Firefox based browsers
:param directory: Absolute directory path to install the extension
:type directory: str
...
...
@@ -180,6 +198,17 @@ class BaseFetcher:
os
.
chmod
(
directory
,
0o755
)
shutil
.
copy
(
self
.
_har_export_extension_xpi
,
addon_path
+
".xpi"
)
def
_install_har_export_extension_crx
(
self
,
chrome_options
:
webdriver
.
ChromeOptions
)
->
None
:
"""
Installs the HAR Export Trigger extension to Chromium based browsers
:param chrome_options: webdriver.ChromeOptions from the Selenium driver
:type chrome_options: webdriver.ChromeOptions
"""
chrome_options
.
add_extension
(
self
.
_har_export_extension_crx
)
def
_fetch_with_selenium_remote_web_driver
(
self
)
->
None
:
"""
Fetches the given URL with the remote web driver
...
...
@@ -232,5 +261,8 @@ class BaseFetcher:
return
self
.
driver
.
get_screenshot_as_png
()
def
__del__
(
self
)
->
None
:
"""
Clean up before going out of scope
"""
if
hasattr
(
self
,
"driver"
):
self
.
driver
.
quit
()
src/captchamonitor/fetchers/chrome_browser.py
View file @
7a3993b8
import
time
from
selenium
import
webdriver
from
captchamonitor.fetchers.base_fetcher
import
BaseFetcher
...
...
@@ -27,9 +29,15 @@ class ChromeBrowser(BaseFetcher):
container_host
,
container_port
)
self
.
_desired_capabilities
=
webdriver
.
DesiredCapabilities
.
CHROME
self
.
_desired_capabilities
=
webdriver
.
DesiredCapabilities
.
CHROME
.
copy
()
self
.
_selenium_options
=
webdriver
.
ChromeOptions
()
# Install the extensions
self
.
_install_har_export_extension_crx
(
self
.
_selenium_options
)
# Enable the network monitoring tools to record HAR in Chrome Browser
self
.
_selenium_options
.
add_argument
(
"--auto-open-devtools-for-tabs"
)
# Set connections to Tor if we need to use Tor
if
self
.
use_tor
:
proxy
=
f
"socks5://
{
socks_host
}
:
{
socks_port
}
"
...
...
@@ -46,6 +54,11 @@ class ChromeBrowser(BaseFetcher):
options
=
self
.
_selenium_options
,
)
# Allows some time for HAR export trigger extension to initialize
# Don't remove this sleep, otherwise HAR export trigger extension returns
# nothing and causes trouble
time
.
sleep
(
1
)
def
fetch
(
self
)
->
None
:
"""
Fetches the given URL using Chrome Browser
...
...
src/captchamonitor/utils/config.py
View file @
7a3993b8
...
...
@@ -23,7 +23,8 @@ ENV_VARS = {
"docker_chrome_browser_container_name"
:
"CM_DOCKER_CHROME_BROWSER_CONTAINER_NAME"
,
"docker_chrome_browser_container_port"
:
"CM_DOCKER_CHROME_BROWSER_CONTAINER_PORT"
,
"asset_har_export_extension_xpi"
:
"CM_ASSET_HAR_EXPORT_EXTENSION_XPI"
,
"asset_har_export_extension_xpi_id"
:
"CM_ASSET_HAR_EXPORT_EXTENSION_ID"
,
"asset_har_export_extension_xpi_id"
:
"CM_ASSET_HAR_EXPORT_EXTENSION_XPI_ID"
,
"asset_har_export_extension_crx"
:
"CM_ASSET_HAR_EXPORT_EXTENSION_CRX"
,
"job_queue_delay"
:
"CM_JOB_QUEUE_DELAY"
,
}
...
...
src/captchamonitor/utils/exceptions.py
View file @
7a3993b8
...
...
@@ -64,9 +64,9 @@ class WorkerInitError(Error):
return
"WorkerInitError: Worker initialization error"
class
HarExportExtension
Xpi
Error
(
Error
):
class
HarExportExtensionError
(
Error
):
def
__str__
(
self
)
->
str
:
return
"HarExportExtension
Xpi
Error: Provided Har Export Trigger extension is not valid"
return
"HarExportExtensionError: Provided Har Export Trigger extension is not valid"
class
FetcherConnectionInitError
(
Error
):
...
...
tests/integration/test_chrome_browser.py
View file @
7a3993b8
...
...
@@ -13,7 +13,6 @@ class TestChromeBrowser(unittest.TestCase):
self
.
tor_launcher
=
TorLauncher
(
self
.
config
)
self
.
target_url
=
"https://check.torproject.org/"
@
pytest
.
mark
.
skip
(
reason
=
"Need to implement HAR exporting in Chrome Browser"
)
def
test_chrome_browser_without_tor
(
self
):
chrome_browser
=
ChromeBrowser
(
config
=
self
.
config
,
...
...
@@ -29,7 +28,6 @@ class TestChromeBrowser(unittest.TestCase):
self
.
assertIn
(
"Sorry. You are not using Tor."
,
chrome_browser
.
page_source
)
@
pytest
.
mark
.
skip
(
reason
=
"Need to implement HAR exporting in Chrome Browser"
)
def
test_chrome_browser_with_tor
(
self
):
chrome_browser
=
ChromeBrowser
(
config
=
self
.
config
,
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment