Commit 8c57ef63 authored by Barkin Simsek 🐢
Browse files

Update calls in the codebase to reflect database table changes

parent 8fd45ed5
Pipeline #7348 passed with stages
in 2 minutes and 25 seconds
......@@ -36,7 +36,7 @@ if args.worker:
cm.worker()
elif args.update:
logger.info("Intializing CAPTCHA Monitor in update mode")
schedule.every().day.do(cm.update_urls)
schedule.every().day.do(cm.update_domains)
schedule.every().hour.do(cm.update_relays)
# Run all scheduled jobs at the beginning
......
......@@ -8,7 +8,7 @@ from captchamonitor.utils.config import Config
from captchamonitor.utils.database import Database
from captchamonitor.utils.exceptions import ConfigInitError, DatabaseInitError
from captchamonitor.core.update_relays import UpdateRelays
from captchamonitor.core.update_website import UpdateWebsite
from captchamonitor.core.update_domains import UpdateDomains
from captchamonitor.utils.small_scripts import node_id
......@@ -82,13 +82,13 @@ class CaptchaMonitor:
db_session=self.__db_session,
)
def update_urls(self) -> None:
def update_domains(self) -> None:
"""
Updates the list of URLs in the database
Updates the list of domains in the database
"""
self.__logger.info("Started updating URLs")
self.__logger.info("Started updating domains")
UpdateWebsite(config=self.__config, db_session=self.__db_session)
UpdateDomains(config=self.__config, db_session=self.__db_session)
def update_relays(self) -> None:
"""
......
......@@ -6,11 +6,11 @@ import pytz
from sqlalchemy.orm import sessionmaker
from captchamonitor.utils.config import Config
from captchamonitor.utils.models import URL
from captchamonitor.utils.models import Domain
from captchamonitor.utils.website_parser import WebsiteParser
class UpdateWebsite:
class UpdateDomains:
"""
Fetches the Alexa top sites and Moz top 500 lists, parses the websites listed there, and inserts them into the
database
......@@ -23,7 +23,7 @@ class UpdateWebsite:
auto_update: bool = True,
) -> None:
"""
Initializes UpdateWebsites
Initializes UpdateDomains
:param config: The config class instance that contains global configuration values
:type config: Config
......@@ -52,11 +52,11 @@ class UpdateWebsite:
"""
# Iterate over the websites in consensus file
for website in website_list:
query = self.__db_session.query(URL).filter(URL.url == website)
query = self.__db_session.query(Domain).filter(Domain.domain == website)
if query.count() == 0:
# Add new website
db_website = URL(
url=website,
db_website = Domain(
domain=website,
supports_http=True,
supports_https=False,
supports_ftp=False,
......@@ -68,7 +68,7 @@ class UpdateWebsite:
else:
db_website = query.first()
db_website.updated_at = datetime.now(pytz.utc)
db_website.url = website
db_website.domain = website
db_website.supports_http = True
db_website.supports_https = False
db_website.supports_ftp = False
......
......@@ -99,7 +99,7 @@ class Worker:
options_dict.update(job.options)
self.__fetcher = TorBrowser(
config=self.__config,
url=job.ref_url.url,
url=job.url,
tor_launcher=self.__tor_launcher,
options=options_dict,
use_tor=job.ref_fetcher.uses_tor,
......@@ -108,7 +108,7 @@ class Worker:
elif job.ref_fetcher.method == FirefoxBrowser.method_name_in_db:
self.__fetcher = FirefoxBrowser(
config=self.__config,
url=job.ref_url.url,
url=job.url,
tor_launcher=self.__tor_launcher,
options=job.options,
use_tor=job.ref_fetcher.uses_tor,
......@@ -117,7 +117,7 @@ class Worker:
elif job.ref_fetcher.method == ChromeBrowser.method_name_in_db:
self.__fetcher = ChromeBrowser(
config=self.__config,
url=job.ref_url.url,
url=job.url,
tor_launcher=self.__tor_launcher,
options=job.options,
use_tor=job.ref_fetcher.uses_tor,
......@@ -126,7 +126,7 @@ class Worker:
elif job.ref_fetcher.method == OperaBrowser.method_name_in_db:
self.__fetcher = OperaBrowser(
config=self.__config,
url=job.ref_url.url,
url=job.url,
tor_launcher=self.__tor_launcher,
options=job.options,
use_tor=job.ref_fetcher.uses_tor,
......@@ -143,19 +143,20 @@ class Worker:
except Exception as exception:
# If failed, put into the failed table
failed = FetchFailed(
url=job.url,
options=job.options,
tbb_security_level=job.tbb_security_level,
captcha_monitor_version=self.__config["version"],
fail_reason=str(exception),
fetcher_id=job.fetcher_id,
url_id=job.url_id,
domain_id=job.domain_id,
relay_id=job.relay_id,
)
self.__db_session.add(failed)
self.__logger.debug(
"Worker %s wasn't able to fetch URL id %s with %s: %s",
"Worker %s wasn't able to fetch %s with %s: %s",
self.__worker_id,
job.url_id,
job.url,
job.fetcher_id,
str(exception),
)
......@@ -163,20 +164,21 @@ class Worker:
else:
# If successful, put into the completed table
completed = FetchCompleted(
url=job.url,
options=job.options,
tbb_security_level=job.tbb_security_level,
captcha_monitor_version=self.__config["version"],
html_data=self.__fetcher.page_source,
http_requests=self.__fetcher.page_har,
fetcher_id=job.fetcher_id,
url_id=job.url_id,
domain_id=job.domain_id,
relay_id=job.relay_id,
)
self.__db_session.add(completed)
self.__logger.debug(
"Worker %s successfully fetched URL id %s with %s",
"Worker %s successfully fetched %s with %s",
self.__worker_id,
job.url_id,
job.url,
job.fetcher_id,
)
......
......@@ -5,13 +5,13 @@ import pytest
from freezegun import freeze_time
from captchamonitor.utils.config import Config
from captchamonitor.utils.models import URL
from captchamonitor.utils.models import Domain
from captchamonitor.utils.database import Database
from captchamonitor.core.update_website import UpdateWebsite
from captchamonitor.core.update_domains import UpdateDomains
from captchamonitor.utils.website_parser import WebsiteParser
class TestUpdateWebsite(unittest.TestCase):
class TestUpdateDomains(unittest.TestCase):
def setUp(self):
self.config = Config()
self.database = Database(
......@@ -22,7 +22,7 @@ class TestUpdateWebsite(unittest.TestCase):
self.config["db_password"],
)
self.db_session = self.database.session()
self.db_website_query = self.db_session.query(URL)
self.db_website_query = self.db_session.query(Domain)
self.alexa_url_count = 50
self.moz_url_count = 500
......@@ -30,7 +30,7 @@ class TestUpdateWebsite(unittest.TestCase):
self.db_session.close()
def test__insert_alexa_website_into_db(self):
update_website = UpdateWebsite(
update_domains = UpdateDomains(
config=self.config, db_session=self.db_session, auto_update=False
)
......@@ -42,14 +42,14 @@ class TestUpdateWebsite(unittest.TestCase):
# Check if the url table is empty
self.assertEqual(self.db_website_query.count(), 0)
update_website._UpdateWebsite__insert_website_into_db(website_data)
update_domains._UpdateDomains__insert_website_into_db(website_data)
# Check if the url table was populated with correct data
self.assertEqual(self.db_website_query.count(), self.alexa_url_count)
self.assertEqual(website_data[0], self.db_website_query.first().url)
self.assertEqual(website_data[0], self.db_website_query.first().domain)
def test__insert_moz_website_into_db(self):
update_website = UpdateWebsite(
update_domains = UpdateDomains(
config=self.config, db_session=self.db_session, auto_update=False
)
......@@ -61,15 +61,15 @@ class TestUpdateWebsite(unittest.TestCase):
# Check if the url table is empty
self.assertEqual(self.db_website_query.count(), 0)
update_website._UpdateWebsite__insert_website_into_db(website_data)
update_domains._UpdateDomains__insert_website_into_db(website_data)
# Check if the url table was populated with correct data
self.assertEqual(self.db_website_query.count(), self.moz_url_count)
self.assertEqual(website_data[0], self.db_website_query.first().url)
self.assertEqual(website_data[0], self.db_website_query.first().domain)
def test_update_url_init_with_already_populated_table(self):
# Prepopulate the table
update_website = UpdateWebsite(
update_domains = UpdateDomains(
config=self.config, db_session=self.db_session, auto_update=False
)
# Check if the url table is empty
......@@ -80,18 +80,18 @@ class TestUpdateWebsite(unittest.TestCase):
website_list.get_alexa_top_50()
website_data = website_list.website_list
update_website._UpdateWebsite__insert_website_into_db(website_data)
update_domains._UpdateDomains__insert_website_into_db(website_data)
# Check if the url table was populated with correct data
self.assertEqual(self.db_website_query.count(), self.alexa_url_count)
self.assertEqual(self.db_website_query.first().url, website_data[0])
self.assertEqual(self.db_website_query.first().domain, website_data[0])
# Try inserting the same url again with different details
update_website._UpdateWebsite__insert_website_into_db(website_data)
update_domains._UpdateDomains__insert_website_into_db(website_data)
# Make sure there is still only one url
self.assertEqual(self.db_website_query.count(), self.alexa_url_count)
self.assertEqual(self.db_website_query.first().url, website_data[0])
self.assertEqual(self.db_website_query.first().domain, website_data[0])
# Add lists from moz website on top of alexa websites
website_list.get_moz_top_500()
......@@ -99,7 +99,7 @@ class TestUpdateWebsite(unittest.TestCase):
# Unique length of website
unique_length_of_website = len(website_list.uniq_website_list)
update_website._UpdateWebsite__insert_website_into_db(website_data)
update_domains._UpdateDomains__insert_website_into_db(website_data)
# Check if the count of url table is equal to the length of unique websites
self.assertEqual(self.db_website_query.count(), unique_length_of_website)
......@@ -5,8 +5,8 @@ import pytest
from captchamonitor.core.worker import Worker
from captchamonitor.utils.config import Config
from captchamonitor.utils.models import (
URL,
Relay,
Domain,
Fetcher,
FetchQueue,
FetchFailed,
......@@ -35,8 +35,8 @@ class TestWorker(unittest.TestCase):
)
# Add test urls
test_url_success = URL(
url="https://check.torproject.org/",
test_domain_success = Domain(
domain="check.torproject.org",
supports_http=True,
supports_https=True,
supports_ftp=False,
......@@ -44,10 +44,10 @@ class TestWorker(unittest.TestCase):
supports_ipv6=False,
requires_multiple_requests=True,
)
self.db_session.add(test_url_success)
self.db_session.add(test_domain_success)
test_url_fail = URL(
url="https://StupidURL",
test_domain_fail = Domain(
domain="stupid.urlextension",
supports_http=True,
supports_https=True,
supports_ftp=False,
......@@ -55,7 +55,7 @@ class TestWorker(unittest.TestCase):
supports_ipv6=False,
requires_multiple_requests=True,
)
self.db_session.add(test_url_fail)
self.db_session.add(test_domain_fail)
# Add test relay
test_relay = Relay(
......@@ -85,7 +85,12 @@ class TestWorker(unittest.TestCase):
def test_worker_single_run_without_tor_success(self):
# Insert a job
new_job = FetchQueue(fetcher_id=1, url_id=1, relay_id=1)
new_job = FetchQueue(
url="https://check.torproject.org",
fetcher_id=1,
domain_id=1,
relay_id=1,
)
self.db_session.add(new_job)
# Commit changes to the database
......@@ -99,11 +104,16 @@ class TestWorker(unittest.TestCase):
self.worker.process_next_job()
self.assertNotEqual(db_job.count(), 0)
self.assertEqual(db_job.first().ref_url.url, "https://check.torproject.org/")
self.assertEqual(db_job.first().url, "https://check.torproject.org")
def test_worker_single_run_without_tor_fail(self):
# Insert a job
new_job = FetchQueue(fetcher_id=1, url_id=2, relay_id=1)
new_job = FetchQueue(
url="https://stupid.urlextension",
fetcher_id=1,
domain_id=2,
relay_id=1,
)
self.db_session.add(new_job)
# Commit changes to the database
......@@ -117,11 +127,16 @@ class TestWorker(unittest.TestCase):
self.worker.process_next_job()
self.assertNotEqual(db_job.count(), 0)
self.assertEqual(db_job.first().ref_url.url, "https://StupidURL")
self.assertEqual(db_job.first().url, "https://stupid.urlextension")
def test_worker_single_run_with_tor_success(self):
# Insert a job
new_job = FetchQueue(fetcher_id=2, url_id=1, relay_id=1)
new_job = FetchQueue(
url="https://check.torproject.org",
fetcher_id=2,
domain_id=1,
relay_id=1,
)
self.db_session.add(new_job)
# Commit changes to the database
......@@ -135,11 +150,16 @@ class TestWorker(unittest.TestCase):
self.worker.process_next_job()
self.assertNotEqual(db_job.count(), 0)
self.assertEqual(db_job.first().ref_url.url, "https://check.torproject.org/")
self.assertEqual(db_job.first().url, "https://check.torproject.org")
def test_worker_single_run_with_tor_fail(self):
# Insert a job
new_job = FetchQueue(fetcher_id=2, url_id=2, relay_id=1)
new_job = FetchQueue(
url="https://stupid.urlextension",
fetcher_id=2,
domain_id=2,
relay_id=1,
)
self.db_session.add(new_job)
# Commit changes to the database
......@@ -153,7 +173,7 @@ class TestWorker(unittest.TestCase):
self.worker.process_next_job()
self.assertNotEqual(db_job.count(), 0)
self.assertEqual(db_job.first().ref_url.url, "https://StupidURL")
self.assertEqual(db_job.first().url, "https://stupid.urlextension")
def test_worker_no_job_in_queue(self):
# Process the job, shouldn't raise any errors
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment