diff --git a/.env.sample b/.env.sample index 684e4cb1..6ec09d4a 100644 --- a/.env.sample +++ b/.env.sample @@ -1,6 +1,6 @@ -MYSQL_HOST=localhost:3306 +MYSQL_HOST=127.0.0.1 MYSQL_PORT=3306 -MYSQL_USER= -MYSQL_PASSWORD= +MYSQL_USER=root +MYSQL_PASSWORD=root MYSQL_DB_NAME=test_cc MYSQL_TABLE_NAME=ccindex diff --git a/cmoncrawl/aggregator/athena_query.py b/cmoncrawl/aggregator/athena_query.py index fa052779..8d984dba 100644 --- a/cmoncrawl/aggregator/athena_query.py +++ b/cmoncrawl/aggregator/athena_query.py @@ -122,7 +122,7 @@ def __init__( max_retry: int = 5, extra_sql_where_clause: str | None = None, batch_size: int = 1, - aws_profile: str = "default", + aws_profile: Optional[str] = None, bucket_name: Optional[str] = None, catalog_name: str = "AwsDataCatalog", database_name: str = "commoncrawl", diff --git a/tests/athena_tests.py b/tests/athena_tests.py index 50332017..6eac6f6b 100644 --- a/tests/athena_tests.py +++ b/tests/athena_tests.py @@ -6,7 +6,7 @@ from unittest.mock import patch import boto3 -from tests.utils import MySQLRecordsDB +from tests.utils import MySQLRecordsDB, set_up_aws_credentials_testing import aioboto3 from cmoncrawl.aggregator.athena_query import ( @@ -120,6 +120,7 @@ async def asyncSetUp(self) -> None: "https://index.commoncrawl.org/CC-MAIN-2021-09-index", "https://index.commoncrawl.org/CC-MAIN-2020-50-index", ] + set_up_aws_credentials_testing() def test_prepare_athena_sql_query_multiple_urls(self): query = prepare_athena_sql_query( @@ -242,6 +243,7 @@ def setUp(self) -> None: self.mock_s3.start() self.mock_athena = mock_athena() self.mock_athena.start() + set_up_aws_credentials_testing() def tearDown(self) -> None: self.mock_s3.stop() @@ -546,7 +548,7 @@ async def test_batch_size_zero(self): async def test_extra_sql_where(self): self.domains = ["seznam.cz"] - where_clause = "cc.fetch_status != 200" + where_clause = 'cc.warc_filename = "filename1"' self.iterator = AthenaAggregator.AthenaAggregatorIterator( aws_client=self.aws_client, domains=self.domains, diff --git a/tests/utils.py b/tests/utils.py index 6b4f2d50..e98ae567 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -27,7 +27,7 @@ def seed_db(self): 100, 200, "CC-MAIN-2021-05", - 100, + 200, "warc", ], [ @@ -130,3 +130,13 @@ def setUp(self): def tearDown(self): self.remove_db() self.db.close() + + +def set_up_aws_credentials_testing(): + import os + + os.environ["AWS_ACCESS_KEY_ID"] = "testing" + os.environ["AWS_SECRET_ACCESS_KEY"] = "testing" + os.environ["AWS_SECURITY_TOKEN"] = "testing" + os.environ["AWS_SESSION_TOKEN"] = "testing" + os.environ["AWS_DEFAULT_REGION"] = "us-east-1"