diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 00000000..022e0215 --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,22 @@ +name: build + +on: [push] + +jobs: + test: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + - name: Set up Python 3.8 + uses: actions/setup-python@v2 + with: + python-version: 3.8 + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + - name: Test with unittest + run: | + python -m unittest diff --git a/README.md b/README.md index d744a029..fe662ddf 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,8 @@ devoted Google Cloud project to run in. It is not recommended to run yourself, but the code is made available for anyone who wants to understand how the data pipeline works. +![Master Build Status](https://github.com/Jigsaw-Code/censoredplanet-analysis/workflows/build/badge.svg?branch=master) + ## System Diagram ![System Diagram](system-diagram.svg) diff --git a/pipeline/manual_e2e_test.py b/pipeline/manual_e2e_test.py index f5643774..d4f93883 100644 --- a/pipeline/manual_e2e_test.py +++ b/pipeline/manual_e2e_test.py @@ -20,6 +20,7 @@ The local pipeline runs twice, once for a full load, and once incrementally. """ +import datetime import os import pwd from typing import List @@ -32,6 +33,7 @@ import firehook_resources from pipeline import run_beam_tables +from pipeline.metadata import ip_metadata # The test table is written into the : dataset username = pwd.getpwuid(os.getuid()).pw_name @@ -142,6 +144,16 @@ def test_pipeline_e2e(self): finally: clean_up_bq_table(client, BQ_TEST_TABLE) + def test_ipmetadata_init(self): + # This E2E test requires the user to have read access to the + # gs://censoredplanet_geolocation bucket. 
+ ip_metadata_db = ip_metadata.get_firehook_ip_metadata_db( + datetime.date(2018, 7, 27)) + metadata = ip_metadata_db.lookup('1.1.1.1') + + self.assertEqual(metadata, ('1.1.1.0/24', 13335, 'CLOUDFLARENET', + 'Cloudflare, Inc.', 'Content', 'US')) + # This test is not run by default in unittest because it takes about a minute # to run, plus it reads from and writes to bigquery. diff --git a/pipeline/metadata/test_ip_metadata.py b/pipeline/metadata/test_ip_metadata.py index b66158f9..f217a11b 100644 --- a/pipeline/metadata/test_ip_metadata.py +++ b/pipeline/metadata/test_ip_metadata.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import datetime from typing import Iterable import unittest @@ -21,16 +20,6 @@ class IpMetadataTest(unittest.TestCase): - def test_init_and_lookup(self): - # This E2E test requires the user to have get access to the - # gs://censoredplanet_geolocation bucket. - ip_metadata_db = ip_metadata.get_firehook_ip_metadata_db( - datetime.date(2018, 7, 27)) - metadata = ip_metadata_db.lookup("1.1.1.1") - - self.assertEqual(metadata, ("1.1.1.0/24", 13335, "CLOUDFLARENET", - "Cloudflare, Inc.", "Content", "US")) - def test_read_compressed_file(self): filepath = "pipeline/metadata/test_file.txt.gz" lines = [line for line in ip_metadata._read_compressed_file(filepath)]