From a30c5b3e16c586238eba425a97a76cb593319b34 Mon Sep 17 00:00:00 2001 From: JYOTHINARAYANSETTY <49671483+JYOTHINARAYANSETTY@users.noreply.github.com> Date: Thu, 31 Aug 2023 11:16:47 -0400 Subject: [PATCH] TDl-19503 (#225) * TDl-19503 * new base file --- tests/base_new_frmwrk.py | 168 +++++++++++++++++++++++++++++ tests/test_facebook_discovery.py | 122 ++------------------- tests/test_facebook_table_reset.py | 62 +++++++++++ 3 files changed, 237 insertions(+), 115 deletions(-) create mode 100644 tests/base_new_frmwrk.py create mode 100644 tests/test_facebook_table_reset.py diff --git a/tests/base_new_frmwrk.py b/tests/base_new_frmwrk.py new file mode 100644 index 00000000..798305aa --- /dev/null +++ b/tests/base_new_frmwrk.py @@ -0,0 +1,168 @@ + +import os +from datetime import timedelta +from tap_tester import connections, menagerie, runner, LOGGER +from tap_tester.base_suite_tests.base_case import BaseCase + + +class FacebookBaseTest(BaseCase): + """ + Setup expectations for test sub classes. + Metadata describing streams. + + A bunch of shared methods that are used in tap-tester tests. + Shared tap-specific methods (as needed). 
+ + Insights Test Data by Date Ranges + "ads_insights": + "2019-08-02T00:00:00.000000Z" -> "2019-10-30T00:00:00.000000Z" + "2021-04-07T00:00:00.000000Z" -> "2021-04-08T00:00:00.000000Z" + "ads_insights_age_and_gender": + "2019-08-02T00:00:00.000000Z" -> "2019-10-30T00:00:00.000000Z" + "2021-04-07T00:00:00.000000Z" -> "2021-04-08T00:00:00.000000Z" + "ads_insights_country": + "2019-08-02T00:00:00.000000Z" -> "2019-10-30T00:00:00.000000Z" + "2021-04-07T00:00:00.000000Z" -> "2021-04-08T00:00:00.000000Z" + "ads_insights_platform_and_device": + "2019-08-02T00:00:00.000000Z" -> "2019-10-30T00:00:00.000000Z" + "2021-04-07T00:00:00.000000Z" -> "2021-04-08T00:00:00.000000Z" + "ads_insights_region": + "2019-08-03T00:00:00.000000Z" -> "2019-10-30T00:00:00.000000Z" + "2021-04-07T00:00:00.000000Z" -> "2021-04-08T00:00:00.000000Z" + "ads_insights_dma": + "2019-08-03T00:00:00.000000Z" -> "2019-10-30T00:00:00.000000Z" + "2021-04-07T00:00:00.000000Z" -> "2021-04-08T00:00:00.000000Z" + "ads_insights_hourly_advertiser": + "2019-08-03T00:00:00.000000Z" -> "2019-10-30T00:00:00.000000Z" + "2021-04-07T00:00:00.000000Z" -> "2021-04-08T00:00:00.000000Z" + + """ + FULL_TABLE = "FULL_TABLE" + BOOKMARK_COMPARISON_FORMAT = "%Y-%m-%dT00:00:00+00:00" + + start_date = "" + end_date = "" + + @staticmethod + def tap_name(): + """The name of the tap""" + return "tap-facebook" + + @staticmethod + def get_type(): + """the expected url route ending""" + return "platform.facebook" + + def get_properties(self): + """Configuration properties required for the tap.""" + return { + 'account_id': os.getenv('TAP_FACEBOOK_ACCOUNT_ID'), + 'start_date' : '2021-04-07T00:00:00Z', + 'end_date': '2021-04-09T00:00:00Z', + 'insights_buffer_days': '1', + } + + @staticmethod + def get_credentials(): + """Authentication information for the test account""" + return {'access_token': os.getenv('TAP_FACEBOOK_ACCESS_TOKEN')} + @staticmethod + def expected_metadata(): + """The expected streams and metadata about the streams""" + 
return { + "ads": { + BaseCase.PRIMARY_KEYS: {"id", "updated_time"}, + BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL, + BaseCase.REPLICATION_KEYS: {"updated_time"} + }, + "adcreative": { + BaseCase.PRIMARY_KEYS: {"id"}, + BaseCase.REPLICATION_METHOD: BaseCase.FULL_TABLE, + }, + "adsets": { + BaseCase.PRIMARY_KEYS: {"id", "updated_time"}, + BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL, + BaseCase.REPLICATION_KEYS: {"updated_time"} + }, + "campaigns": { + BaseCase.PRIMARY_KEYS: {"id", }, + BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL, + BaseCase.REPLICATION_KEYS: {"updated_time"} + }, + "ads_insights": { + BaseCase.PRIMARY_KEYS: {"campaign_id", "adset_id", "ad_id", "date_start"}, + BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL, + BaseCase.REPLICATION_KEYS: {"date_start"} + }, + "ads_insights_age_and_gender": { + BaseCase.PRIMARY_KEYS: { + "campaign_id", "adset_id", "ad_id", "date_start", "age", "gender" + }, + BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL, + BaseCase.REPLICATION_KEYS: {"date_start"} + }, + "ads_insights_country": { + BaseCase.PRIMARY_KEYS: {"campaign_id", "adset_id", "ad_id", "date_start", "country"}, + BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL, + BaseCase.REPLICATION_KEYS: {"date_start"} + }, + "ads_insights_platform_and_device": { + BaseCase.PRIMARY_KEYS: { + "campaign_id", "adset_id", "ad_id", "date_start", + "publisher_platform", "platform_position", "impression_device" + }, + BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL, + BaseCase.REPLICATION_KEYS: {"date_start"} + }, + "ads_insights_region": { + BaseCase.PRIMARY_KEYS: {"region", "campaign_id", "adset_id", "ad_id", "date_start"}, + BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL, + BaseCase.REPLICATION_KEYS: {"date_start"} + }, + "ads_insights_dma": { + BaseCase.PRIMARY_KEYS: {"dma", "campaign_id", "adset_id", "ad_id", "date_start"}, + BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL, + BaseCase.REPLICATION_KEYS: {"date_start"} + }, + 
"ads_insights_hourly_advertiser": { + BaseCase.PRIMARY_KEYS: {"hourly_stats_aggregated_by_advertiser_time_zone", "campaign_id", "adset_id", "ad_id", "date_start"}, + BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL, + BaseCase.REPLICATION_KEYS: {"date_start"} + }, + # "leads": { + # BaseCase.PRIMARY_KEYS: {"id"}, + # BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL, + # BaseCase.REPLICATION_KEYS: {"created_time"} + # }, + } + + def set_replication_methods(self, conn_id, catalogs, replication_methods): + + replication_keys = self.expected_replication_keys() + for catalog in catalogs: + replication_method = replication_methods.get(catalog['stream_name']) + annt=menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + if replication_method == self.INCREMENTAL: + replication_key = list(replication_keys.get(catalog['stream_name']))[0] + replication_md = [{ "breadcrumb": [], "metadata":{ "selected" : True}}] + else: + replication_md = [{ "breadcrumb": [], "metadata": { "selected": None}}] + connections.set_non_discoverable_metadata( + conn_id, catalog, menagerie.get_annotated_schema(conn_id, catalog['stream_id']), replication_md) + + @classmethod + def setUpClass(cls,logging="Ensuring environment variables are sourced."): + super().setUpClass(logging=logging) + missing_envs = [x for x in [os.getenv('TAP_FACEBOOK_ACCESS_TOKEN'), + os.getenv('TAP_FACEBOOK_ACCOUNT_ID')] if x is None] + if len(missing_envs) != 0: + raise Exception("set environment variables") + + + ########################################################################## + ### Tap Specific Methods + ########################################################################## + + @staticmethod + def is_insight(stream): + return stream.startswith('ads_insights') diff --git a/tests/test_facebook_discovery.py b/tests/test_facebook_discovery.py index f882ea32..1a865a06 100644 --- a/tests/test_facebook_discovery.py +++ b/tests/test_facebook_discovery.py @@ -1,122 +1,14 @@ -"""Test tap discovery mode and 
metadata.""" -import re +import unittest +from tap_tester.base_suite_tests.discovery_test import DiscoveryTest -from tap_tester import menagerie, connections +from base_new_frmwrk import FacebookBaseTest -from base import FacebookBaseTest +class FacebookDiscoveryTest(DiscoveryTest, FacebookBaseTest): + """Standard Discovery Test""" -class DiscoveryTest(FacebookBaseTest): - """Test tap discovery mode and metadata conforms to standards.""" @staticmethod def name(): - return "tap_tester_facebook_discovery_test" - + return "tt_facebook_discovery" def streams_to_test(self): - return self.expected_streams() - - def test_run(self): - """ - Testing that discovery creates the appropriate catalog with valid metadata. - - • Verify number of actual streams discovered match expected - • Verify the stream names discovered were what we expect - • Verify stream names follow naming convention - streams should only have lowercase alphas and underscores - • verify there is only 1 top level breadcrumb - • verify replication key(s) - • verify primary key(s) - • verify that if there is a replication key we are doing INCREMENTAL otherwise FULL - • verify the actual replication matches our expected replication method - • verify that primary, replication and foreign keys - are given the inclusion of automatic. - • verify that all other fields have inclusion of available metadata. 
- """ - streams_to_test = self.streams_to_test() - - conn_id = connections.ensure_connection(self) - - found_catalogs = self.run_and_verify_check_mode(conn_id) - - # Verify stream names follow naming convention - # streams should only have lowercase alphas and underscores - found_catalog_names = {c['tap_stream_id'] for c in found_catalogs} - self.assertTrue(all([re.fullmatch(r"[a-z_]+", name) for name in found_catalog_names]), - msg="One or more streams don't follow standard naming") - - for stream in streams_to_test: - with self.subTest(stream=stream): - - # Verify ensure the caatalog is found for a given stream - catalog = next(iter([catalog for catalog in found_catalogs - if catalog["stream_name"] == stream])) - self.assertIsNotNone(catalog) - - # collecting expected values - expected_primary_keys = self.expected_primary_keys()[stream] - expected_replication_keys = self.expected_replication_keys()[stream] - expected_automatic_fields = expected_primary_keys | expected_replication_keys - expected_replication_method = self.expected_replication_method()[stream] - - # collecting actual values... 
- schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - metadata = schema_and_metadata["metadata"] - stream_properties = [item for item in metadata if item.get("breadcrumb") == []] - actual_primary_keys = set( - stream_properties[0].get( - "metadata", {self.PRIMARY_KEYS: []}).get(self.PRIMARY_KEYS, []) - ) - actual_replication_keys = set( - stream_properties[0].get( - "metadata", {self.REPLICATION_KEYS: []}).get(self.REPLICATION_KEYS, []) - ) - actual_replication_method = stream_properties[0].get( - "metadata", {self.REPLICATION_METHOD: None}).get(self.REPLICATION_METHOD) - actual_automatic_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in metadata - if item.get("metadata").get("inclusion") == "automatic" - ) - - ########################################################################## - ### metadata assertions - ########################################################################## - - # verify there is only 1 top level breadcrumb in metadata - self.assertTrue(len(stream_properties) == 1, - msg="There is NOT only one top level breadcrumb for {}".format(stream) + \ - "\nstream_properties | {}".format(stream_properties)) - - # verify replication key(s) match expectations - self.assertSetEqual( - expected_replication_keys, actual_replication_keys - ) - - # verify primary key(s) match expectations - self.assertSetEqual( - expected_primary_keys, actual_primary_keys, - ) - - # verify the replication method matches our expectations - self.assertEqual( - expected_replication_method, actual_replication_method - ) - - # verify that if there is a replication key we are doing INCREMENTAL otherwise FULL - if actual_replication_keys: - self.assertEqual(self.INCREMENTAL, actual_replication_method) - else: - self.assertEqual(self.FULL_TABLE, actual_replication_method) - - # verify that primary keys and replication keys - # are given the inclusion of automatic in metadata. 
- self.assertSetEqual(expected_automatic_fields, actual_automatic_fields) - - # verify that all other fields have inclusion of available - # This assumes there are no unsupported fields for SaaS sources - self.assertTrue( - all({item.get("metadata").get("inclusion") == "available" - for item in metadata - if item.get("breadcrumb", []) != [] - and item.get("breadcrumb", ["properties", None])[1] - not in actual_automatic_fields}), - msg="Not all non key properties are set to available in metadata") + return self.expected_stream_names() diff --git a/tests/test_facebook_table_reset.py b/tests/test_facebook_table_reset.py new file mode 100644 index 00000000..eb84dede --- /dev/null +++ b/tests/test_facebook_table_reset.py @@ -0,0 +1,62 @@ +import os +import dateutil.parser +import datetime +from base_new_frmwrk import FacebookBaseTest +from tap_tester.base_suite_tests.table_reset_test import TableResetTest + + +class FacebookTableResetTest(TableResetTest, FacebookBaseTest): + """tap-facebook Table reset test implementation + Currently tests only the stream with Incremental replication method""" + + @staticmethod + def name(): + return "tt_facebook_table_reset" + + def streams_to_test(self): + return self.expected_stream_names() + + @property + def reset_stream(self): + return ('ads_insights_dma') + + + def calculated_states_by_stream(self, current_state): + + """ The following streams barely make the cut: + + campaigns "2021-02-09T18:17:30.000000Z" + "2021-02-09T16:24:58.000000Z" + + adsets "2021-02-09T18:17:41.000000Z" + "2021-02-09T17:10:09.000000Z" + + leads '2021-04-07T20:09:39+0000', + '2021-04-07T20:08:27+0000', + """ + timedelta_by_stream = {stream: [0,0,0] # {stream_name: [days, hours, minutes], ...} + for stream in self.expected_stream_names()} + timedelta_by_stream['campaigns'] = [0, 1, 0] + timedelta_by_stream['adsets'] = [0, 1, 0] + timedelta_by_stream['leads'] = [0, 0 , 1] + + stream_to_calculated_state = {stream: "" for stream in 
current_state['bookmarks'].keys()} + for stream, state in current_state['bookmarks'].items(): + state_key, state_value = next(iter(state.keys())), next(iter(state.values())) + state_as_datetime = dateutil.parser.parse(state_value) + days, hours, minutes = timedelta_by_stream[stream] + calculated_state_as_datetime = state_as_datetime - datetime.timedelta(days=days, hours=hours, minutes=minutes) + + state_format = '%Y-%m-%dT00:00:00+00:00' if self.is_insight(stream) else '%Y-%m-%dT%H:%M:%S-00:00' + calculated_state_formatted = datetime.datetime.strftime(calculated_state_as_datetime, state_format) + + stream_to_calculated_state[stream] = {state_key: calculated_state_formatted} + + return stream_to_calculated_state + + def manipulate_state(self,current_state): + new_states = {'bookmarks': dict()} + simulated_states = self.calculated_states_by_stream(current_state) + for stream, new_state in simulated_states.items(): + new_states['bookmarks'][stream] = new_state + return new_states