From d256dfe597dd112ee393130e851fa115502c9435 Mon Sep 17 00:00:00 2001 From: Andrey Kabanov Date: Sun, 13 Feb 2022 18:46:41 -0800 Subject: [PATCH] Re-submitting PR #140 (Add additional base test suite tests) (#145) * add mac to gitignore * change testing repo * adjust `start_date_2` for new repo * add tap-tester automatic fields * add tap-tester all fields * add all expected streams to all fields test * set specific bookmark for test-repo * fix `collaborators` stream bookmark spelling for tap-tester * add more streams to automatic fields test * add tap-tester bookmarks * updates to automatic fields test: * add check for unique primary keys in replicated records * replace explicit set of expected streams with `expected_check_streams` from base * update `test_run` doc string * omit `team_memberships` stream from `expected_check_streams` * build expected_check_streams() using expected_streams() * add bug id and description * pylint fixes: * adjust imports * use specific error class * set encoding * adjust circle config: * use latest image * trim pylint disable options * add unit tests step * make sure integration tests run always * Re-submitting PR #141 (All repos for an organization) (#146) * add parsing of "org/*" wildcard to retrieve all repos for an org * add unit test cases for extract_repos_from_config() * add requests-mock for dev requirements * add unit test for get_all_repos() * Re-submitting PR #142 (Improve Rate Limiting and Retry Logic) (#143) * add basic backoff retry * add backoff to setup.py * replace deprecated assertEquals method with assertEqual * add MAX_SLEEP_SECONDS parsing from config and DEFAULT_MAX_SECONDS for rate limiting * add comments for changes and use DEFAULT_SLEEP_SECONDS * add pylint ignore for global-statement * README updates: (#148) - add full list of replicated streams - update GitHub docs links * add streams to excluded_streams that aren't respecting automatic fields * add NotFoundException handling to collaborators stream * add bug info * adjust test for test-repo * pylint fixes * run unit and integration steps always * don't raise a NotFoundException to deal with access issues to resources * pylint fix and return empty response body for 404 * add collaborators stream to excluded set * fixes to tap-tester tests * adjust 2nd sync dates for data * deal with None from get method * adjust start date tap-tester dates * actually deal with NoneType in get method * FIX: sub_streams sync functions passed parent metadata * remove expected_check_streams after bug identified and addressed * use expected_streams after bug identified and addressed * don't write a bookmark for FULL_TABLE streams * update unit test expectations to recent changes * updates to bookmarks tap-tester: * adjust test expectations for streams * create simulated_states based on test data and tap behavior * adjust tests based on commits and pr_commits schema * update base based on tap behavior and test data * adjust test expectations for team_members stream * Exclude collaborators stream due to access issues in circle * update circle config to include slack orb and tap-tester-user context * add bug info * add tap-tester-user to daily build context --- .circleci/config.yml | 31 +-- .gitignore | 5 + README.md | 32 ++- setup.py | 4 +- tap_github/__init__.py | 156 +++++++++---- tests/base.py | 34 +-- tests/test_github_all_fields.py | 90 ++++++++ tests/test_github_automatic_fields.py | 73 ++++++ tests/test_github_bookmarks.py | 207 ++++++++++++++++++ tests/test_github_start_date.py | 14 +-
tests/unittests/test_exception_handling.py | 24 +- .../test_extract_repos_from_config.py | 32 +++ tests/unittests/test_formatting_dates.py | 4 +- tests/unittests/test_get_all_repos.py | 74 +++++++ tests/unittests/test_key_error.py | 36 +-- tests/unittests/test_rate_limit.py | 2 +- tests/unittests/test_start_date_bookmark.py | 8 +- tests/unittests/test_sub_streams_selection.py | 8 +- tests/unittests/test_verify_access.py | 20 +- 19 files changed, 699 insertions(+), 155 deletions(-) create mode 100644 tests/test_github_all_fields.py create mode 100644 tests/test_github_automatic_fields.py create mode 100644 tests/test_github_bookmarks.py create mode 100644 tests/unittests/test_extract_repos_from_config.py create mode 100644 tests/unittests/test_get_all_repos.py diff --git a/.circleci/config.yml b/.circleci/config.yml index 78712556..a6969bf5 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1,8 +1,11 @@ -version: 2 +version: 2.1 +orbs: + slack: circleci/slack@3.4.2 + jobs: build: docker: - - image: 218546966473.dkr.ecr.us-east-1.amazonaws.com/circle-ci:tap-tester-v4 + - image: 218546966473.dkr.ecr.us-east-1.amazonaws.com/circle-ci:stitch-tap-tester steps: - checkout - run: @@ -21,7 +24,7 @@ jobs: name: 'pylint' command: | source /usr/local/share/virtualenvs/tap-github/bin/activate - pylint tap_github --disable 'broad-except,chained-comparison,empty-docstring,fixme,invalid-name,line-too-long,missing-class-docstring,missing-function-docstring,missing-module-docstring,no-else-raise,no-else-return,too-few-public-methods,too-many-arguments,too-many-branches,too-many-lines,too-many-locals,ungrouped-imports,wrong-spelling-in-comment,wrong-spelling-in-docstring,bad-whitespace' + pylint tap_github --disable 'missing-module-docstring,missing-function-docstring,missing-class-docstring,line-too-long,invalid-name,too-many-lines,consider-using-f-string,too-many-arguments,too-many-locals' - run: name: 'Unit Tests' command: | @@ -29,6 +32,7 @@ jobs: pip install nose coverage nosetests --with-coverage --cover-erase --cover-package=tap_github --cover-html-dir=htmlcov tests/unittests coverage html + when: always - store_test_results: path: test_output/report.xml - store_artifacts: @@ -39,20 +43,19 @@ jobs: aws s3 cp s3://com-stitchdata-dev-deployment-assets/environments/tap-tester/tap_tester_sandbox dev_env.sh source dev_env.sh source /usr/local/share/virtualenvs/tap-tester/bin/activate - run-test --tap=tap-github \ - --target=target-stitch \ - --orchestrator=stitch-orchestrator \ - --email=harrison+sandboxtest@stitchdata.com \ - --password=$SANDBOX_PASSWORD \ - --client-id=50 \ - --token=$STITCH_API_TOKEN \ - tests + run-test --tap=tap-github tests + when: always + - slack/notify-on-failure: + only_for_branches: master + workflows: version: 2 commit: jobs: - build: - context: circleci-user + context: + - circleci-user + - tap-tester-user build_daily: triggers: - schedule: @@ -63,4 +66,6 @@ workflows: - master jobs: - build: - context: circleci-user + context: + - circleci-user + - tap-tester-user diff --git a/.gitignore b/.gitignore index 59ed95a6..57a09a59 100644 --- a/.gitignore +++ b/.gitignore @@ -97,3 +97,8 @@ properties.json # Jetbrains IDE .idea + +# macOS +*.DS_Store +.AppleDouble +.LSOverride \ No newline at end of file diff --git a/README.md b/README.md index 12454261..3e956789 100644 --- a/README.md +++ b/README.md @@ -2,20 +2,32 @@ This is a [Singer](https://singer.io) tap that produces JSON-formatted data from the GitHub API following the [Singer 
-spec](https://github.com/singer-io/getting-started/blob/master/SPEC.md). +spec](https://github.com/singer-io/getting-started/blob/master/docs/SPEC.md). This tap: - Pulls raw data from the [GitHub REST API](https://developer.github.com/v3/) - Extracts the following resources from GitHub for a single repository: - - [Assignees](https://developer.github.com/v3/issues/assignees/#list-assignees) - - [Collaborators](https://developer.github.com/v3/repos/collaborators/#list-collaborators) - - [Commits](https://developer.github.com/v3/repos/commits/#list-commits-on-a-repository) - - [Issues](https://developer.github.com/v3/issues/#list-issues-for-a-repository) - - [Pull Requests](https://developer.github.com/v3/pulls/#list-pull-requests) - - [Comments](https://developer.github.com/v3/issues/comments/#list-comments-in-a-repository) - - [Reviews](https://developer.github.com/v3/pulls/reviews/#list-reviews-on-a-pull-request) - - [Review Comments](https://developer.github.com/v3/pulls/comments/) - - [Stargazers](https://developer.github.com/v3/activity/starring/#list-stargazers) + - [Assignees](https://docs.github.com/en/rest/reference/issues#list-assignees) + - [Collaborators](https://docs.github.com/en/rest/reference/repos#list-repository-collaborators) + - [Commits](https://docs.github.com/en/rest/reference/repos#list-commits) + - [Commit Comments](https://docs.github.com/en/rest/reference/repos#list-commit-comments-for-a-repository) + - [Events](https://docs.github.com/en/rest/reference/issues#events) + - [Issues](https://docs.github.com/en/rest/reference/issues#list-repository-issues) + - [Issue Events](https://docs.github.com/en/rest/reference/issues#list-issue-events-for-a-repository) + - [Issue Milestones](https://docs.github.com/en/rest/reference/issues#list-milestones) + - [Projects](https://docs.github.com/en/rest/reference/projects#list-repository-projects) + - [Project Cards](https://docs.github.com/en/rest/reference/projects#list-project-cards) + - [Project Columns](https://docs.github.com/en/rest/reference/projects#list-project-columns) + - [Pull Requests](https://docs.github.com/en/rest/reference/pulls#list-pull-requests) + - [PR Commits](https://docs.github.com/en/rest/reference/pulls#list-commits-on-a-pull-request) + - [Releases](https://docs.github.com/en/rest/reference/repos#list-releases) + - [Comments](https://docs.github.com/en/rest/reference/issues#list-issue-comments-for-a-repository) + - [Reviews](https://docs.github.com/en/rest/reference/pulls#list-reviews-for-a-pull-request) + - [Review Comments](https://docs.github.com/en/rest/reference/pulls#list-review-comments-in-a-repository) + - [Stargazers](https://docs.github.com/en/rest/reference/activity#list-stargazers) + - [Teams](https://docs.github.com/en/rest/reference/teams#list-teams) + - [Team Members](https://docs.github.com/en/rest/reference/teams#list-team-members) + - [Team Memberships](https://docs.github.com/en/rest/reference/teams#get-team-membership-for-a-user) - Outputs the schema for each resource - Incrementally pulls data based on the input state diff --git a/setup.py b/setup.py index e37afe7e..4f8a4836 100644 --- a/setup.py +++ b/setup.py @@ -11,13 +11,15 @@ py_modules=['tap_github'], install_requires=[ 'singer-python==5.12.1', - 'requests==2.20.0' + 'requests==2.20.0', + 'backoff==1.8.0' ], extras_require={ 'dev': [ 'pylint==2.6.2', 'ipdb', 'nose', + 'requests-mock==1.9.3' ] }, entry_points=''' diff --git a/tap_github/__init__.py b/tap_github/__init__.py index 3d4536c8..1f4eebcc 100644 ---
a/tap_github/__init__.py +++ b/tap_github/__init__.py @@ -1,15 +1,13 @@ -import argparse import os import json import collections import time import requests -import singer -import singer.bookmarks as bookmarks -import singer.metrics as metrics import backoff +import singer -from singer import metadata +from singer import (bookmarks, metrics, metadata) +from simplejson import JSONDecodeError session = requests.Session() logger = singer.get_logger() @@ -45,6 +43,9 @@ 'team_memberships': ['url'] } +DEFAULT_SLEEP_SECONDS = 600 +MAX_SLEEP_SECONDS = DEFAULT_SLEEP_SECONDS + class GithubException(Exception): pass @@ -101,7 +102,7 @@ class RateLimitExceeded(GithubException): }, 404: { "raise_exception": NotFoundException, - "message": "The resource you have specified cannot be found" + "message": "The resource you have specified cannot be found. Alternatively, the access_token is not valid for the resource" }, 409: { "raise_exception": ConflictError, @@ -172,7 +173,7 @@ def raise_for_error(resp, source): error_code = resp.status_code try: response_json = resp.json() - except Exception: + except JSONDecodeError: response_json = {} if error_code == 404: @@ -180,9 +181,12 @@ def raise_for_error(resp, source): if source == "teams": details += ' or it is a personal account repository' message = "HTTP-error-code: 404, Error: {}. Please refer \'{}\' for more details.".format(details, response_json.get("documentation_url")) - else: - message = "HTTP-error-code: {}, Error: {}".format( - error_code, ERROR_CODE_EXCEPTION_MAPPING.get(error_code, {}).get("message", "Unknown Error") if response_json == {} else response_json) + logger.info(message) + # don't raise a NotFoundException + return None + + message = "HTTP-error-code: {}, Error: {}".format( + error_code, ERROR_CODE_EXCEPTION_MAPPING.get(error_code, {}).get("message", "Unknown Error") if response_json == {} else response_json) exc = ERROR_CODE_EXCEPTION_MAPPING.get(error_code, {}).get("raise_exception", GithubException) raise exc(message) from None @@ -195,7 +199,7 @@ def rate_throttling(response): if int(response.headers['X-RateLimit-Remaining']) == 0: seconds_to_sleep = calculate_seconds(int(response.headers['X-RateLimit-Reset'])) - if seconds_to_sleep > 600: + if seconds_to_sleep > MAX_SLEEP_SECONDS: message = "API rate limit exceeded, please try after {} seconds.".format(seconds_to_sleep) raise RateLimitExceeded(message) from None @@ -214,6 +218,9 @@ def authed_get(source, url, headers={}): raise_for_error(resp, source) timer.tags[metrics.Tag.http_status_code] = resp.status_code rate_throttling(resp) + if resp.status_code == 404: + # return an empty response body since we're not raising a NotFoundException + resp._content = b'{}' # pylint: disable=protected-access return resp def authed_get_all_pages(source, url, headers={}): @@ -249,7 +256,7 @@ def load_schemas(): for filename in os.listdir(get_abs_path('schemas')): path = get_abs_path('schemas') + '/' + filename file_raw = filename.replace('.json', '') - with open(path) as file: + with open(path, encoding='utf-8') as file: schemas[file_raw] = json.load(file) schemas['pr_commits'] = generate_pr_commit_schema(schemas['commits']) @@ -315,6 +322,57 @@ def get_catalog(): return {'streams': streams} +def get_all_repos(organizations: list) -> list: + """ + Retrieves all repositories for the provided organizations and + verifies basic access for them.
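
Note: the throttling rule introduced above reduces to the following standalone Python sketch. It is an approximation rather than the tap's exact code: calculate_seconds() is assumed to return the wall-clock seconds until the X-RateLimit-Reset epoch, and the cap is assumed to come from the optional max_sleep_seconds config key parsed in main().

    import time

    DEFAULT_SLEEP_SECONDS = 600  # default cap, overridable via the `max_sleep_seconds` config key

    def rate_throttling_sketch(headers, max_sleep_seconds=DEFAULT_SLEEP_SECONDS):
        """Sleep through a rate-limit window, or raise when the wait exceeds the cap."""
        if int(headers['X-RateLimit-Remaining']) == 0:
            # seconds until the limit resets (X-RateLimit-Reset is a unix epoch)
            seconds_to_sleep = max(0, int(headers['X-RateLimit-Reset']) - int(time.time()))
            if seconds_to_sleep > max_sleep_seconds:
                raise RuntimeError("API rate limit exceeded, please try after {} seconds.".format(seconds_to_sleep))
            time.sleep(seconds_to_sleep)

With, for example, {"max_sleep_seconds": 1200} in the tap config, waits of up to 20 minutes are absorbed instead of raising RateLimitExceeded.
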
+ + Docs: https://docs.github.com/en/rest/reference/repos#list-organization-repositories + """ + repos = [] + + for org_path in organizations: + org = org_path.split('/')[0] + for response in authed_get_all_pages( + 'get_all_repos', + 'https://api.github.com/orgs/{}/repos?sort=created&direction=desc'.format(org) + ): + org_repos = response.json() + + for repo in org_repos: + repo_full_name = repo.get('full_name') + + logger.info("Verifying access of repository: %s", repo_full_name) + verify_repo_access( + 'https://api.github.com/repos/{}/commits'.format(repo_full_name), + repo + ) + + repos.append(repo_full_name) + + return repos + +def extract_repos_from_config(config: dict) -> list: + """ + Extracts all repositories from the config and calls get_all_repos() + for organizations using the wildcard 'org/*' format. + """ + repo_paths = list(filter(None, config['repository'].split(' '))) + + orgs_with_all_repos = list(filter(lambda x: x.split('/')[1] == '*', repo_paths)) + + if orgs_with_all_repos: + # remove any wildcard "org/*" occurrences from `repo_paths` + repo_paths = list(set(repo_paths).difference(set(orgs_with_all_repos))) + + # get all repositories for an org in the config + all_repos = get_all_repos(orgs_with_all_repos) + + # update repo_paths + repo_paths.extend(all_repos) + + return repo_paths + def verify_repo_access(url_for_repo, repo): try: authed_get("verifying repository access", url_for_repo) @@ -328,7 +386,7 @@ def verify_access_for_repo(config): access_token = config['access_token'] session.headers.update({'authorization': 'token ' + access_token, 'per_page': '1', 'page': '1'}) - repositories = list(filter(None, config['repository'].split(' '))) + repositories = extract_repos_from_config(config) for repo in repositories: logger.info("Verifying access of repository: %s", repo) @@ -360,18 +418,16 @@ def get_all_teams(schemas, repo_path, state, mdata, _start_date): # transform and write release record with singer.Transformer() as transformer: - rec = transformer.transform(r, schemas, metadata=metadata.to_map(mdata)) + rec = transformer.transform(r, schemas['teams'], metadata=metadata.to_map(mdata['teams'])) singer.write_record('teams', rec, time_extracted=extraction_time) - singer.write_bookmark(state, repo_path, 'teams', {'since': singer.utils.strftime(extraction_time)}) counter.increment() if schemas.get('team_members'): - for team_members_rec in get_all_team_members(team_slug, schemas['team_members'], repo_path, state, mdata): + for team_members_rec in get_all_team_members(team_slug, schemas['team_members'], repo_path, state, mdata['team_members']): singer.write_record('team_members', team_members_rec, time_extracted=extraction_time) - singer.write_bookmark(state, repo_path, 'team_members', {'since': singer.utils.strftime(extraction_time)}) if schemas.get('team_memberships'): - for team_memberships_rec in get_all_team_memberships(team_slug, schemas['team_memberships'], repo_path, state, mdata): + for team_memberships_rec in get_all_team_memberships(team_slug, schemas['team_memberships'], repo_path, state, mdata['team_memberships']): singer.write_record('team_memberships', team_memberships_rec, time_extracted=extraction_time) return state @@ -547,7 +603,6 @@ def get_all_issue_labels(schemas, repo_path, state, mdata, _start_date): with singer.Transformer() as transformer: rec = transformer.transform(r, schemas, metadata=metadata.to_map(mdata)) singer.write_record('issue_labels', rec, time_extracted=extraction_time) - singer.write_bookmark(state, repo_path, 'issue_labels',
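
To make the new wildcard handling concrete, here is an illustrative round trip through extract_repos_from_config(). It mirrors the unit tests added later in this patch, and get_all_repos() is mocked out, so no network access or real organization is assumed.

    from unittest import mock
    import tap_github

    config = {'repository': 'singer-io/test-repo test-org/*'}

    with mock.patch('tap_github.get_all_repos') as mocked_get_all_repos:
        mocked_get_all_repos.return_value = ['test-org/repo1', 'test-org/repo2']
        repo_paths = tap_github.extract_repos_from_config(config)

    # the explicit repo is kept, and the "org/*" wildcard expands to the org's repos
    assert sorted(repo_paths) == ['singer-io/test-repo', 'test-org/repo1', 'test-org/repo2']
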
{'since': singer.utils.strftime(extraction_time)}) counter.increment() return state @@ -616,7 +671,7 @@ def get_all_projects(schemas, repo_path, state, mdata, start_date): # transform and write release record with singer.Transformer() as transformer: - rec = transformer.transform(r, schemas, metadata=metadata.to_map(mdata)) + rec = transformer.transform(r, schemas['projects'], metadata=metadata.to_map(mdata['projects'])) singer.write_record('projects', rec, time_extracted=extraction_time) singer.write_bookmark(state, repo_path, 'projects', {'since': singer.utils.strftime(extraction_time)}) counter.increment() @@ -627,14 +682,14 @@ def get_all_projects(schemas, repo_path, state, mdata, start_date): # sync project_columns if that schema is present (only there if selected) if schemas.get('project_columns'): - for project_column_rec in get_all_project_columns(project_id, schemas['project_columns'], repo_path, state, mdata, start_date): + for project_column_rec in get_all_project_columns(project_id, schemas['project_columns'], repo_path, state, mdata['project_columns'], start_date): singer.write_record('project_columns', project_column_rec, time_extracted=extraction_time) singer.write_bookmark(state, repo_path, 'project_columns', {'since': singer.utils.strftime(extraction_time)}) # sync project_cards if that schema is present (only there if selected) if schemas.get('project_cards'): column_id = project_column_rec['id'] - for project_card_rec in get_all_project_cards(column_id, schemas['project_cards'], repo_path, state, mdata, start_date): + for project_card_rec in get_all_project_cards(column_id, schemas['project_cards'], repo_path, state, mdata['project_cards'], start_date): singer.write_record('project_cards', project_card_rec, time_extracted=extraction_time) singer.write_bookmark(state, repo_path, 'project_cards', {'since': singer.utils.strftime(extraction_time)}) return state @@ -721,7 +776,6 @@ def get_all_releases(schemas, repo_path, state, mdata, _start_date): with singer.Transformer() as transformer: rec = transformer.transform(r, schemas, metadata=metadata.to_map(mdata)) singer.write_record('releases', rec, time_extracted=extraction_time) - singer.write_bookmark(state, repo_path, 'releases', {'since': singer.utils.strftime(extraction_time)}) counter.increment() return state @@ -761,14 +815,14 @@ def get_all_pull_requests(schemas, repo_path, state, mdata, start_date): # transform and write pull_request record with singer.Transformer() as transformer: - rec = transformer.transform(pr, schemas['pull_requests'], metadata=metadata.to_map(mdata)) + rec = transformer.transform(pr, schemas['pull_requests'], metadata=metadata.to_map(mdata['pull_requests'])) singer.write_record('pull_requests', rec, time_extracted=extraction_time) singer.write_bookmark(state, repo_path, 'pull_requests', {'since': singer.utils.strftime(extraction_time)}) counter.increment() # sync reviews if that schema is present (only there if selected) if schemas.get('reviews'): - for review_rec in get_reviews_for_pr(pr_num, schemas['reviews'], repo_path, state, mdata): + for review_rec in get_reviews_for_pr(pr_num, schemas['reviews'], repo_path, state, mdata['reviews']): singer.write_record('reviews', review_rec, time_extracted=extraction_time) singer.write_bookmark(state, repo_path, 'reviews', {'since': singer.utils.strftime(extraction_time)}) @@ -776,7 +830,7 @@ def get_all_pull_requests(schemas, repo_path, state, mdata, start_date): # sync review comments if that schema is present (only there if selected) if 
schemas.get('review_comments'): - for review_comment_rec in get_review_comments_for_pr(pr_num, schemas['review_comments'], repo_path, state, mdata): + for review_comment_rec in get_review_comments_for_pr(pr_num, schemas['review_comments'], repo_path, state, mdata['review_comments']): singer.write_record('review_comments', review_comment_rec, time_extracted=extraction_time) singer.write_bookmark(state, repo_path, 'review_comments', {'since': singer.utils.strftime(extraction_time)}) @@ -787,7 +841,7 @@ def get_all_pull_requests(schemas, repo_path, state, mdata, start_date): schemas['pr_commits'], repo_path, state, - mdata + mdata['pr_commits'] ): singer.write_record('pr_commits', pr_commit, time_extracted=extraction_time) singer.write_bookmark(state, repo_path, 'pr_commits', {'since': singer.utils.strftime(extraction_time)}) @@ -859,7 +913,6 @@ def get_all_assignees(schema, repo_path, state, mdata, _start_date): with singer.Transformer() as transformer: rec = transformer.transform(assignee, schema, metadata=metadata.to_map(mdata)) singer.write_record('assignees', rec, time_extracted=extraction_time) - singer.write_bookmark(state, repo_path, 'assignees', {'since': singer.utils.strftime(extraction_time)}) counter.increment() return state @@ -869,19 +922,26 @@ def get_all_collaborators(schema, repo_path, state, mdata, _start_date): https://developer.github.com/v3/repos/collaborators/#list-collaborators ''' with metrics.record_counter('collaborators') as counter: - for response in authed_get_all_pages( - 'collaborators', - 'https://api.github.com/repos/{}/collaborators'.format(repo_path) - ): - collaborators = response.json() - extraction_time = singer.utils.now() - for collaborator in collaborators: - collaborator['_sdc_repository'] = repo_path - with singer.Transformer() as transformer: - rec = transformer.transform(collaborator, schema, metadata=metadata.to_map(mdata)) - singer.write_record('collaborators', rec, time_extracted=extraction_time) - singer.write_bookmark(state, repo_path, 'collaborator', {'since': singer.utils.strftime(extraction_time)}) - counter.increment() + try: + responses = authed_get_all_pages( + 'collaborators', + 'https://api.github.com/repos/{}/collaborators'.format(repo_path) + ) + except NotFoundException as error: + logger.info( + 'Unable to retrieve collaborators stream, check access_token is valid for %s.
See full error message: %s', + repo_path, error + ) + else: + for response in responses: + collaborators = response.json() + extraction_time = singer.utils.now() + for collaborator in collaborators: + collaborator['_sdc_repository'] = repo_path + with singer.Transformer() as transformer: + rec = transformer.transform(collaborator, schema, metadata=metadata.to_map(mdata)) + singer.write_record('collaborators', rec, time_extracted=extraction_time) + counter.increment() return state @@ -987,7 +1047,6 @@ def get_all_stargazers(schema, repo_path, state, mdata, _start_date): rec = transformer.transform(stargazer, schema, metadata=metadata.to_map(mdata)) rec['user_id'] = user_id singer.write_record('stargazers', rec, time_extracted=extraction_time) - singer.write_bookmark(state, repo_path, 'stargazers', {'since': singer.utils.strftime(extraction_time)}) counter.increment() return state @@ -1064,7 +1123,7 @@ def do_sync(config, state, catalog): selected_stream_ids = get_selected_streams(catalog) validate_dependencies(selected_stream_ids) - repositories = list(filter(None, config['repository'].split(' '))) + repositories = extract_repos_from_config(config) state = translate_state(state, catalog, repositories) singer.write_state(state) @@ -1096,17 +1155,19 @@ def do_sync(config, state, catalog): # handle streams with sub streams else: stream_schemas = {stream_id: stream_schema} + stream_mdata = {stream_id: mdata} # get and write selected sub stream schemas for sub_stream_id in sub_stream_ids: if sub_stream_id in selected_stream_ids: sub_stream = get_stream_from_catalog(sub_stream_id, catalog) stream_schemas[sub_stream_id] = sub_stream['schema'] + stream_mdata[sub_stream_id] = sub_stream['metadata'] singer.write_schema(sub_stream_id, sub_stream['schema'], sub_stream['key_properties']) # sync stream and it's sub streams - state = sync_func(stream_schemas, repo, state, mdata, start_date) + state = sync_func(stream_schemas, repo, state, stream_mdata, start_date) singer.write_state(state) @@ -1114,6 +1175,13 @@ def do_sync(config, state, catalog): def main(): args = singer.utils.parse_args(REQUIRED_CONFIG_KEYS) + # get optional config key `max_sleep_seconds` + config_max_sleep = args.config.get('max_sleep_seconds') + + # set global `MAX_SLEEP_SECONDS` for rate_throttling function or use default + global MAX_SLEEP_SECONDS #pylint: disable=global-statement + MAX_SLEEP_SECONDS = config_max_sleep if config_max_sleep else DEFAULT_SLEEP_SECONDS + if args.discover: do_discover(args.config) else: diff --git a/tests/base.py b/tests/base.py index 7b204e64..6ea415a4 100644 --- a/tests/base.py +++ b/tests/base.py @@ -46,8 +46,8 @@ def get_properties(self, original: bool = True): :param original: set to false to change the start_date or end_date """ return_value = { - 'start_date' : dt.strftime(dt.utcnow()-timedelta(days=5), self.START_DATE_FORMAT), - 'repository': 'singer-io/tap-github' + 'start_date' : '2021-10-01T00:00:00Z', + 'repository': 'singer-io/test-repo' } if original: return return_value @@ -61,32 +61,6 @@ def get_credentials(self): 'access_token': os.getenv("TAP_GITHUB_TOKEN") } - @staticmethod - def expected_check_streams(): - return { - 'assignees', - 'collaborators', - 'comments', - 'commit_comments', - 'commits', - 'events', - 'issue_labels', - 'issue_milestones', - 'issue_events', - 'issues', - 'pr_commits', - 'project_cards', - 'project_columns', - 'projects', - 'pull_requests', - 'releases', - 'review_comments', - 'reviews', - 'stargazers', - 'team_members', - 'team_memberships', - 'teams' - } def 
expected_metadata(self): """The expected streams and metadata about the streams""" @@ -134,7 +108,7 @@ def expected_metadata(self): "issue_milestones": { self.PRIMARY_KEYS: {"id"}, self.REPLICATION_METHOD: self.INCREMENTAL, - self.BOOKMARK: {"due_on"}, + self.BOOKMARK: {"updated_at"}, self.OBEYS_START_DATE: True }, "issue_events": { @@ -193,7 +167,7 @@ def expected_metadata(self): "reviews": { self.PRIMARY_KEYS: {"id"}, self.REPLICATION_METHOD: self.INCREMENTAL, - self.BOOKMARK: {"updated_at"}, + self.BOOKMARK: {"submitted_at"}, self.OBEYS_START_DATE: True }, "stargazers": { diff --git a/tests/test_github_all_fields.py b/tests/test_github_all_fields.py new file mode 100644 index 00000000..17173dc1 --- /dev/null +++ b/tests/test_github_all_fields.py @@ -0,0 +1,90 @@ +import os + +from tap_tester import runner, connections, menagerie + +from base import TestGithubBase + + +class TestGithubAllFields(TestGithubBase): + """Test that with all fields selected for a stream, automatic and available fields are replicated""" + + @staticmethod + def name(): + return "tap_tester_github_all_fields" + + def test_run(self): + """ + Ensure running the tap with all streams and fields selected results in the + replication of all fields. + - Verify no unexpected streams were replicated + - Verify that more than just the automatic fields are replicated for each stream. + """ + # BUG TDL-16672 + # The excluded streams are not honoring all fields selection + excluded_streams = { + 'issue_events', + 'comments', + 'projects', + 'pr_commits', + 'events', + 'review_comments', + 'issues', + 'project_cards', + 'project_columns', + 'commits', + 'collaborators' + } + + expected_streams = self.expected_streams() - excluded_streams + + # instantiate connection + conn_id = connections.ensure_connection(self) + + # run check mode + found_catalogs = self.run_and_verify_check_mode(conn_id) + + # table and field selection + test_catalogs_all_fields = [catalog for catalog in found_catalogs + if catalog.get('stream_name') in expected_streams] + self.perform_and_verify_table_and_field_selection( + conn_id, test_catalogs_all_fields, select_all_fields=True, + ) + + # grab metadata after performing table-and-field selection to set expectations + stream_to_all_catalog_fields = dict() # used for asserting all fields are replicated + for catalog in test_catalogs_all_fields: + stream_id, stream_name = catalog['stream_id'], catalog['stream_name'] + catalog_entry = menagerie.get_annotated_schema(conn_id, stream_id) + fields_from_field_level_md = [md_entry['breadcrumb'][1] + for md_entry in catalog_entry['metadata'] + if md_entry['breadcrumb'] != []] + stream_to_all_catalog_fields[stream_name] = set(fields_from_field_level_md) + + # run initial sync + record_count_by_stream = self.run_and_verify_sync(conn_id) + synced_records = runner.get_records_from_target_output() + + # Verify no unexpected streams were replicated + synced_stream_names = set(synced_records.keys()) + self.assertSetEqual(expected_streams, synced_stream_names) + + for stream in expected_streams: + with self.subTest(stream=stream): + # expected values + expected_automatic_keys = self.expected_primary_keys().get(stream) + + # get all expected keys + expected_all_keys = stream_to_all_catalog_fields[stream] + + # collect actual values + messages = synced_records.get(stream) + actual_all_keys = [set(message['data'].keys()) for message in messages['messages'] + if message['action'] == 'upsert'][0] + + # Verify that you get some records for each stream +
self.assertGreater(record_count_by_stream.get(stream, -1), 0) + + # verify all fields for a stream were replicated + self.assertGreater(len(expected_all_keys), len(expected_automatic_keys)) + self.assertTrue(expected_automatic_keys.issubset(expected_all_keys), msg=f'{expected_automatic_keys-expected_all_keys} is not in "expected_all_keys"') + self.assertSetEqual(expected_all_keys, actual_all_keys) diff --git a/tests/test_github_automatic_fields.py b/tests/test_github_automatic_fields.py new file mode 100644 index 00000000..03ae904f --- /dev/null +++ b/tests/test_github_automatic_fields.py @@ -0,0 +1,73 @@ +""" +Test that with no fields selected for a stream, automatic fields are still replicated +""" +from tap_tester import runner, connections + +from base import TestGithubBase + + +class TestGithubAutomaticFields(TestGithubBase): + """Test that with no fields selected for a stream, automatic fields are still replicated""" + + @staticmethod + def name(): + return "tap_tester_github_automatic_fields" + + def test_run(self): + """ + - Verify that for each stream you can get multiple pages of data + when no fields are selected. + - Verify that only the automatic fields are sent to the target. + - Verify that all replicated records have unique primary key values. + """ + # Exclude collaborators stream due to access issues in circle + expected_streams = self.expected_streams() - {'collaborators'} + + # instantiate connection + conn_id = connections.ensure_connection(self) + + # run check mode + found_catalogs = self.run_and_verify_check_mode(conn_id) + + # table and field selection + test_catalogs_automatic_fields = [catalog for catalog in found_catalogs + if catalog.get('stream_name') in expected_streams] + + self.perform_and_verify_table_and_field_selection( + conn_id, test_catalogs_automatic_fields, select_all_fields=False, + ) + + # run initial sync + record_count_by_stream = self.run_and_verify_sync(conn_id) + synced_records = runner.get_records_from_target_output() + + for stream in expected_streams: + with self.subTest(stream=stream): + # expected values + expected_keys = self.expected_primary_keys().get(stream) + + # collect actual values + data = synced_records.get(stream, {}) + record_messages_keys = [set(row.get('data').keys()) for row in data.get('messages', {})] + primary_keys_list = [ + tuple(message.get('data').get(expected_pk) for expected_pk in expected_keys) + for message in data.get('messages') + if message.get('action') == 'upsert'] + unique_primary_keys_list = set(primary_keys_list) + + # Verify that you get some records for each stream + self.assertGreater( + record_count_by_stream.get(stream, -1), 0, + msg="No records were replicated for the {} stream".format(stream)) + + # Verify that only the automatic fields are sent to the target + for actual_keys in record_messages_keys: + self.assertSetEqual(expected_keys, actual_keys) + + # BUG-TDL-17507 An org can have multiple teams with overlapping membership + if stream != 'team_members': + # Verify that all replicated records have unique primary key values.
+ self.assertEqual( + len(primary_keys_list), + len(unique_primary_keys_list), + msg="Replicated record does not have unique primary key values.") diff --git a/tests/test_github_bookmarks.py b/tests/test_github_bookmarks.py new file mode 100644 index 00000000..3520a9d8 --- /dev/null +++ b/tests/test_github_bookmarks.py @@ -0,0 +1,207 @@ +import datetime +import dateutil.parser +import pytz + +from tap_tester import runner, menagerie, connections + +from base import TestGithubBase + + +class TestGithubBookmarks(TestGithubBase): + @staticmethod + def name(): + return "tap_tester_github_bookmarks" + + @staticmethod + def convert_state_to_utc(date_str): + """ + Convert a saved bookmark value of the form '2020-08-25T13:17:36-07:00' to + a string formatted utc datetime, + in order to compare against json formatted datetime values + """ + date_object = dateutil.parser.parse(date_str) + date_object_utc = date_object.astimezone(tz=pytz.UTC) + return datetime.datetime.strftime(date_object_utc, "%Y-%m-%dT%H:%M:%SZ") + + def calculated_states_by_stream(self, current_state, synced_records, replication_keys): + """ + Look at the bookmarks from a previous sync and set a new bookmark + value based on timedelta expectations. This ensures the subsequent sync will replicate + at least 1 record, but fewer records than the previous sync. + + If the test data is changed in the future this will break expectations for this test. + """ + timedelta_by_stream = {stream: [90,0,0] # {stream_name: [days, hours, minutes], ...} + for stream in self.expected_streams()} + timedelta_by_stream['comments'] = [7, 0, 0] + timedelta_by_stream['commit_comments'] = [0, 0, 1] + timedelta_by_stream['commits'] = [0, 17, 0] + timedelta_by_stream['issue_events'] = [1, 0, 0] + timedelta_by_stream['issue_milestones'] = [0, 1, 0] + timedelta_by_stream['issues'] = [7, 0, 0] + timedelta_by_stream['pull_requests'] = [7, 0, 0] + + repo = self.get_properties().get('repository') + + stream_to_calculated_state = {stream: "" for stream in current_state['bookmarks'][repo].keys()} + for stream, state in current_state['bookmarks'][repo].items(): + state_key, state_value = next(iter(state.keys())), next(iter(state.values())) + sync_messages = [record.get('data') for record in + synced_records.get(stream, {'messages': []}).get('messages') + if record.get('action') == 'upsert'] + + # the `commits` and `pr_commits` streams don't have a top level replication_key field + if stream in ('commits', 'pr_commits'): + max_record_values = [values.get('commit', {}).get('committer', {}).get('date') + for values in sync_messages] + max_value = max(max_record_values) + else: + replication_key = next(iter(replication_keys.get(stream))) + max_record_values = [values.get(replication_key) for values in sync_messages] + max_value = max(max_record_values) + + # this is because the tap uses `time_extracted` to bookmark with `since` at execution + new_state_value = min(max_value, state_value) + state_as_datetime = dateutil.parser.parse(new_state_value) + + days, hours, minutes = timedelta_by_stream[stream] + calculated_state_as_datetime = state_as_datetime - datetime.timedelta(days=days, hours=hours, minutes=minutes) + + state_format = '%Y-%m-%dT%H:%M:%S-00:00' + calculated_state_formatted = datetime.datetime.strftime(calculated_state_as_datetime, state_format) + + stream_to_calculated_state[stream] = {state_key: calculated_state_formatted} + + return stream_to_calculated_state + + + def test_run(self): + # Exclude collaborators stream due to access issues in circle +
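
As a quick sanity check of convert_state_to_utc() above, the sample value from its docstring converts as follows. This is a standalone restatement for illustration, using the same dateutil and pytz dependencies the test file imports.

    import datetime
    import dateutil.parser
    import pytz

    def convert_state_to_utc(date_str):
        # normalize an offset-bearing bookmark to a UTC string for comparisons
        date_object = dateutil.parser.parse(date_str)
        date_object_utc = date_object.astimezone(tz=pytz.UTC)
        return datetime.datetime.strftime(date_object_utc, "%Y-%m-%dT%H:%M:%SZ")

    # 13:17:36 at UTC-7 is 20:17:36 UTC
    assert convert_state_to_utc('2020-08-25T13:17:36-07:00') == '2020-08-25T20:17:36Z'
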
expected_streams = self.expected_streams() - {'collaborators'} + + expected_replication_keys = self.expected_bookmark_keys() + expected_replication_methods = self.expected_replication_method() + + repo = self.get_properties().get('repository') + + ########################################################################## + ### First Sync + ########################################################################## + + conn_id = connections.ensure_connection(self, original_properties=True) + + # Run in check mode + found_catalogs = self.run_and_verify_check_mode(conn_id) + + # Select only the expected streams' tables + catalog_entries = [ce for ce in found_catalogs if ce['tap_stream_id'] in expected_streams] + self.perform_and_verify_table_and_field_selection(conn_id, catalog_entries, select_all_fields=True) + + # Run a sync job using orchestrator + first_sync_record_count = self.run_and_verify_sync(conn_id) + first_sync_records = runner.get_records_from_target_output() + first_sync_bookmarks = menagerie.get_state(conn_id) + + ########################################################################## + ### Update State Between Syncs + ########################################################################## + + new_states = {'bookmarks': dict()} + simulated_states = self.calculated_states_by_stream(first_sync_bookmarks, + first_sync_records, expected_replication_keys) + for stream, new_state in simulated_states.items(): + new_states['bookmarks'][stream] = new_state + menagerie.set_state(conn_id, new_states) + + ########################################################################## + ### Second Sync + ########################################################################## + + second_sync_record_count = self.run_and_verify_sync(conn_id) + second_sync_records = runner.get_records_from_target_output() + second_sync_bookmarks = menagerie.get_state(conn_id) + + ########################################################################## + ### Test By Stream + ########################################################################## + + for stream in expected_streams: + with self.subTest(stream=stream): + + # expected values + expected_replication_method = expected_replication_methods[stream] + + # collect information for assertions from syncs 1 & 2 based on expected values + first_sync_count = first_sync_record_count.get(stream, 0) + second_sync_count = second_sync_record_count.get(stream, 0) + first_sync_messages = [record.get('data') for record in + first_sync_records.get(stream, {'messages': []}).get('messages') + if record.get('action') == 'upsert'] + second_sync_messages = [record.get('data') for record in + second_sync_records.get(stream, {'messages': []}).get('messages') + if record.get('action') == 'upsert'] + first_bookmark_key_value = first_sync_bookmarks.get('bookmarks', {}).get(repo, {stream: None}).get(stream) + second_bookmark_key_value = second_sync_bookmarks.get('bookmarks', {}).get(repo, {stream: None}).get(stream) + + + if expected_replication_method == self.INCREMENTAL: + # collect information specific to incremental streams from syncs 1 & 2 + replication_key = next(iter(expected_replication_keys[stream])) + first_bookmark_value = first_bookmark_key_value.get('since') + second_bookmark_value = second_bookmark_key_value.get('since') + first_bookmark_value_utc = self.convert_state_to_utc(first_bookmark_value) + second_bookmark_value_utc = self.convert_state_to_utc(second_bookmark_value) + + # Verify the first sync sets a bookmark of the expected form +
self.assertIsNotNone(first_bookmark_key_value) + self.assertIsNotNone(first_bookmark_key_value.get('since')) + + # Verify the second sync sets a bookmark of the expected form + self.assertIsNotNone(second_bookmark_key_value) + self.assertIsNotNone(second_bookmark_key_value.get('since')) + + # Verify the second sync bookmark is equal to or greater than the first sync bookmark + # the tap uses `time_extracted` and sets a bookmark using `since` for all real/pseudo incremental streams + self.assertGreaterEqual(second_bookmark_value, first_bookmark_value) + + for record in second_sync_messages: + # Verify the second sync bookmark value is the max replication key value for a given stream + if stream in ('commits', 'pr_commits'): + replication_key_value = record.get('commit', {}).get('committer', {}).get('date') + else: + replication_key_value = record.get(replication_key) + self.assertLessEqual( + replication_key_value, second_bookmark_value_utc, + msg="Second sync bookmark was set incorrectly, a record with a greater replication-key value was synced." + ) + + for record in first_sync_messages: + # Verify the first sync bookmark value is the max replication key value for a given stream + if stream in ('commits', 'pr_commits'): + replication_key_value = record.get('commit', {}).get('committer', {}).get('date') + else: + replication_key_value = record.get(replication_key) + self.assertLessEqual( + replication_key_value, first_bookmark_value_utc, + msg="First sync bookmark was set incorrectly, a record with a greater replication-key value was synced." + ) + + # Verify the number of records in the 2nd sync is less than the first + self.assertLessEqual(second_sync_count, first_sync_count) + + + elif expected_replication_method == self.FULL: + # Verify the syncs do not set a bookmark for full table streams + self.assertIsNone(first_bookmark_key_value) + self.assertIsNone(second_bookmark_key_value) + + # Verify the number of records in the second sync is the same as the first + self.assertEqual(second_sync_count, first_sync_count) + + else: + raise NotImplementedError( + "INVALID EXPECTATIONS\t\tSTREAM: {} REPLICATION_METHOD: {}".format(stream, expected_replication_method) + ) + + # Verify at least 1 record was replicated in the second sync + self.assertGreater(second_sync_count, 0, msg="We are not fully testing bookmarking for {}".format(stream)) diff --git a/tests/test_github_start_date.py b/tests/test_github_start_date.py index e48ab584..34065255 100644 --- a/tests/test_github_start_date.py +++ b/tests/test_github_start_date.py @@ -18,7 +18,7 @@ def name(): def generate_data(self): # get the token token = os.getenv("TAP_GITHUB_TOKEN") - url = "https://api.github.com/user/starred/singer-io/tap-github" + url = "https://api.github.com/user/starred/singer-io/test-repo" headers = {"Authorization": "Bearer {}".format(token)} # generate a data for 'events' stream: 'watchEvent' ie.
star the repo @@ -33,13 +33,13 @@ def test_run(self): # run the test for all the streams excluding 'events' stream # as for 'events' stream we have to use dynamic dates - self.run_test('2020-04-01T00:00:00Z', '2021-06-10T00:00:00Z', self.expected_streams() - {'events'}) + self.run_test('2020-04-01T00:00:00Z', '2021-10-08T00:00:00Z', self.expected_streams() - {'events'}) # As per the Documentation: https://docs.github.com/en/rest/reference/activity#events # the 'events' of past 90 days will only be returned # if there are no events in past 90 days, then there will be '304 Not Modified' error today = datetime.today() - date_1 = datetime.strftime(today - timedelta(days=4), "%Y-%m-%dT00:00:00Z") + date_1 = datetime.strftime(today - timedelta(days=90), "%Y-%m-%dT00:00:00Z") date_2 = datetime.strftime(today - timedelta(days=1), "%Y-%m-%dT00:00:00Z") # run the test for 'events' stream self.run_test(date_1, date_2, {'events'}) @@ -126,11 +126,11 @@ def run_test(self, date_1, date_2, streams): # collect information for assertions from syncs 1 & 2 base on expected values record_count_sync_1 = record_count_by_stream_1.get(stream, 0) record_count_sync_2 = record_count_by_stream_2.get(stream, 0) - primary_keys_list_1 = [tuple(message.get('data').get(expected_pk) for expected_pk in expected_primary_keys) - for message in synced_records_1.get(stream).get('messages') + primary_keys_list_1 = [tuple(message.get('data', {}).get(expected_pk) for expected_pk in expected_primary_keys) + for message in synced_records_1.get(stream, {'messages': []}).get('messages') if message.get('action') == 'upsert'] - primary_keys_list_2 = [tuple(message.get('data').get(expected_pk) for expected_pk in expected_primary_keys) - for message in synced_records_2.get(stream).get('messages') + primary_keys_list_2 = [tuple(message.get('data', {}).get(expected_pk) for expected_pk in expected_primary_keys) + for message in synced_records_2.get(stream, {'messages': []}).get('messages') if message.get('action') == 'upsert'] primary_keys_sync_1 = set(primary_keys_list_1) diff --git a/tests/unittests/test_exception_handling.py b/tests/unittests/test_exception_handling.py index 8036e093..e2c86120 100644 --- a/tests/unittests/test_exception_handling.py +++ b/tests/unittests/test_exception_handling.py @@ -32,7 +32,7 @@ def test_zero_content_length(self, mocked_parse_args, mocked_request): try: tap_github.authed_get("", "") except tap_github.BadRequestException as e: - self.assertEquals(str(e), "HTTP-error-code: 400, Error: The request is missing or has a bad parameter.") + self.assertEqual(str(e), "HTTP-error-code: 400, Error: The request is missing or has a bad parameter.") def test_400_error(self, mocked_parse_args, mocked_request): mocked_request.return_value = get_response(400, raise_error = True) @@ -40,7 +40,7 @@ def test_400_error(self, mocked_parse_args, mocked_request): try: tap_github.authed_get("", "") except tap_github.BadRequestException as e: - self.assertEquals(str(e), "HTTP-error-code: 400, Error: The request is missing or has a bad parameter.") + self.assertEqual(str(e), "HTTP-error-code: 400, Error: The request is missing or has a bad parameter.") def test_401_error(self, mocked_parse_args, mocked_request): mocked_request.return_value = get_response(401, raise_error = True) @@ -48,7 +48,7 @@ def test_401_error(self, mocked_parse_args, mocked_request): try: tap_github.authed_get("", "") except tap_github.BadCredentialsException as e: - self.assertEquals(str(e), "HTTP-error-code: 401, Error: Invalid authorization credentials.") 
+ self.assertEqual(str(e), "HTTP-error-code: 401, Error: Invalid authorization credentials.") def test_403_error(self, mocked_parse_args, mocked_request): mocked_request.return_value = get_response(403, raise_error = True) @@ -56,7 +56,7 @@ def test_403_error(self, mocked_parse_args, mocked_request): try: tap_github.authed_get("", "") except tap_github.AuthException as e: - self.assertEquals(str(e), "HTTP-error-code: 403, Error: User doesn't have permission to access the resource.") + self.assertEqual(str(e), "HTTP-error-code: 403, Error: User doesn't have permission to access the resource.") def test_404_error(self, mocked_parse_args, mocked_request): json = {"message": "Not Found", "documentation_url": "https:/docs.github.com/"} @@ -65,7 +65,7 @@ def test_404_error(self, mocked_parse_args, mocked_request): try: tap_github.authed_get("", "") except tap_github.NotFoundException as e: - self.assertEquals(str(e), "HTTP-error-code: 404, Error: The resource you have specified cannot be found. Please refer '{}' for more details.".format(json.get("documentation_url"))) + self.assertEqual(str(e), "HTTP-error-code: 404, Error: The resource you have specified cannot be found. Please refer '{}' for more details.".format(json.get("documentation_url"))) def test_404_error_for_teams(self, mocked_parse_args, mocked_request): json = {"message": "Not Found", "documentation_url": "https:/docs.github.com/"} @@ -73,7 +73,7 @@ def test_404_error_for_teams(self, mocked_parse_args, mocked_request): try: tap_github.raise_for_error(get_response(404, json = json, raise_error = True), "teams") except tap_github.NotFoundException as e: - self.assertEquals(str(e), "HTTP-error-code: 404, Error: The resource you have specified cannot be found or it is a personal account repository. Please refer '{}' for more details.".format(json.get("documentation_url"))) + self.assertEqual(str(e), "HTTP-error-code: 404, Error: The resource you have specified cannot be found or it is a personal account repository. 
Please refer '{}' for more details.".format(json.get("documentation_url"))) def test_500_error(self, mocked_parse_args, mocked_request): mocked_request.return_value = get_response(500, raise_error = True) @@ -81,7 +81,7 @@ def test_500_error(self, mocked_parse_args, mocked_request): try: tap_github.authed_get("", "") except tap_github.InternalServerError as e: - self.assertEquals(str(e), "HTTP-error-code: 500, Error: An error has occurred at Github's end.") + self.assertEqual(str(e), "HTTP-error-code: 500, Error: An error has occurred at Github's end.") def test_301_error(self, mocked_parse_args, mocked_request): mocked_request.return_value = get_response(301, raise_error = True) @@ -89,7 +89,7 @@ def test_301_error(self, mocked_parse_args, mocked_request): try: tap_github.authed_get("", "") except tap_github.MovedPermanentlyError as e: - self.assertEquals(str(e), "HTTP-error-code: 301, Error: The resource you are looking for is moved to another URL.") + self.assertEqual(str(e), "HTTP-error-code: 301, Error: The resource you are looking for is moved to another URL.") def test_304_error(self, mocked_parse_args, mocked_request): mocked_request.return_value = get_response(304, raise_error = True) @@ -97,7 +97,7 @@ def test_304_error(self, mocked_parse_args, mocked_request): try: tap_github.authed_get("", "") except tap_github.NotModifiedError as e: - self.assertEquals(str(e), "HTTP-error-code: 304, Error: The requested resource has not been modified since the last time you accessed it.") + self.assertEqual(str(e), "HTTP-error-code: 304, Error: The requested resource has not been modified since the last time you accessed it.") def test_422_error(self, mocked_parse_args, mocked_request): mocked_request.return_value = get_response(422, raise_error = True) @@ -105,7 +105,7 @@ def test_422_error(self, mocked_parse_args, mocked_request): try: tap_github.authed_get("", "") except tap_github.UnprocessableError as e: - self.assertEquals(str(e), "HTTP-error-code: 422, Error: The request was not able to process right now.") + self.assertEqual(str(e), "HTTP-error-code: 422, Error: The request was not able to process right now.") def test_409_error(self, mocked_parse_args, mocked_request): mocked_request.return_value = get_response(409, raise_error = True) @@ -113,11 +113,11 @@ def test_409_error(self, mocked_parse_args, mocked_request): try: tap_github.authed_get("", "") except tap_github.ConflictError as e: - self.assertEquals(str(e), "HTTP-error-code: 409, Error: The request could not be completed due to a conflict with the current state of the server.") + self.assertEqual(str(e), "HTTP-error-code: 409, Error: The request could not be completed due to a conflict with the current state of the server.") def test_200_success(self, mocked_parse_args, mocked_request): json = {"key": "value"} mocked_request.return_value = get_response(200, json) resp = tap_github.authed_get("", "") - self.assertEquals(json, resp.json()) + self.assertEqual(json, resp.json()) diff --git a/tests/unittests/test_extract_repos_from_config.py b/tests/unittests/test_extract_repos_from_config.py new file mode 100644 index 00000000..4a205696 --- /dev/null +++ b/tests/unittests/test_extract_repos_from_config.py @@ -0,0 +1,32 @@ +import unittest +import tap_github + + +@unittest.mock.patch('tap_github.get_all_repos') +class TestExtractReposFromConfig(unittest.TestCase): + + def test_single_repo(self, mocked_get_all_repos): + config = {'repository': 'singer-io/test-repo'} + expected_repositories = ['singer-io/test-repo'] + 
self.assertEqual(expected_repositories, tap_github.extract_repos_from_config(config)) + + def test_multiple_repos(self, mocked_get_all_repos): + config = {'repository': 'singer-io/test-repo singer-io/tap-github'} + expected_repositories = ['singer-io/test-repo', 'singer-io/tap-github'] + self.assertEqual(expected_repositories, tap_github.extract_repos_from_config(config)) + + def test_org_all_repos(self, mocked_get_all_repos): + config = {'repository': 'singer-io/test-repo test-org/*'} + expected_repositories = [ + 'singer-io/test-repo', + 'test-org/repo1', + 'test-org/repo2', + 'test-org/repo3' + ] + mocked_get_all_repos.return_value = [ + 'test-org/repo1', + 'test-org/repo2', + 'test-org/repo3' + ] + + self.assertEqual(expected_repositories, tap_github.extract_repos_from_config(config)) diff --git a/tests/unittests/test_formatting_dates.py b/tests/unittests/test_formatting_dates.py index 701b2714..72a70925 100644 --- a/tests/unittests/test_formatting_dates.py +++ b/tests/unittests/test_formatting_dates.py @@ -91,7 +91,7 @@ def test_due_on_not_none_2(self, mocked_request): final_state = tap_github.get_all_issue_milestones({}, repo_path, init_state, {}, "") # as we will get 0 records, initial and final bookmark will be same - self.assertEquals(init_bookmark, final_state["bookmarks"][repo_path]["issue_milestones"]["since"]) + self.assertEqual(init_bookmark, final_state["bookmarks"][repo_path]["issue_milestones"]["since"]) @mock.patch("singer.write_record") def test_data_containing_both_values(self, mocked_write_record, mocked_request): @@ -117,4 +117,4 @@ def test_data_containing_both_values(self, mocked_write_record, mocked_request): # as we will get 2 record, final bookmark will be greater than initial bookmark self.assertGreater(last_bookmark, init_bookmark) # as we will get 2 record, write_records will also be called 2 times - self.assertEquals(mocked_write_record.call_count, 2) + self.assertEqual(mocked_write_record.call_count, 2) diff --git a/tests/unittests/test_get_all_repos.py b/tests/unittests/test_get_all_repos.py new file mode 100644 index 00000000..c8ca7a0b --- /dev/null +++ b/tests/unittests/test_get_all_repos.py @@ -0,0 +1,74 @@ +import unittest +import requests +import requests_mock +import simplejson as json + +import tap_github + +from itertools import cycle + + +SESSION = requests.Session() +ADAPTER = requests_mock.Adapter() +SESSION.mount('mock://', ADAPTER) + + +@unittest.mock.patch('tap_github.verify_repo_access') +@unittest.mock.patch('tap_github.authed_get_all_pages') +class TestGetAllRepos(unittest.TestCase): + + def test_single_organization(self, mocked_authed_get_all_pages, mocked_verify_repo_access): + orgs = ['test-org/*'] + repos = ['repo1', 'repo2', 'repo3'] + + mocked_url = 'mock://github.com/orgs/test-org/repos' + mocked_response_body = [ + {'full_name': ''.join(r).replace('*', '')} for r in zip(cycle(orgs), repos) + ] + mocked_response_text = json.dumps(mocked_response_body) + ADAPTER.register_uri( + 'GET', + mocked_url, + text=mocked_response_text) + mocked_response = SESSION.get(mocked_url) + + expected_repositories = [ + 'test-org/repo1', + 'test-org/repo2', + 'test-org/repo3' + ] + mocked_authed_get_all_pages.return_value = [mocked_response] + + self.assertEqual(expected_repositories, tap_github.get_all_repos(orgs)) + + def test_multiple_organizations(self, mocked_authed_get_all_pages, mocked_verify_repo_access): + orgs = ['test-org/*', 'singer-io/*'] + repos = ['repo1', 'repo2', 'repo3'] + + mocked_url = 'mock://github.com/orgs/test-org/repos' + 
side_effect = [] + for org in orgs: + mocked_response_body = [ + {'full_name': ''.join(r).replace('*', '')} for r in zip(cycle([org]), repos) + ] + ADAPTER.register_uri( + 'GET', + mocked_url, + text=json.dumps(mocked_response_body)) + mocked_response = SESSION.get(mocked_url) + mocked_authed_get_all_pages.return_value = [mocked_response] + + call_response = tap_github.get_all_repos([org]) + + side_effect.extend(call_response) + + expected_repositories = [ + 'test-org/repo1', + 'test-org/repo2', + 'test-org/repo3', + 'singer-io/repo1', + 'singer-io/repo2', + 'singer-io/repo3' + ] + + self.assertListEqual(expected_repositories, side_effect) diff --git a/tests/unittests/test_key_error.py b/tests/unittests/test_key_error.py index ab23600d..7e5bb28c 100644 --- a/tests/unittests/test_key_error.py +++ b/tests/unittests/test_key_error.py @@ -23,21 +23,22 @@ def test_slug_sub_stream_selected_slug_selected(self, mocked_team_members, mocke mocked_request.return_value = get_response(json) schemas = {"teams": "None", "team_members": "None"} - mdata =[ + mdata_slug = [ { 'breadcrumb': [], 'metadata': {'selected': True, 'table-key-properties': ['id']} - }, + }, { 'breadcrumb': ['properties', 'slug'], 'metadata': {'inclusion': 'available'} - }, + }, { "breadcrumb": [ "properties", "name"], "metadata": {"inclusion": "available"} }] + mdata = {"teams": mdata_slug, "team_members": mdata_slug} tap_github.get_all_teams(schemas, "tap-github", {}, mdata, "") - self.assertEquals(mocked_team_members.call_count, 1) + self.assertEqual(mocked_team_members.call_count, 1) @mock.patch("tap_github.__init__.get_all_team_members") def test_slug_sub_stream_not_selected_slug_selected(self, mocked_team_members, mocked_request): @@ -46,9 +47,9 @@ def test_slug_sub_stream_not_selected_slug_selected(self, mocked_team_members, m mocked_request.return_value = get_response(json) schemas = {"teams": "None"} - mdata =[ + mdata = {"teams": [ { - 'breadcrumb': [], + 'breadcrumb': [], 'metadata': {'selected': True, 'table-key-properties': ['id']} }, { @@ -58,9 +59,9 @@ def test_slug_sub_stream_not_selected_slug_selected(self, mocked_team_members, m { "breadcrumb": [ "properties", "name"], "metadata": {"inclusion": "available"} - }] + }]} tap_github.get_all_teams(schemas, "tap-github", {}, mdata, "") - self.assertEquals(mocked_team_members.call_count, 0) + self.assertEqual(mocked_team_members.call_count, 0) @mock.patch("tap_github.__init__.get_all_team_members") def test_slug_sub_stream_selected_slug_not_selected(self, mocked_team_members, mocked_request): @@ -69,7 +70,7 @@ def test_slug_sub_stream_selected_slug_not_selected(self, mocked_team_members, m mocked_request.return_value = get_response(json) schemas = {"teams": "None", "team_members": "None"} - mdata =[ + mdata_slug = [ { 'breadcrumb': [], 'metadata': {'selected': True, 'table-key-properties': ['id']} @@ -82,8 +83,9 @@ def test_slug_sub_stream_selected_slug_not_selected(self, mocked_team_members, m "breadcrumb": [ "properties", "name"], "metadata": {"inclusion": "available"} }] + mdata = {"teams": mdata_slug, "team_members": mdata_slug} tap_github.get_all_teams(schemas, "tap-github", {}, mdata, "") - self.assertEquals(mocked_team_members.call_count, 1) + self.assertEqual(mocked_team_members.call_count, 1) @mock.patch("tap_github.__init__.get_all_team_members") def test_slug_sub_stream_not_selected_slug_not_selected(self, mocked_team_members, mocked_request): @@ -92,7 +94,7 @@ def test_slug_sub_stream_not_selected_slug_not_selected(self, mocked_team_member 
mocked_request.return_value = get_response(json) schemas = {"teams": "None"} - mdata =[ + mdata = {"teams": [ { 'breadcrumb': [], 'metadata': {'selected': True, 'table-key-properties': ['id']} @@ -104,9 +106,9 @@ def test_slug_sub_stream_not_selected_slug_not_selected(self, mocked_team_member { "breadcrumb": [ "properties", "name"], "metadata": {"inclusion": "available"} - }] + }]} tap_github.get_all_teams(schemas, "tap-github", {}, mdata, "") - self.assertEquals(mocked_team_members.call_count, 0) + self.assertEqual(mocked_team_members.call_count, 0) @mock.patch("tap_github.__init__.authed_get_all_pages") class TestKeyErrorUser(unittest.TestCase): @@ -118,7 +120,7 @@ def test_user_not_selected_in_stargazers(self, mocked_write_records, mocked_requ mocked_request.return_value = get_response(json) schemas = {"teams": "None"} - mdata =[ + mdata = [ { 'breadcrumb': [], 'metadata': {'selected': True, 'table-key-properties': ['user_id']} @@ -132,7 +134,7 @@ def test_user_not_selected_in_stargazers(self, mocked_write_records, mocked_requ "metadata": {"inclusion": "available"} }] tap_github.get_all_stargazers(schemas, "tap-github", {}, mdata, "") - self.assertEquals(mocked_write_records.call_count, 1) + self.assertEqual(mocked_write_records.call_count, 1) @mock.patch("singer.write_record") def test_user_selected_in_stargazers(self, mocked_write_records, mocked_request): @@ -141,7 +143,7 @@ def test_user_selected_in_stargazers(self, mocked_write_records, mocked_request) mocked_request.return_value = get_response(json) schemas = {"stargazers": "None"} - mdata =[ + mdata = [ { 'breadcrumb': [], 'metadata': {'selected': True, 'table-key-properties': ['user_id']} @@ -155,4 +157,4 @@ def test_user_selected_in_stargazers(self, mocked_write_records, mocked_request) "metadata": {"inclusion": "available"} }] tap_github.get_all_stargazers(schemas, "tap-github", {}, mdata, "") - self.assertEquals(mocked_write_records.call_count, 1) + self.assertEqual(mocked_write_records.call_count, 1) diff --git a/tests/unittests/test_rate_limit.py b/tests/unittests/test_rate_limit.py index d335a1e5..7fb01873 100644 --- a/tests/unittests/test_rate_limit.py +++ b/tests/unittests/test_rate_limit.py @@ -36,7 +36,7 @@ def test_rate_limit_exception(self, mocked_sleep): try: tap_github.rate_throttling(resp) except tap_github.RateLimitExceeded as e: - self.assertEquals(str(e), "API rate limit exceeded, please try after 601 seconds.") + self.assertEqual(str(e), "API rate limit exceeded, please try after 601 seconds.") def test_rate_limit_not_exceeded(self, mocked_sleep): diff --git a/tests/unittests/test_start_date_bookmark.py b/tests/unittests/test_start_date_bookmark.py index 6489b1d2..8cfb4b18 100644 --- a/tests/unittests/test_start_date_bookmark.py +++ b/tests/unittests/test_start_date_bookmark.py @@ -12,7 +12,7 @@ def test_no_bookmark_no_start_date(self, mocked_get_bookmark): bookmark_key = 'since' expected_bookmark_value = None - self.assertEquals(expected_bookmark_value, tap_github.get_bookmark('', '', '', bookmark_key, start_date)) + self.assertEqual(expected_bookmark_value, tap_github.get_bookmark('', '', '', bookmark_key, start_date)) def test_no_bookmark_yes_start_date(self, mocked_get_bookmark): # Start date is present and bookmark is not present then start date should be return. 
@@ -21,7 +21,7 @@ def test_no_bookmark_yes_start_date(self, mocked_get_bookmark):
         bookmark_key = 'since'
 
         expected_bookmark_value = '2021-04-01T00:00:00.000000Z'
-        self.assertEquals(expected_bookmark_value, tap_github.get_bookmark('', '', '', bookmark_key, start_date))
+        self.assertEqual(expected_bookmark_value, tap_github.get_bookmark('', '', '', bookmark_key, start_date))
 
     def test_yes_bookmark_yes_start_date(self, mocked_get_bookmark):
         # Start date and bookmark both are present then bookmark should be return.
@@ -30,7 +30,7 @@ def test_yes_bookmark_yes_start_date(self, mocked_get_bookmark):
         bookmark_key = 'since'
 
         expected_bookmark_value = '2021-05-01T00:00:00.000000Z'
-        self.assertEquals(expected_bookmark_value, tap_github.get_bookmark('', '', '', bookmark_key, start_date))
+        self.assertEqual(expected_bookmark_value, tap_github.get_bookmark('', '', '', bookmark_key, start_date))
 
     def test_yes_bookmark_no_start_date(self, mocked_get_bookmark):
         # Start date is not present and bookmark is present then bookmark should be return.
@@ -39,4 +39,4 @@ def test_yes_bookmark_no_start_date(self, mocked_get_bookmark):
         bookmark_key = 'since'
 
         expected_bookmark_value = '2021-05-01T00:00:00.000000Z'
-        self.assertEquals(expected_bookmark_value, tap_github.get_bookmark('', '', '', bookmark_key, start_date))
+        self.assertEqual(expected_bookmark_value, tap_github.get_bookmark('', '', '', bookmark_key, start_date))
diff --git a/tests/unittests/test_sub_streams_selection.py b/tests/unittests/test_sub_streams_selection.py
index 329f88b2..8dd16ff9 100644
--- a/tests/unittests/test_sub_streams_selection.py
+++ b/tests/unittests/test_sub_streams_selection.py
@@ -12,7 +12,7 @@ def test_pull_request_sub_streams_not_selected(self):
         try:
             tap_github.validate_dependencies(selected_streams)
         except tap_github.DependencyException as e:
-            self.assertEquals(str(e), "Unable to extract 'reviews' data, to receive 'reviews' data, you also need to select 'pull_requests'. Unable to extract 'pr_commits' data, to receive 'pr_commits' data, you also need to select 'pull_requests'.")
+            self.assertEqual(str(e), "Unable to extract 'reviews' data, to receive 'reviews' data, you also need to select 'pull_requests'. Unable to extract 'pr_commits' data, to receive 'pr_commits' data, you also need to select 'pull_requests'.")
 
     def test_teams_sub_streams_selected(self):
         selected_streams = ["teams", "team_members"]
@@ -23,7 +23,7 @@ def test_teams_sub_streams_not_selected(self):
         try:
             tap_github.validate_dependencies(selected_streams)
         except tap_github.DependencyException as e:
-            self.assertEquals(str(e), "Unable to extract 'team_members' data, to receive 'team_members' data, you also need to select 'teams'.")
+            self.assertEqual(str(e), "Unable to extract 'team_members' data, to receive 'team_members' data, you also need to select 'teams'.")
 
     def test_projects_sub_streams_selected(self):
         selected_streams = ["projects", "project_cards"]
@@ -34,7 +34,7 @@ def test_projects_sub_streams_not_selected(self):
         try:
             tap_github.validate_dependencies(selected_streams)
         except tap_github.DependencyException as e:
-            self.assertEquals(str(e), "Unable to extract 'project_columns' data, to receive 'project_columns' data, you also need to select 'projects'.")
+            self.assertEqual(str(e), "Unable to extract 'project_columns' data, to receive 'project_columns' data, you also need to select 'projects'.")
 
     def test_mixed_streams_positive(self):
         selected_streams = ["pull_requests", "reviews", "collaborators", "team_members", "stargazers", "projects", "teams", "project_cards"]
@@ -45,4 +45,4 @@ def test_mixed_streams_negative(self):
         try:
             tap_github.validate_dependencies(selected_streams)
         except tap_github.DependencyException as e:
-            self.assertEquals(str(e), "Unable to extract 'review_comments' data, to receive 'review_comments' data, you also need to select 'pull_requests'.")
+            self.assertEqual(str(e), "Unable to extract 'review_comments' data, to receive 'review_comments' data, you also need to select 'pull_requests'.")
diff --git a/tests/unittests/test_verify_access.py b/tests/unittests/test_verify_access.py
index 72f868de..1e00df32 100644
--- a/tests/unittests/test_verify_access.py
+++ b/tests/unittests/test_verify_access.py
@@ -34,7 +34,7 @@ def test_repo_not_found(self, mocked_parse_args, mocked_request):
         try:
             tap_github.verify_repo_access("", "repo")
         except tap_github.NotFoundException as e:
-            self.assertEquals(str(e), "HTTP-error-code: 404, Error: Please check the repository name 'repo' or you do not have sufficient permissions to access this repository.")
+            self.assertEqual(str(e), "HTTP-error-code: 404, Error: Please check the repository name 'repo' or you do not have sufficient permissions to access this repository.")
 
     def test_repo_bad_request(self, mocked_parse_args, mocked_request):
         mocked_request.return_value = get_response(400, raise_error = True)
@@ -42,7 +42,7 @@ def test_repo_bad_request(self, mocked_parse_args, mocked_request):
         try:
             tap_github.verify_repo_access("", "repo")
         except tap_github.BadRequestException as e:
-            self.assertEquals(str(e), "HTTP-error-code: 400, Error: The request is missing or has a bad parameter.")
+            self.assertEqual(str(e), "HTTP-error-code: 400, Error: The request is missing or has a bad parameter.")
 
     def test_repo_bad_creds(self, mocked_parse_args, mocked_request):
         json = {"message": "Bad credentials", "documentation_url": "https://docs.github.com/"}
@@ -51,7 +51,7 @@ def test_repo_bad_creds(self, mocked_parse_args, mocked_request):
         mocked_request.return_value = get_response(401, json, True)
 
         try:
            tap_github.verify_repo_access("", "repo")
         except tap_github.BadCredentialsException as e:
-            self.assertEquals(str(e), "HTTP-error-code: 401, Error: {}".format(json))
+            self.assertEqual(str(e), "HTTP-error-code: 401, Error: {}".format(json))
 
@mock.patch("tap_github.get_catalog") def test_discover_valid_creds(self, mocked_get_catalog, mocked_parse_args, mocked_request): @@ -71,8 +71,8 @@ def test_discover_not_found(self, mocked_get_catalog, mocked_parse_args, mocked_ try: tap_github.do_discover({"access_token": "access_token", "repository": "org/repo"}) except tap_github.NotFoundException as e: - self.assertEquals(str(e), "HTTP-error-code: 404, Error: Please check the repository name 'org/repo' or you do not have sufficient permissions to access this repository.") - self.assertEqual(mocked_get_catalog.call_count, 0) + self.assertEqual(str(e), "HTTP-error-code: 404, Error: Please check the repository name org/repo or you do not have sufficient permissions to access this repository.") + self.assertEqual(mocked_get_catalog.call_count, 1) @mock.patch("tap_github.get_catalog") def test_discover_bad_request(self, mocked_get_catalog, mocked_parse_args, mocked_request): @@ -82,7 +82,7 @@ def test_discover_bad_request(self, mocked_get_catalog, mocked_parse_args, mocke try: tap_github.do_discover({"access_token": "access_token", "repository": "org/repo"}) except tap_github.BadRequestException as e: - self.assertEquals(str(e), "HTTP-error-code: 400, Error: The request is missing or has a bad parameter.") + self.assertEqual(str(e), "HTTP-error-code: 400, Error: The request is missing or has a bad parameter.") self.assertEqual(mocked_get_catalog.call_count, 0) @mock.patch("tap_github.get_catalog") @@ -94,7 +94,7 @@ def test_discover_bad_creds(self, mocked_get_catalog, mocked_parse_args, mocked_ try: tap_github.do_discover({"access_token": "access_token", "repository": "org/repo"}) except tap_github.BadCredentialsException as e: - self.assertEquals(str(e), "HTTP-error-code: 401, Error: {}".format(json)) + self.assertEqual(str(e), "HTTP-error-code: 401, Error: {}".format(json)) self.assertEqual(mocked_get_catalog.call_count, 0) @mock.patch("tap_github.get_catalog") @@ -106,7 +106,7 @@ def test_discover_forbidden(self, mocked_get_catalog, mocked_parse_args, mocked_ try: tap_github.do_discover({"access_token": "access_token", "repository": "org/repo"}) except tap_github.AuthException as e: - self.assertEquals(str(e), "HTTP-error-code: 403, Error: {}".format(json)) + self.assertEqual(str(e), "HTTP-error-code: 403, Error: {}".format(json)) self.assertEqual(mocked_get_catalog.call_count, 0) @@ -123,5 +123,5 @@ def test_repo_call_count(self, mocked_repo, mocked_logger_info): config = {"access_token": "access_token", "repository": "org1/repo1 org1/repo2 org2/repo1"} tap_github.verify_access_for_repo(config) - self.assertEquals(mocked_logger_info.call_count, 3) - self.assertEquals(mocked_repo.call_count, 3) + self.assertEqual(mocked_logger_info.call_count, 3) + self.assertEqual(mocked_repo.call_count, 3)
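-- 
A note on the `org/*` wildcard pinned down by test_extract_repos_from_config.py
and test_get_all_repos.py above: entries in the space-separated `repository`
config value that end in `/*` are expanded into one entry per repository of
that organization. The sketch below is illustrative only, assuming nothing
beyond what these tests assert; the stubbed get_all_repos() stands in for the
tap's real helper, which pages through GitHub's "list organization
repositories" endpoint.

    # Illustrative sketch, not the tap's actual code. The stub below fakes
    # the per-org API call that the real get_all_repos() performs.
    def get_all_repos(organizations):
        # e.g. 'test-org/*' -> ['test-org/repo1', 'test-org/repo2', 'test-org/repo3']
        return [org.replace('/*', '/repo{}'.format(i))
                for org in organizations for i in (1, 2, 3)]

    def extract_repos_from_config(config):
        # 'repository' holds space-separated 'org/repo' and 'org/*' entries
        repo_paths = config['repository'].split()
        orgs = [path for path in repo_paths if path.endswith('/*')]
        repos = [path for path in repo_paths if not path.endswith('/*')]
        # plain repos come first, then the expanded wildcards, matching
        # the ordering asserted in test_org_all_repos above
        return repos + get_all_repos(orgs)

    print(extract_repos_from_config({'repository': 'singer-io/test-repo test-org/*'}))
    # ['singer-io/test-repo', 'test-org/repo1', 'test-org/repo2', 'test-org/repo3']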