From 65f76c5ecdf0ddcbf5a84aaebba37f2519d4f568 Mon Sep 17 00:00:00 2001 From: Thomas Apodaca Date: Mon, 16 Nov 2015 15:25:37 -0800 Subject: [PATCH 1/4] test for GitHub project URLs with .git extensions --- test/updater/test_run_update.py | 39 ++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/test/updater/test_run_update.py b/test/updater/test_run_update.py index 99b8546..a502681 100644 --- a/test/updater/test_run_update.py +++ b/test/updater/test_run_update.py @@ -1289,6 +1289,44 @@ def overwrite_response_content(url, request): self.assertTrue(type(check_project.tags) is unicode) self.assertTrue(len(check_project.tags) > 0) + def test_git_extension_stripped_from_git_url(self): + ''' A .git extension is stripped from a project's GitHub URL + ''' + self.setup_mock_rss_response() + + from app import Project + import run_update + + # alter responses to return only one organization, with one project that + # has a GitHub URL with .git at the end + def overwrite_response_content(url, request): + if "docs.google.com" in url: + org_lines = [u'''name,website,events_url,rss,projects_list_url'''.encode('utf8'), u'''Cöde for Ameriça,http://codeforamerica.org,http://www.meetup.com/events/Code-For-Charlotte/,http://www.codeforamerica.org/blog/feed/,http://example.com/cfa-projects.csv'''.encode('utf8')] + return response(200, '''\n'''.join(org_lines), {'content-type': 'text/csv; charset=UTF-8'}) + elif url.geturl() == 'http://example.com/cfa-projects.csv': + project_lines = ['''Name,description,link_url,code_url,type,categories,tags,status'''.encode('utf8'), ''',,,https://github.com/codeforamerica/cityvoice.git,,,"safety, police, poverty",Shuttered'''.encode('utf8')] + return response(200, '''\n'''.join(project_lines), {'content-type': 'text/csv; charset=UTF-8'}) + + # run a standard run_update + with HTTMock(self.response_content): + with HTTMock(overwrite_response_content): + run_update.main(org_sources=run_update.TEST_ORG_SOURCES_FILENAME) + + check_project = self.db.session.query(Project).first() + # the project exists + self.assertIsNotNone(check_project) + self.assertIsNotNone(check_project.id) + # the project has issues + self.assertTrue(hasattr(check_project, 'issues')) + self.assertTrue(len(check_project.issues) > 0) + # the project has status & tags from civic.json + self.assertTrue(check_project.status is not None) + self.assertTrue(type(check_project.status) is unicode) + self.assertTrue(len(check_project.status) > 0) + self.assertTrue(check_project.tags is not None) + self.assertTrue(type(check_project.tags) is unicode) + self.assertTrue(len(check_project.tags) > 0) + def test_unmodified_projects_stay_in_database(self): ''' Verify that unmodified projects are not deleted from the database ''' @@ -1603,7 +1641,6 @@ def overwrite_response(url, request): def test_two_issues_with_the_same_name(self): ''' Two issues with the same name but different html_urls should be saved as separate issues. ''' - # ;;; from app import Project, Issue import run_update self.setup_mock_rss_response() From efea7eb79096e32c0f21744b5b04e9b73ba466b0 Mon Sep 17 00:00:00 2001 From: Thomas Apodaca Date: Mon, 16 Nov 2015 15:27:59 -0800 Subject: [PATCH 2/4] test for once-existing projects deleted on 404 --- test/updater/test_run_update.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/test/updater/test_run_update.py b/test/updater/test_run_update.py index a502681..ff15088 100644 --- a/test/updater/test_run_update.py +++ b/test/updater/test_run_update.py @@ -1674,6 +1674,38 @@ def overwrite_response_content(url, request): self.assertEqual(check_issue.title, same_title) self.assertEqual(check_issue.project_id, project_id) + def test_404ing_project_deleted(self): + ''' A project that once existed but is now returning a 404 is deleted from the database. + ''' + from app import Project + self.setup_mock_rss_response() + + # run a vanilla update + with HTTMock(self.response_content): + import run_update + run_update.main(org_sources=run_update.TEST_ORG_SOURCES_FILENAME) + + filter = Project.name == u'cityvoice' + projects = self.db.session.query(Project).filter(filter).all() + self.assertEqual(len(projects), 3) + + def overwrite_response_content(url, request): + if 'https://api.github.com/repos/codeforamerica/cityvoice' in url.geturl(): + return response(404, '''{"message": "Not Found", "documentation_url": "https://developer.github.com/v3"}''', {'ETag': '8456bc53d4cf6b78779ded3408886f82'}) + + logging.error = Mock() + + # run a new update + with HTTMock(self.response_content): + with HTTMock(overwrite_response_content): + import run_update + run_update.main(org_sources=run_update.TEST_ORG_SOURCES_FILENAME) + + logging.error.assert_called_with('https://api.github.com/repos/codeforamerica/cityvoice doesn\'t exist.') + filter = Project.name == u'cityvoice' + projects = self.db.session.query(Project).filter(filter).all() + self.assertEqual(len(projects), 0) + if __name__ == '__main__': unittest.main() From eb7eec5a24f69459797d0b69961fd81dfe0926ba Mon Sep 17 00:00:00 2001 From: Thomas Apodaca Date: Mon, 16 Nov 2015 15:29:03 -0800 Subject: [PATCH 3/4] delete existing projects that now return 404s --- run_update.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/run_update.py b/run_update.py index bf499f5..ba4584d 100644 --- a/run_update.py +++ b/run_update.py @@ -497,8 +497,14 @@ def update_project_info(project): if got.status_code in range(400, 499): if got.status_code == 404: - logging.error(repo_url + ' doesn\'t exist.') - # If its a bad GitHub link, don't return it at all. + # It's a bad GitHub link + logging.error(u"{} doesn't exist.".format(repo_url)) + # If there's an existing project in the database, get rid of it + if existing_project: + # this is redundant, but let's make sure + existing_project.keep = False + db.session.commit() + # Take the project out of the loop by returning None return None elif got.status_code == 403: @@ -525,7 +531,6 @@ def update_project_info(project): # nothing was updated, but make sure we keep the project # :::here (project/true) existing_project.keep = True - db.session.add(existing_project) # commit the project db.session.commit() return None From 7e453a57107473d36442058c81dcb49ff05a5b88 Mon Sep 17 00:00:00 2001 From: Thomas Apodaca Date: Mon, 16 Nov 2015 15:29:31 -0800 Subject: [PATCH 4/4] handle .git extension at the end of GitHub URLs --- run_update.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/run_update.py b/run_update.py index ba4584d..fa1d016 100644 --- a/run_update.py +++ b/run_update.py @@ -424,6 +424,8 @@ def make_root_github_project_path(path): ''' path_split = path.split('/') path = '/'.join(path_split[0:3]) + # some URLs have been passed to us with '.git' at the end + path = sub(ur'\.git$', '', path) return path