Skip to content

Commit

Permalink
add tests for ERDDAPTableCrawler
Browse files Browse the repository at this point in the history
  • Loading branch information
aperrin66 committed Mar 4, 2024
1 parent 39712c5 commit 7743981
Show file tree
Hide file tree
Showing 3 changed files with 329 additions and 0 deletions.
57 changes: 57 additions & 0 deletions tests/data/erddap/coverage.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
{
"table": {
"columnNames": ["time", "longitude", "latitude", "time_qc", "position_qc"],
"columnTypes": ["String", "double", "double", "String", "String"],
"columnUnits": ["UTC", "degrees_east", "degrees_north", null, null],
"rows": [
["1997-07-28T20:26:20Z", -11.863, -0.126, "1", "1"],
["1997-08-09T01:52:41Z", -13.83, -0.035, "1", "1"],
["1997-08-19T20:44:44Z", -15.744, 0.68, "1", "1"],
["1997-08-30T20:12:43Z", -16.674, 0.76, "1", "1"],
["1997-09-10T21:03:19Z", -17.133, 1.21, "1", "1"],
["1997-09-21T20:19:15Z", -17.74, 1.403, "1", "1"],
["1997-10-03T01:53:47Z", -17.734, 1.263, "1", "1"],
["1997-10-13T20:37:03Z", -17.189, 1.756, "1", "1"],
["1997-10-24T20:05:22Z", -16.437, 1.191, "1", "1"],
["1997-11-04T20:55:40Z", -16.039, 1.409, "1", "1"],
["1997-11-15T20:11:40Z", -15.451, 1.177, "1", "1"],
["1997-11-27T01:52:43Z", -15.075, 1.132, "1", "1"],
["1997-12-07T20:30:55Z", -14.329, 1.182, "1", "1"],
["1997-12-19T02:49:09Z", -13.586, 1.285, "1", "1"],
["1997-12-29T20:48:55Z", -13.488, 1.766, "1", "1"],
["1998-01-09T20:10:10Z", -13.593, 2.086, "1", "1"],
["1998-01-21T01:51:04Z", -14.115, 2.533, "1", "1"],
["1998-01-31T20:21:22Z", -15.016, 2.923, "1", "1"],
["1998-02-12T02:47:20Z", -15.901, 3.11, "1", "1"],
["1998-02-22T20:38:16Z", -16.634, 3.042, "1", "1"],
["1998-03-05T20:06:40Z", -16.874, 3.115, "1", "1"],
["1998-03-16T20:57:05Z", -17.081, 3.125, "1", "1"],
["1998-03-27T20:11:55Z", -17.515, 3.171, "1", "1"],
["1998-04-08T02:45:13Z", -17.623, 3.318, "1", "1"],
["1998-04-18T20:28:59Z", -17.668, 3.358, "1", "1"],
["1998-04-30T02:07:29Z", -17.332, 3.699, "1", "1"],
["1998-05-10T20:45:39Z", -16.714, 3.96, "1", "1"],
["1998-05-21T20:08:57Z", -15.962, 4.15, "1", "1"],
["1998-06-02T02:41:15Z", -15.254, 3.998, "1", "1"],
["1998-06-12T20:18:12Z", -14.585, 4.127, "1", "1"],
["1998-06-24T02:00:06Z", -14.048, 4.175, "1", "1"],
["1998-07-04T20:35:34Z", -13.926, 4.17, "1", "1"],
["1998-07-16T02:55:13Z", -13.769, 4.183, "1", "1"],
["1998-07-27T02:37:14Z", -13.47, 4.276, "1", "1"],
["1998-08-06T20:11:11Z", -13.134, 4.322, "1", "1"],
["1998-08-18T01:56:55Z", -12.887, 4.221, "1", "1"],
["1998-08-28T20:22:40Z", -12.702, 4.292, "1", "1"],
["1998-09-09T02:50:30Z", -12.415, 4.275, "1", "1"],
["1998-09-20T04:07:55Z", -12.116, 4.126, "1", "1"],
["1998-10-01T02:09:51Z", -11.792, 3.997, "1", "1"],
["1998-10-12T03:25:33Z", -11.3, 3.732, "1", "1"],
["1998-10-22T20:09:23Z", -10.925, 3.94, "1", "1"],
["1998-11-03T02:42:30Z", -10.152, 3.852, "1", "1"],
["1998-11-14T02:23:54Z", -9.558, 4.015, "1", "1"],
["1998-11-25T02:01:50Z", -9.756, 4.6, "1", "1"],
["1998-12-06T03:16:18Z", -10.046, 5.203, "1", "1"],
["1998-12-16T20:42:42Z", -9.934, 5.179, "1", "1"],
["1998-12-27T20:00:25Z", -9.612, 4.975, "1", "1"]
]
}
}
21 changes: 21 additions & 0 deletions tests/data/erddap/ids.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{
"table": {
"columnNames": ["platform_number"],
"columnTypes": ["String"],
"columnUnits": [null],
"rows": [
["3901480"],
["5905121"],
["5905267"],
["5905498"],
["5905533"],
["5905765"],
["5905878"],
["5906337"],
["5906912"],
["5906993"],
["6902906"],
["6903060"]
]
}
}
251 changes: 251 additions & 0 deletions tests/test_generic_crawlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1318,3 +1318,254 @@ def test_get_normalized_attributes(self):
'geospaas_service': 'ftp'
})
mock_handler.get_parameters.assert_called_once_with({'url': 'ftp://uri'})


class ERDDAPTableCrawlerTestCase(unittest.TestCase):
    """Tests for ERDDAPTableCrawler"""

    # Directory holding canned ERDDAP JSON responses used as test fixtures
    TEST_DATA_PATH = os.path.join(os.path.dirname(__file__), 'data', 'erddap')

    def test_url_check(self):
        """ERDDAPTableCrawler's url should end with .json"""
        with self.assertRaises(ValueError):
            crawlers.ERDDAPTableCrawler('http://foo', 'bar')

    def test_equality(self):
        """Test equality of two ERDDAPTableCrawler objects:
        crawlers with the same constructor arguments compare equal,
        differing keyword arguments make them unequal
        """
        self.assertEqual(
            crawlers.ERDDAPTableCrawler('http://foo/ArgoFloats.json', 'platform_number'),
            crawlers.ERDDAPTableCrawler('http://foo/ArgoFloats.json', 'platform_number'))
        self.assertNotEqual(
            crawlers.ERDDAPTableCrawler('http://foo/ArgoFloats.json', 'platform_number'),
            crawlers.ERDDAPTableCrawler('http://foo/ArgoFloats.json', 'platform_number',
                                        longitude_attr='lon', latitude_attr='lat'))

    def test_get_ids(self):
        """Test getting identifiers which match search terms"""
        response_path = os.path.join(self.TEST_DATA_PATH, 'ids.json')
        # build a real requests.Response whose body is streamed from the
        # fixture file, so the crawler parses it like an actual HTTP reply
        response = requests.Response()
        response.status_code = 200
        response.raw = open(response_path, 'rb')
        crawler = crawlers.ERDDAPTableCrawler('http://foo/ArgoFloats.json', 'platform_number',
                                              search_terms=['time>=2024-01-01T00:00:00Z',
                                                            'time<=2024-01-01T01:00:00Z'])
        with mock.patch.object(crawler, '_http_get', return_value=response):
            ids = crawler.get_ids()
            self.assertListEqual(
                list(ids),
                ["3901480", "5905121", "5905267", "5905498", "5905533", "5905765", "5905878",
                 "5906337", "5906912", "5906993", "6902906", "6903060"])
        response.raw.close()

    def test_get_ids_error(self):
        """An error message must be logged if an error happens when
        fetching IDs
        """
        error = requests.HTTPError(response=mock.Mock(content='error message'))
        crawler = crawlers.ERDDAPTableCrawler('http://foo/ArgoFloats.json', 'platform_number')
        with mock.patch.object(crawler, '_http_get', side_effect=error):
            # the error must be both logged and re-raised
            with self.assertLogs(logger=crawler.logger, level=logging.ERROR), \
                 self.assertRaises(requests.HTTPError):
                list(crawler.get_ids())

    def test_crawl(self):
        """Test the DatasetInfo objects returned by the crawler:
        one URL per ID, selecting the coordinate/QC columns plus the
        requested variables, filtered on the quoted ID value
        """
        ids = ["3901480", "5905121", "5905267"]
        crawler = crawlers.ERDDAPTableCrawler(
            'http://foo/ArgoFloats.json', 'platform_number',
            position_qc_attr='position_qc', variables=['foo', 'bar'])
        with mock.patch.object(crawler, 'get_ids', return_value=ids):
            self.assertListEqual(
                list(crawler.crawl()),
                [
                    crawlers.DatasetInfo(
                        'http://foo/ArgoFloats.json?time,longitude,latitude,position_qc,foo,bar'
                        '&platform_number="3901480"',
                        {'entry_id': '3901480'}),
                    crawlers.DatasetInfo(
                        'http://foo/ArgoFloats.json?time,longitude,latitude,position_qc,foo,bar'
                        '&platform_number="5905121"',
                        {'entry_id': '5905121'}),
                    crawlers.DatasetInfo(
                        'http://foo/ArgoFloats.json?time,longitude,latitude,position_qc,foo,bar'
                        '&platform_number="5905267"',
                        {'entry_id': '5905267'}),
                ])

    def test_check_qc(self):
        """Test the QC validation"""
        crawler = crawlers.ERDDAPTableCrawler('foo.json', 'bar', valid_qc_codes=('1', '2'))
        self.assertTrue(crawler._check_qc('1'))
        self.assertTrue(crawler._check_qc('2'))
        self.assertFalse(crawler._check_qc('0'))
        self.assertFalse(crawler._check_qc('3'))
        # QC codes are compared as strings: the integer 1 is not valid
        # even though the string '1' is
        self.assertFalse(crawler._check_qc(1))

    def test_make_coverage_url(self):
        """Test making the URL to get a dataset's temporal and spatial
        coverage
        """
        self.assertEqual(
            crawlers.ERDDAPTableCrawler('https://foo.json', 'id',
                                        longitude_attr='lon', latitude_attr='lat',
                                        time_attr='time',
                                        position_qc_attr='pos_qc',
                                        variables=['bar', 'baz'])._make_coverage_url(),
            'https://foo.json?time,lon,lat,pos_qc&distinct()&orderBy("time")'
        )

    def test_get_coverage(self):
        """Test getting the temporal and spatial coverage for one
        dataset
        """
        crawler = crawlers.ERDDAPTableCrawler(
            'https://foo.json', 'platform_number',
            longitude_attr='longitude', latitude_attr='latitude',
            time_attr='time',
            position_qc_attr='position_qc', time_qc_attr='time_qc')

        # stream the canned coverage fixture as the HTTP response body
        response = requests.Response()
        response.status_code = 200
        response.raw = open(os.path.join(self.TEST_DATA_PATH, 'coverage.json'), 'rb')

        # (longitude, latitude) pairs matching the rows of coverage.json,
        # in order
        expected_trajectory = [
            (-11.863, -0.126),
            (-13.83, -0.035),
            (-15.744, 0.68),
            (-16.674, 0.76),
            (-17.133, 1.21),
            (-17.74, 1.403),
            (-17.734, 1.263),
            (-17.189, 1.756),
            (-16.437, 1.191),
            (-16.039, 1.409),
            (-15.451, 1.177),
            (-15.075, 1.132),
            (-14.329, 1.182),
            (-13.586, 1.285),
            (-13.488, 1.766),
            (-13.593, 2.086),
            (-14.115, 2.533),
            (-15.016, 2.923),
            (-15.901, 3.11),
            (-16.634, 3.042),
            (-16.874, 3.115),
            (-17.081, 3.125),
            (-17.515, 3.171),
            (-17.623, 3.318),
            (-17.668, 3.358),
            (-17.332, 3.699),
            (-16.714, 3.96),
            (-15.962, 4.15),
            (-15.254, 3.998),
            (-14.585, 4.127),
            (-14.048, 4.175),
            (-13.926, 4.17),
            (-13.769, 4.183),
            (-13.47, 4.276),
            (-13.134, 4.322),
            (-12.887, 4.221),
            (-12.702, 4.292),
            (-12.415, 4.275),
            (-12.116, 4.126),
            (-11.792, 3.997),
            (-11.3, 3.732),
            (-10.925, 3.94),
            (-10.152, 3.852),
            (-9.558, 4.015),
            (-9.756, 4.6),
            (-10.046, 5.203),
            (-9.934, 5.179),
            (-9.612, 4.975),
        ]

        with mock.patch.object(crawler, '_http_get', return_value=response) as mock_http_get:
            # result is ((start_time, end_time), trajectory)
            self.assertTupleEqual(
                crawler.get_coverage('13858'),
                (("1997-07-28T20:26:20Z", "1998-12-27T20:00:25Z"), expected_trajectory))
            mock_http_get.assert_called_once_with(
                crawler._make_coverage_url(),
                request_parameters={'params': {'platform_number': '"13858"'}})
        response.raw.close()

    def test_get_coverage_error(self):
        """`get_coverage` must raise an exception when the coverage
        cannot be determined
        """
        # none of the QC flags in the fake rows below match the valid
        # codes, so no valid data point is found
        crawler = crawlers.ERDDAPTableCrawler(
            'https://foo.json', 'platform_number', valid_qc_codes=(1,))
        with mock.patch.object(crawler, '_http_get') as mock_http_get:
            mock_http_get.return_value.json.return_value = {
                'table': {
                    'rows': [
                        ["1997-07-28T20:26:20Z", -11.863, -0.126, "3", "1"],
                        ["1997-08-09T01:52:41Z", -13.83, -0.035, "5", "1"],
                        ["1997-08-19T20:44:44Z", -15.744, 0.68, "3", "1"],
                        ["1997-08-30T20:12:43Z", -16.674, 0.76, "4", "1"],
                        ["1997-09-10T21:03:19Z", -17.133, 1.21, "4", "1"],
                    ]
                }
            }
            with self.assertRaises(RuntimeError):
                crawler.get_coverage('123456')

    def test_get_coverage_http_error(self):
        """`get_coverage` must raise an exception when an HTTP error
        happens
        """
        crawler = crawlers.ERDDAPTableCrawler('https://foo.json', 'platform_number')
        error = requests.HTTPError(response=mock.MagicMock())
        with mock.patch.object(crawler, '_http_get', side_effect=error):
            with self.assertRaises(requests.HTTPError), \
                 self.assertLogs(crawler.logger, logging.ERROR):
                crawler.get_coverage('123456')

    def test_make_product_metadata_url(self):
        """Test creating the URL to a product's metadata"""
        # the metadata URL is derived from the tabledap URL, so a URL
        # that does not follow the tabledap pattern raises an error
        self.assertEqual(
            crawlers.ERDDAPTableCrawler(
                'https://erddap.ifremer.fr/erddap/tabledap/ArgoFloats.json', 'id'
            )._make_product_metadata_url(),
            'https://erddap.ifremer.fr/erddap/info/ArgoFloats/index.json')

        with self.assertRaises(RuntimeError):
            crawlers.ERDDAPTableCrawler('https://foo.json', 'id')._make_product_metadata_url()

    def test_get_product_metadata(self):
        """Test getting a product's metadata"""
        crawler = crawlers.ERDDAPTableCrawler(
            'https://erddap.ifremer.fr/erddap/tabledap/ArgoFloats.json', 'id')
        with mock.patch.object(crawler, '_http_get') as mock_http_get:
            result = crawler.get_product_metadata()
            self.assertEqual(result, mock_http_get.return_value.json.return_value)
            mock_http_get.assert_called_with(crawler._make_product_metadata_url())

    def test_get_product_metadata_http_error(self):
        """`get_product_metadata` must raise an exception when an HTTP
        error happens
        """
        crawler = crawlers.ERDDAPTableCrawler(
            'https://erddap.ifremer.fr/erddap/tabledap/ArgoFloats.json', 'id')
        error = requests.HTTPError
        with mock.patch.object(crawler, '_http_get', side_effect=error):
            with self.assertRaises(error), self.assertLogs(crawler.logger, logging.ERROR):
                crawler.get_product_metadata()

    def test_get_normalized_attributes(self):
        """Test attributes normalization"""
        dataset_info = crawlers.DatasetInfo('https://foo.json?id=bar', {'entry_id': 'bar'})
        crawler = crawlers.ERDDAPTableCrawler('https://foo.json', 'id')
        # mock out the network-dependent helpers; only the assembly of
        # the normalized attributes dict is under test here
        with mock.patch.object(crawler, 'get_coverage') as mock_get_coverage, \
             mock.patch.object(crawler, 'get_product_metadata') as mock_get_product_metadata, \
             mock.patch.object(crawler._metadata_handler, 'get_parameters') as mock_get_parameters:
            mock_get_coverage.return_value = (('date1', 'date2'), [(1, 2), (3, 4)])
            mock_get_product_metadata.return_value = {'baz': 'qux'}
            mock_get_parameters.return_value = {'key1': 'value1', 'key2': 'value2'}
            result = crawler.get_normalized_attributes(dataset_info)
        self.assertDictEqual(
            result,
            {
                'key1': 'value1',
                'key2': 'value2',
                'geospaas_service_name': geospaas.catalog.managers.HTTP_SERVICE_NAME,
                'geospaas_service': geospaas.catalog.managers.HTTP_SERVICE
            })

0 comments on commit 7743981

Please sign in to comment.