diff --git a/examples/datasets.py b/examples/datasets.py
new file mode 100644
index 0000000..1a02485
--- /dev/null
+++ b/examples/datasets.py
@@ -0,0 +1,73 @@
+#
+# Copyright (c) 2014 NSONE, Inc.
+#
+# License under The MIT License (MIT). See LICENSE in project root.
+#
+
+import time
+
+from ns1 import NS1
+
+# NS1 will use config in ~/.nsone by default
+api = NS1()
+
+# to specify an apikey here instead, use:
+
+# from ns1 import Config
+# config = Config()
+# config.createFromAPIKey('<>')
+# api = NS1(config=config)
+
+config = api.config
+
+#########################
+# LOAD / CREATE DATASET #
+#########################
+
+# create a dataset
+dt = api.datasets().create(
+    name="my dataset",
+    datatype={
+        "type": "num_queries",
+        "scope": "account",
+    },
+    repeat=None,
+    timeframe={"aggregation": "monthly", "cycles": 1},
+    export_type="csv",
+    recipient_emails=None,
+)
+print(dt)
+
+# load an existing dataset by id (the raw REST interface returns a dict)
+dt = api.datasets().retrieve(dt.get("id"))
+print(dt)
+
+####################
+# DOWNLOAD REPORTS #
+####################
+
+while True:
+    print("waiting for report to be generated...")
+    time.sleep(5)
+
+    dt = api.datasets().retrieve(dt.get("id"))
+    reports = dt.get("reports")
+    if reports is None:
+        continue
+
+    status = reports[0].get("status")
+    if status == "available":
+        print("report generation completed")
+        break
+
+    if status == "failed":
+        print("failed to generate report")
+        exit(1)
+
+report = api.datasets().retrieveReport(dt.get("id"), reports[0].get("id"))
+file_path = "%s.%s" % (dt.get("name"), dt.get("export_type"))
+
+with open(file_path, "w") as file:
+    file.write(report)
+
+print("dataset report saved to", file_path)
diff --git a/ns1/__init__.py b/ns1/__init__.py
index 384003b..3b11e59 100644
--- a/ns1/__init__.py
+++ b/ns1/__init__.py
@@ -293,6 +293,15 @@ def pools(self):
 
         return ns1.rest.pools.Pools(self.config)
 
+    def datasets(self):
+        """
+        Return a new raw REST interface to Datasets resources
+        :rtype: :py:class:`ns1.rest.datasets.Datasets`
+        """
+        import ns1.rest.datasets
+
+        return ns1.rest.datasets.Datasets(self.config)
+
     # HIGH LEVEL INTERFACE
     def loadZone(self, zone, callback=None, errback=None):
         """
diff --git a/ns1/dataset.py b/ns1/dataset.py
new file mode 100644
index 0000000..0dade64
--- /dev/null
+++ b/ns1/dataset.py
@@ -0,0 +1,174 @@
+from ns1.rest.datasets import Datasets
+
+
+class DatasetException(Exception):
+    pass
+
+
+class Dataset(object):
+    """
+    High level object representing a dataset.
+    """
+
+    def __init__(self, config):
+        """
+        Create a new high level Dataset object
+        :param ns1.config.Config config: config object
+        """
+        self._rest = Datasets(config)
+        self.config = config
+        self.data = None
+
+    def __repr__(self):
+        return "<Dataset id=%s, name=%s, datatype=%s, repeat=%s, timeframe=%s, export_type=%s, recipient_emails=%s>" % (
+            self.__getitem__("id"),
+            self.__getitem__("name"),
+            self.__getitem__("datatype"),
+            self.__getitem__("repeat"),
+            self.__getitem__("timeframe"),
+            self.__getitem__("export_type"),
+            self.__getitem__("recipient_emails"),
+        )
+
+    def __getitem__(self, item: str):
+        if not self.data:
+            raise DatasetException("dataset not loaded")
+        return self.data.get(item, None)
+
+    def reload(self, callback=None, errback=None):
+        """
+        Reload dataset data from the API.
+        :param callback: function call back once the call has completed
+        :param errback: function call back if the call fails
+        """
+        return self.load(reload=True, callback=callback, errback=errback)
+
+    def load(self, id: str = None, callback=None, errback=None, reload=False):
+        """
+        Load dataset data from the API.
+        :param str id: dataset id to load
+        :param callback: function call back once the call has completed
+        :param bool reload: whether to force a fetch from the API even if data is already loaded
+        """
+        if not reload and self.data:
+            return self.data
+        if id is None and self.data:
+            id = self.__getitem__("id")
+        if id is None:
+            raise DatasetException("no dataset id: did you mean to create?")
+
+        def success(result: dict, *args):
+            self.data = result
+            if callback:
+                return callback(self)
+            else:
+                return self
+
+        return self._rest.retrieve(id, callback=success, errback=errback)
+
+    def loadFromDict(self, dt: dict):
+        """
+        Load dataset data from a dictionary.
+        :param dict dt: dictionary containing *at least* either an id or all of name/datatype/repeat/timeframe/export_type/recipient_emails
+        """
+        if "id" in dt or (
+            "name" in dt
+            and "datatype" in dt
+            and "repeat" in dt
+            and "timeframe" in dt
+            and "export_type" in dt
+            and "recipient_emails" in dt
+        ):
+            self.data = dt
+            return self
+        else:
+            raise DatasetException("insufficient parameters")
+
+    def delete(self, callback=None, errback=None):
+        """
+        Delete the dataset.
+        :param callback: function call back once the call has completed
+        :param errback: function call back if the call fails
+        """
+        id = self.__getitem__("id")
+        return self._rest.delete(id, callback=callback, errback=errback)
+
+    def create(
+        self,
+        name: str,
+        datatype: dict,
+        repeat: dict,
+        timeframe: dict,
+        export_type: str,
+        recipient_emails: list,
+        callback=None,
+        errback=None,
+        **kwargs
+    ):
+        """
+        Create a new dataset. Pass a list of keywords and their values to
+        configure. For the list of keywords available for dataset configuration,
+        see :attr:`ns1.rest.datasets.Datasets.PASSTHRU_FIELDS`
+        :param str name: the name of the dataset
+        :param dict datatype: datatype settings to define the type of data to be pulled
+        :param dict repeat: repeat settings to define recurring reports
+        :param dict timeframe: timeframe settings for the data to be pulled
+        :param str export_type: output format of the report
+        :param list recipient_emails: list of user emails that will receive a copy of the report
+        :param callback: function call back once the call has completed
+        :param errback: function call back if the call fails
+        """
+        if self.data:
+            raise DatasetException("dataset already loaded")
+
+        return self._rest.create(
+            name,
+            datatype,
+            repeat,
+            timeframe,
+            export_type,
+            recipient_emails,
+            callback=callback,
+            errback=errback,
+            **kwargs
+        )
+
+    def listDatasets(self, callback=None, errback=None):
+        """
+        Lists all datasets currently configured.
+        :param callback: function call back once the call has completed
+        :param errback: function call back if the call fails
+        :return: a list of Dataset objects
+        """
+
+        def success(result, *args):
+            ret = []
+            for dt in result:
+                ret.append(Dataset(self.config).loadFromDict(dt))
+            if callback:
+                return callback(ret)
+            else:
+                return ret
+
+        return Datasets(self.config).list(callback=success, errback=errback)
+
+    def retrieveReport(
+        self, rp_id: str, dt_id: str = None, callback=None, errback=None
+    ):
+        """
+        Retrieves a generated report given a dataset id and a report id
+        :param str rp_id: the id of the generated report to download
+        :param str dt_id: the id of the dataset that the above report belongs to
+        :param callback: function call back once the call has completed
+        :param errback: function call back if the call fails
+        :return: the generated report contents
+        """
+
+        if dt_id is None and self.data:
+            dt_id = self.__getitem__("id")
+        if dt_id is None:
+            raise DatasetException("no dataset id: did you mean to create?")
+
+        return Datasets(self.config).retrieveReport(
+            dt_id, rp_id, callback=callback, errback=errback
+        )
diff --git a/ns1/rest/datasets.py b/ns1/rest/datasets.py
new file mode 100644
index 0000000..b65e94c
--- /dev/null
+++ b/ns1/rest/datasets.py
@@ -0,0 +1,99 @@
+from . import resource
+
+
+class Datasets(resource.BaseResource):
+    ROOT = "datasets"
+
+    PASSTHRU_FIELDS = [
+        "name",
+        "datatype",
+        "repeat",
+        "timeframe",
+        "export_type",
+        "recipient_emails",
+    ]
+
+    def _buildBody(
+        self,
+        name: str,
+        datatype: dict,
+        repeat: dict,
+        timeframe: dict,
+        export_type: str,
+        recipient_emails: list,
+        **kwargs
+    ):
+        body = {
+            "name": name,
+            "datatype": datatype,
+            "repeat": repeat,
+            "timeframe": timeframe,
+            "export_type": export_type,
+            "recipient_emails": recipient_emails,
+        }
+        self._buildStdBody(body, kwargs)
+        return body
+
+    def create(
+        self,
+        name: str,
+        datatype: dict,
+        repeat: dict,
+        timeframe: dict,
+        export_type: str,
+        recipient_emails: list,
+        callback=None,
+        errback=None,
+        **kwargs
+    ):
+        body = self._buildBody(
+            name,
+            datatype,
+            repeat,
+            timeframe,
+            export_type,
+            recipient_emails,
+            **kwargs
+        )
+        return self._make_request(
+            "PUT",
+            "%s" % self.ROOT,
+            body=body,
+            callback=callback,
+            errback=errback,
+        )
+
+    def delete(self, dtId: str, callback=None, errback=None):
+        return self._make_request(
+            "DELETE",
+            "%s/%s" % (self.ROOT, dtId),
+            callback=callback,
+            errback=errback,
+        )
+
+    def list(self, callback=None, errback=None):
+        return self._make_request(
+            "GET",
+            "%s" % self.ROOT,
+            callback=callback,
+            errback=errback,
+        )
+
+    def retrieve(self, dtId: str, callback=None, errback=None):
+        return self._make_request(
+            "GET",
+            "%s/%s" % (self.ROOT, dtId),
+            callback=callback,
+            errback=errback,
+        )
+
+    def retrieveReport(
+        self, dtId: str, rpId: str, callback=None, errback=None
+    ):
+        return self._make_request(
+            "GET",
+            "%s/%s/reports/%s" % (self.ROOT, dtId, rpId),
+            callback=callback,
+            errback=errback,
+            skip_json_parsing=True,
+        )
diff --git a/ns1/rest/transport/requests.py b/ns1/rest/transport/requests.py
index 5fcd77a..65cfb9a 100644
--- a/ns1/rest/transport/requests.py
+++ b/ns1/rest/transport/requests.py
@@ -45,7 +45,17 @@ def _rateLimitHeaders(self, headers):
             "remaining": int(headers.get("X-RateLimit-Remaining", 100)),
         }
 
-    def _send(self, method, url, headers, data, files, params, errback):
+    def _send(
+        self,
+        method,
+        url,
+        headers,
+        data,
+        files,
+        params,
+        errback,
+        skip_json_parsing,
+    ):
         resp = self.REQ_MAP[method](
             url,
             headers=headers,
@@ -80,6 +90,9 @@ def _send(self, method, url, headers, data, files, params, errback):
             else:
                 raise ResourceException("server error", resp, resp.text)
 
+        if resp.text and skip_json_parsing:
+            return response_headers, resp.text
+
         # TODO make sure json is valid if a body is returned
         if resp.text:
             try:
@@ -106,18 +119,33 @@ def send(
         callback=None,
         errback=None,
         pagination_handler=None,
+        skip_json_parsing=False,
     ):
         self._logHeaders(headers)
         resp_headers, jsonOut = self._send(
-            method, url, headers, data, files, params, errback
+            method,
+            url,
+            headers,
+            data,
+            files,
+            params,
+            errback,
+            skip_json_parsing,
         )
         if self._follow_pagination and pagination_handler is not None:
             next_page = get_next_page(resp_headers)
             while next_page is not None:
                 self._log.debug("following pagination to: %s" % next_page)
                 next_headers, next_json = self._send(
-                    method, next_page, headers, data, files, params, errback
+                    method,
+                    next_page,
+                    headers,
+                    data,
+                    files,
+                    params,
+                    errback,
+                    skip_json_parsing,
                 )
                 jsonOut = pagination_handler(jsonOut, next_json)
                 next_page = get_next_page(next_headers)
diff --git a/tests/unit/test_datasets.py b/tests/unit/test_datasets.py
new file mode 100644
index 0000000..c50de2d
--- /dev/null
+++ b/tests/unit/test_datasets.py
@@ -0,0 +1,145 @@
+import pytest
+
+import ns1.rest.datasets
+from ns1 import NS1
+
+try:  # Python 3.3+
+
+    import unittest.mock as mock
+except ImportError:
+    import mock
+
+
+@pytest.fixture
+def datasets_config(config):
+    config.loadFromDict(
+        {
+            "endpoint": "api.nsone.net",
+            "default_key": "test1",
+            "keys": {
+                "test1": {
+                    "key": "key-1",
+                    "desc": "test key number 1",
+                    "writeLock": True,
+                }
+            },
+        }
+    )
+
+    return config
+
+
+@pytest.mark.parametrize("url", ["datasets"])
+def test_rest_datasets_list(datasets_config, url):
+    z = NS1(config=datasets_config).datasets()
+    z._make_request = mock.MagicMock()
+    z.list()
+    z._make_request.assert_called_once_with(
+        "GET",
+        url,
+        callback=None,
+        errback=None,
+    )
+
+
+@pytest.mark.parametrize(
+    "dtId, url",
+    [
+        (
+            "96529d62-fb0c-4150-b5ad-6e5b8b2736f6",
+            "datasets/96529d62-fb0c-4150-b5ad-6e5b8b2736f6",
+        )
+    ],
+)
+def test_rest_dataset_retrieve(datasets_config, dtId, url):
+    z = NS1(config=datasets_config).datasets()
+    z._make_request = mock.MagicMock()
+    z.retrieve(dtId)
+    z._make_request.assert_called_once_with(
+        "GET",
+        url,
+        callback=None,
+        errback=None,
+    )
+
+
+@pytest.mark.parametrize("url", ["datasets"])
+def test_rest_dataset_create(datasets_config, url):
+    z = NS1(config=datasets_config).datasets()
+    z._make_request = mock.MagicMock()
+    z.create(
+        name="my dataset",
+        datatype={
+            "type": "num_queries",
+            "scope": "account",
+        },
+        repeat=None,
+        timeframe={"aggregation": "monthly", "cycles": 1},
+        export_type="csv",
+        recipient_emails=None,
+    )
+
+    z._make_request.assert_called_once_with(
+        "PUT",
+        url,
+        body={
+            "name": "my dataset",
+            "datatype": {
+                "type": "num_queries",
+                "scope": "account",
+            },
+            "timeframe": {"aggregation": "monthly", "cycles": 1},
+            "repeat": None,
+            "export_type": "csv",
+            "recipient_emails": None,
+        },
+        callback=None,
+        errback=None,
+    )
+
+
+@pytest.mark.parametrize(
+    "dtId, url",
+    [
+        (
+            "96529d62-fb0c-4150-b5ad-6e5b8b2736f6",
+            "datasets/96529d62-fb0c-4150-b5ad-6e5b8b2736f6",
+        )
+    ],
+)
+def test_rest_datasets_delete(datasets_config, dtId, url):
+    z = NS1(config=datasets_config).datasets()
+    z._make_request = mock.MagicMock()
+    z.delete(dtId)
+    z._make_request.assert_called_once_with(
+        "DELETE",
+        url,
+        callback=None,
+        errback=None,
+    )
+
+
+def test_rest_datasets_buildbody(datasets_config):
+    z = ns1.rest.datasets.Datasets(datasets_config)
+    kwargs = {
+        "name": "my dataset",
+        "datatype": {
+            "type": "num_queries",
+            "scope": "account",
+        },
+        "timeframe": {"aggregation": "monthly", "cycles": 1},
+        "repeat": None,
+        "recipient_emails": None,
+        "export_type": "csv",
+    }
+    body = {
+        "name": "my dataset",
+        "datatype": {
+            "type": "num_queries",
+            "scope": "account",
+        },
+        "timeframe": {"aggregation": "monthly", "cycles": 1},
+        "repeat": None,
+        "recipient_emails": None,
+        "export_type": "csv",
+    }
+    assert z._buildBody(**kwargs) == body
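
As a companion to `examples/datasets.py`, which drives the raw REST interface, here is a minimal sketch of the same flow through the high-level `ns1.dataset.Dataset` wrapper this patch adds. It assumes the default synchronous transport and the same `~/.nsone` credentials as the example; the variable names and the `report.csv` output path are illustrative, not part of the patch.

```python
from ns1 import NS1
from ns1.dataset import Dataset

api = NS1()  # picks up ~/.nsone, as in the example above

# create() proxies to ns1.rest.datasets.Datasets.create and returns the
# parsed API response (a dict that includes the new dataset's id)
created = Dataset(api.config).create(
    name="my dataset",
    datatype={"type": "num_queries", "scope": "account"},
    repeat=None,
    timeframe={"aggregation": "monthly", "cycles": 1},
    export_type="csv",
    recipient_emails=None,
)

# load() fetches by id; with no callback it returns the instance itself
ds = Dataset(api.config).load(id=created["id"])
print(ds)  # exercises the __repr__ defined in ns1/dataset.py

# once a report's status is "available", retrieveReport() can be called with
# just the report id: dt_id falls back to the loaded dataset's id
reports = ds["reports"] or []
if reports and reports[0].get("status") == "available":
    with open("report.csv", "w") as f:
        f.write(ds.retrieveReport(reports[0]["id"]))
```

Note that `Dataset.create()` returns the raw response rather than populating the instance, so the follow-up `load()` is required before indexing into the object or relying on the `dt_id` fallback in `retrieveReport()`.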