diff --git a/.env b/.env index 2cf00bdb..07e8361b 100644 --- a/.env +++ b/.env @@ -1,4 +1,4 @@ PROJECT_ID=artefact-docker-containers -DOCKER_IMAGE=nautilus-connectors-kit-dev -DOCKER_TAG=v1.1 +DOCKER_IMAGE=artefactory-connectors-kit-dev +DOCKER_TAG=v2.0 DOCKER_REGISTRY=eu.gcr.io diff --git a/.flake8 b/.flake8 index 465ee6da..a62b619c 100644 --- a/.flake8 +++ b/.flake8 @@ -6,4 +6,4 @@ ignore = E203, # Conflicts with black, see https://github.com/psf/black/issues/544 W605, # Invalid excape sequences like "\(" in string regexes -per-file-ignores = nck/readers/adobe_reader.py:E731 +per-file-ignores = ack/readers/adobe_reader.py:E731 diff --git a/.github/workflows/deploy_doc.yaml b/.github/workflows/deploy_doc.yaml new file mode 100644 index 00000000..26d2a9f1 --- /dev/null +++ b/.github/workflows/deploy_doc.yaml @@ -0,0 +1,60 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ +name: Build and deploy documentation to Github pages + +on: + push: + branches: + - dev + +jobs: + + build-and-deploy: + + runs-on: ubuntu-latest + + steps: + + - name: Checkout + uses: actions/checkout@v2 + with: + persist-credentials: false + + - name: Set up Python + uses: actions/setup-python@v1 + with: + python-version: 3.8 + + - name: Install Python dependencies + run: | + pip3 install setuptools + pip3 install sphinx sphinx-rtd-theme + + - name: Build Sphinx doc + run: | + cd docs/ + make html + + - name: Deploy Github Pages + uses: JamesIves/github-pages-deploy-action@3.7.1 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + BRANCH: gh-pages + FOLDER: docs/build/html/ + CLEAN: true diff --git a/.github/workflows/buildtogcp.yml b/.github/workflows/deploy_image.yml similarity index 58% rename from .github/workflows/buildtogcp.yml rename to .github/workflows/deploy_image.yml index 7b59a7e2..071efb8e 100644 --- a/.github/workflows/buildtogcp.yml +++ b/.github/workflows/deploy_image.yml @@ -15,25 +15,24 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -# This workflow will build a docker container and publish it to Google Container Registry. +# TO CONFIGURE THIS WORKFLOW +# 1. Set up secrets in your workspace: +# - GCP_PROJECT with the name of the project +# - GCP_EMAIL with the service account email +# - GCP_KEY with the service account key +# 2. Deployment infos are by now in the .env module: +# don't forget to change the version when updating the master branch. - -# To configure this workflow: -# -# 1. Set up secrets in your workspace: GCP_PROJECT with the name of the project, GCP_EMAIL with the service account -# email, GCP_KEY with the service account key. -# -# 2. The deployments info are by now in the .env, dont forget to change the version when updating the master. 
- -name: Build and Deploy image to GCP +name: Build and publish Docker image to GCP Container Registry on: pull_request: types: [closed] -# Environment variables available to all jobs and steps in this workflow +# Environment variables available to all jobs and steps in this workflow: env: + GCP_EMAIL: ${{ secrets.GCP_EMAIL }} PROJECT_ID: ${{ secrets.PROJECT_ID }} DOCKER_TAG: ${{ github.run_id }} @@ -42,31 +41,28 @@ env: CLOUDSDK_PYTHON_SITEPACKAGES: 1 jobs: - setup-build-publish: - if: github.event.pull_request.merged == true - name: Setup, Build, Publish - runs-on: ubuntu-latest - steps: - - - name: Checkout - uses: actions/checkout@v1 - # Setup gcloud CLI - - uses: GoogleCloudPlatform/github-actions/setup-gcloud@master - with: - version: '290.0.1' - service_account_email: ${{ secrets.GCP_EMAIL }} - service_account_key: ${{ secrets.GCP_KEY }} + build-and-deploy: - - name: Clean ENV - run: | - > .env + runs-on: ubuntu-latest + if: github.event.pull_request.merged == true + steps: - # Build the Docker image - - name: Build And Publish - run: | - make publish_base_image + - name: Checkout + uses: actions/checkout@v1 + - name: Setup gcloud CLI + uses: GoogleCloudPlatform/github-actions/setup-gcloud@master + with: + version: '290.0.1' + service_account_email: ${{ secrets.GCP_EMAIL }} + service_account_key: ${{ secrets.GCP_KEY }} + - name: Clean .env + run: | + > .env + - name: Build and publish Docker image + run: | + make publish_base_image diff --git a/.github/workflows/lintandruntests.yml b/.github/workflows/lint_and_run_tests.yml similarity index 50% rename from .github/workflows/lintandruntests.yml rename to .github/workflows/lint_and_run_tests.yml index c37909c8..3ba9986b 100644 --- a/.github/workflows/lintandruntests.yml +++ b/.github/workflows/lint_and_run_tests.yml @@ -15,37 +15,42 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, 
Fifth Floor, Boston, MA 02110-1301, USA. + name: Lint and run tests on: [push] jobs: + build: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v1 - - name: Set up Python 3.8 - uses: actions/setup-python@v1 - with: - python-version: 3.8 - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -r requirements.txt - - name: Install dev dependencies - run: | - pip install -r requirements-dev.txt - - name: Lint with flake8 - run: | - # stop nck build if there are Python syntax errors or undefined names - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - # Uncomment when the repo is clean :) - flake8 ./nck/ --count --max-complexity=10 --max-line-length=127 --statistics - flake8 ./tests/ --count --select=E9,F63,F7,F82 --show-source --statistics - flake8 ./tests/ --count --max-complexity=10 --max-line-length=127 --statistics - - - name: Test with nose - run: | - nosetests \ No newline at end of file + + - name: Checkout + uses: actions/checkout@v1 + + - name: Set up Python + uses: actions/setup-python@v1 + with: + python-version: 3.8 + + - name: Install Python dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + pip install -r requirements-dev.txt + + - name: Lint with flake8 + run: | + # stop ack build if there are Python syntax errors or undefined names + # exit-zero treats all errors as warnings. 
The GitHub editor is 127 chars wide + # Uncomment when the repo is clean :) + flake8 ./ack/ --count --max-complexity=10 --max-line-length=127 --statistics + flake8 ./tests/ --count --select=E9,F63,F7,F82 --show-source --statistics + flake8 ./tests/ --count --max-complexity=10 --max-line-length=127 --statistics + + - name: Test with nose + run: | + nosetests diff --git a/.gitignore b/.gitignore index b678a637..475c79df 100644 --- a/.gitignore +++ b/.gitignore @@ -66,7 +66,9 @@ instance/ .scrapy # Sphinx documentation -docs/_build/ +docs/build/** +!docs/build/ +!docs/build/.gitkeep # PyBuilder target/ @@ -112,7 +114,7 @@ tmp/ .idea/ credentials/* -.nck.egg-info +.ack.egg-info .DS_Store # Pipenv diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 19a3fd3d..a7eb53f8 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,10 +1,10 @@ repos: - repo: https://github.com/ambv/black - rev: stable + rev: 19.10b0 hooks: - id: black entry: black - language_version: python3.7 + language_version: python3 - repo: https://github.com/pre-commit/pre-commit-hooks rev: v1.2.3 hooks: diff --git a/AUTHORS.rst b/AUTHORS.rst deleted file mode 100644 index 433ee283..00000000 --- a/AUTHORS.rst +++ /dev/null @@ -1,13 +0,0 @@ -======= -Credits -======= - -Development Lead ----------------- - -* Artefact Artefact - -Contributors ------------- - -* Hamza SENHAJI RHAZI diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md deleted file mode 100644 index 445df80d..00000000 --- a/CONTRIBUTING.md +++ /dev/null @@ -1,43 +0,0 @@ -# Contributing to Nautilus Connectors Kit -We love your input! We want to make contributing to this project as easy and transparent as possible, whether it's: - -- Reporting a bug -- Discussing the current state of the code -- Submitting a fix -- Proposing new features -- Becoming a maintainer - -## We Develop with Github -We use github to host code, to track issues and feature requests, as well as accept pull requests. 
- -## We Use [Github Flow](https://guides.github.com/introduction/flow/index.html), So All Code Changes Happen Through Pull Requests -Pull requests are the best way to propose changes to the codebase (we use [Github Flow](https://guides.github.com/introduction/flow/index.html)). We actively welcome your pull requests: - -1. Fork the repo and create your branch from `master`. -2. If you've added code that should be tested, add tests. -3. If you've changed APIs, update the documentation. -4. Ensure the test suite passes. -5. Make sure your code lints. -6. Issue that pull request! - -## Report bugs using Github's [issues](https://github.com/briandk/transcriptase-atom/issues) -We use GitHub issues to track public bugs. Report a bug by [opening a new issue](); it's that easy! - -## Write bug reports with detail, background, and sample code -[This is an example](http://stackoverflow.com/q/12488905/180626) of a bug report I wrote, and I think it's not a bad model. Here's [another example from Craig Hockenberry](http://www.openradar.me/11905408), an app developer whom I greatly respect. - -**Great Bug Reports** tend to have: - -- A quick summary and/or background -- Steps to reproduce - - Be specific! - - Give sample code if you can. [My stackoverflow question](http://stackoverflow.com/q/12488905/180626) includes sample code that *anyone* with a base R setup can run to reproduce what I was seeing -- What you expected would happen -- What actually happens -- Notes (possibly including why you think this might be happening, or stuff you tried that didn't work) - -People *love* thorough bug reports. I'm not even kidding. - -## License - -By contributing, you agree that your contributions will be licensed under its GNU LESSER GENERAL PUBLIC LICENSE. 
diff --git a/Dockerfile b/Dockerfile index 19aa5200..062067fb 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,41 +1,14 @@ -FROM python:3.7 +FROM python:3.8-slim-buster -# Install Unix packages -RUN apt-get update && apt-get install -y \ - supervisor \ - cron \ - nano \ - git \ - build-essential \ - unzip \ - libaio-dev \ - && mkdir -p /opt/data/api +ENV PYTHONDONTWRITEBYTECODE True +ENV PYTHONUNBUFFERED True -# Oracle Dependencies -ADD ./vendor /opt/vendor +ADD requirements.txt . +RUN python -m pip install -r requirements.txt -WORKDIR /opt/vendor +WORKDIR /app +ADD . /app -ENV ORACLE_HOME=/opt/oracle/instantclient -ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$ORACLE_HOME - -ENV OCI_HOME=/opt/oracle/instantclient -ENV OCI_LIB_DIR=/opt/oracle/instantclient -ENV OCI_INCLUDE_DIR=/opt/oracle/instantclient/sdk/include - -# Install Oracle -RUN mkdir /opt/oracle -RUN chmod +x /opt/vendor/install.sh -RUN /opt/vendor/install.sh - -RUN mkdir /app - -# Copy code -ADD . /app/ -RUN chmod -R 0644 /app -WORKDIR /app/ ENV PYTHONPATH=${PYTHONPATH}:. -RUN python setup.py install - -ENTRYPOINT ["nckrun"] +ENTRYPOINT ["python", "ack/entrypoints/cli/main.py"] diff --git a/Makefile b/Makefile index eb6d2af5..96d16ea3 100644 --- a/Makefile +++ b/Makefile @@ -1,22 +1,5 @@ include .env -.PHONY: clean -clean: clean_pyc - -.PHONY: clean_pyc -clean_pyc: - find . -name "*pyc" -exec rm -f {} \; - -.PHONY: requirements -requirements: - pip install -r requirements-dev.txt - -.PHONY: dist -dist: clean ## builds source and wheel package - python3 setup.py sdist - python3 setup.py bdist_wheel - ls -l dist - define build_docker_image docker image build --rm -t $(1):$(2) -f $(3) . 
docker tag $(1):$(2) $(1):latest @@ -32,11 +15,10 @@ configure_docker_auth: gcloud --project $(PROJECT_ID) auth configure-docker .PHONY: build_base_image -build_base_image: clean +build_base_image: $(call build_docker_image,${DOCKER_IMAGE},${DOCKER_TAG},Dockerfile) .PHONY: publish_base_image publish_base_image: build_base_image configure_docker_auth $(call publish_docker_image,${DOCKER_IMAGE},${DOCKER_TAG}) $(call publish_docker_image,${DOCKER_IMAGE},latest) - diff --git a/README.md b/README.md index a2398ba6..1528941c 100644 --- a/README.md +++ b/README.md @@ -1,103 +1,65 @@ -# Nautilus Connectors Kit +# Artefactory Connectors Kit -Nautilus connectors kit is a tool which aim is getting raw data from different sources and store them as-is into different destinations (GCS, BQ, local files, etc.). +**ACK is an E(T)L tool specialized in API data ingestion. It is accessible through a Command-Line Interface. The application allows you to easily extract, stream and load data (with minimum transformations), from the API source to the destination of your choice.** -## List of connectors +As of now, the most common output format of data loaded by the application is .njson (i.e. a file of n lines, where each line is a json-like dictionary). -### Readers - -- Adobe Analytics 1.4 -- Adobe Analytics 2.0 -- Amazon S3 -- Facebook Marketing -- Google Ads -- Google Analytics -- Google Cloud Storage -- Google Campaign Manager -- Google Display & Video 360 -- Google Search Ads 360 -- Google Search Console -- Google Sheets -- Oracle -- MySQL -- Radarly -- SalesForce -- The Trade Desk -- Twitter Ads -- Yandex Campaign -- Yandex Statistics - -### Writers +Official documentation is available [here](https://artefactory.github.io/artefactory-connectors-kit/). -- Google BigQuery -- Google Cloud Storage -- Amazon S3 -- Local File -- Console (Debug) +--- ## Philosophy -NCK is divided in three main components : Readers, Streams, and Writers. 
- -- [Readers](./nck/readers/README.md) role is to read data from distant sources and transform it into stream object -- [Streams](./nck/streams/README.md) role is to be read as file or line by line. There are local objects usable by writers -- [Writers](./nck/writers/README.md) role is to write stream into distant location - -## Usage -**nck** could be consumed through a docker image or can be installed as package and then be used as library or a binary. +The application is composed of **3 main components** (*implemented as Python classes*). When combined, these components act as an E(T)L pipeline, allowing you to stream data from a source to the destination of your choice: -### Docker image +- [Readers](ack/readers) are reading data from an API source, and transform it into a stream object. +- [Streams](ack/streams) (*transparent to the end-user*) are local objects used by writers to process individual records collected from the source. +- [Writers](ack/writers) are writing the output stream object to the destination of your choice. -1. Build Docker image using `make build_base_image` -2. 
Run image to get help `docker run --rm nautilus-connector-kit:latest --help` +## Available connectors -### Develop with python - -First install dependencies with: - -```bash -pip install -r requirements.txt -``` -Then run: +As of now, the application is offering the following Readers & Writers: -``` -python nck/entrypoint.py -``` - -### Package - -#### Generate distribs : - -* Exec cmd `make dist` (it generates a source distrib and a wheel in the created directory dist/) - -It is advised to do the following in a virtual env - -#### Create a virtual env : - -`python3 -m venv testenv; source testenv/bin/activate` - -#### Install via the wheel in dist : -`pip wheel --wheel-dir=wheels -r requirements.txt (that creates folder of wheels for packages in requierements)` - -`pip install --no-index --find-links=./wheels dist/[nck-file-generated].whl` - -#### Install in editable mode : -`pip install -e .` -#### Install via the setup.py : - -`python setup.py install` - -#### Usage as binary : - -* Run cmd `nckrun --help` (which is equivalent to python nck/entrypoint.py) - -#### Usage as library : - -`from nck.readers.dbm_reader import DbmReader` +### Readers -#### Some references on packaging : +- **Analytics** + - Adobe Analytics 1.4 + - Adobe Analytics 2.0 + - Google Analytics +- **Advertising - Adserver** + - Google Campaign Manager +- **Advertising - DSP** + - Google Display & Video 360 + - The Trade Desk +- **Advertising - Search** + - Google Ads + - Google Search Ads 360 + - Google Search Console + - Yandex Campaign + - Yandex Statistics +- **Advertising - Social** + - Facebook Marketing + - MyTarget + - Radarly + - Twitter Ads +- **CRM** + - SalesForce +- **Databases** + - MySQL +- **DevTools** + - Confluence +- **Files (.csv, .njson)** + - Amazon S3 + - Google Cloud Storage + - Google Sheets +### Writers -* https://manikos.github.io/a-tour-on-python-packaging -* http://lucumr.pocoo.org/2014/1/27/python-on-wheels/ -* 
https://pip.readthedocs.io/en/1.4.1/cookbook.html#controlling-setup-requires +- **Data Warehouses** + - Google BigQuery +- **Debugging** + - Console +- **Files (.njson)** + - Amazon S3 + - Google Cloud Storage + - Local file diff --git a/nck/__init__.py b/ack/__init__.py similarity index 100% rename from nck/__init__.py rename to ack/__init__.py diff --git a/nck/commands/__init__.py b/ack/clients/__init__.py similarity index 100% rename from nck/commands/__init__.py rename to ack/clients/__init__.py diff --git a/nck/helpers/__init__.py b/ack/clients/adobe_analytics/__init__.py similarity index 100% rename from nck/helpers/__init__.py rename to ack/clients/adobe_analytics/__init__.py diff --git a/nck/clients/adobe_client.py b/ack/clients/adobe_analytics/client.py similarity index 92% rename from nck/clients/adobe_client.py rename to ack/clients/adobe_analytics/client.py index 359fcc94..574e3b05 100644 --- a/nck/clients/adobe_client.py +++ b/ack/clients/adobe_analytics/client.py @@ -16,20 +16,18 @@ # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -import logging from datetime import datetime, timedelta -import requests + import jwt -from tenacity import retry, wait_exponential, stop_after_delay +import requests +from ack.config import logger +from tenacity import retry, stop_after_delay, wait_exponential IMS_HOST = "ims-na1.adobelogin.com" IMS_EXCHANGE = "https://ims-na1.adobelogin.com/ims/exchange/jwt" -logging.basicConfig(level="INFO") -logger = logging.getLogger() - -class AdobeClient: +class AdobeAnalyticsClient: """ Create an Adobe Client for JWT Authentification. 
Doc: https://github.com/AdobeDocs/adobeio-auth/blob/stage/JWT/JWT.md @@ -45,7 +43,7 @@ def __init__(self, client_id, client_secret, tech_account_id, org_id, private_ke self.private_key = private_key # Creating jwt_token attribute - logging.info("Getting jwt_token.") + logger.info("Getting jwt_token.") self.jwt_token = jwt.encode( { "exp": datetime.utcnow() + timedelta(seconds=30), @@ -59,7 +57,7 @@ def __init__(self, client_id, client_secret, tech_account_id, org_id, private_ke ) # Creating access_token attribute - logging.info("Getting access_token.") + logger.info("Getting access_token.") self.access_token = self.get_access_token() @retry(wait=wait_exponential(multiplier=60, min=60, max=1200), stop=stop_after_delay(3600)) diff --git a/nck/streams/__init__.py b/ack/clients/api/__init__.py similarity index 100% rename from nck/streams/__init__.py rename to ack/clients/api/__init__.py diff --git a/nck/clients/api_client.py b/ack/clients/api/client.py similarity index 92% rename from nck/clients/api_client.py rename to ack/clients/api/client.py index f95971a8..e9613570 100644 --- a/nck/clients/api_client.py +++ b/ack/clients/api/client.py @@ -15,16 +15,13 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-import logging -from typing import Dict, Any -from requests_toolbelt import sessions +from typing import Any, Dict -logger = logging.getLogger("ApiClient") +from requests_toolbelt import sessions class ApiClient: - def __init__(self, token, base_url): self.token = token self.session = sessions.BaseUrlSession(base_url=base_url) @@ -35,7 +32,7 @@ def execute_request( url: str = "", body: Dict[str, Any] = None, headers: Dict[str, str] = None, - stream: bool = False + stream: bool = False, ): headers["Authorization"] = f"Bearer {self.token}" response = self.session.request(method, url, json=body, headers=headers) diff --git a/nck/helpers/api_client_helper.py b/ack/clients/api/helper.py similarity index 79% rename from nck/helpers/api_client_helper.py rename to ack/clients/api/helper.py index 3582700d..9f3a98d4 100644 --- a/nck/helpers/api_client_helper.py +++ b/ack/clients/api/helper.py @@ -15,24 +15,19 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + from typing import Dict -import logging -logging.getLogger("ApiClient") +from ack.config import logger POSSIBLE_STRING_FORMATS = ["PascalCase"] -def get_dict_with_keys_converted_to_new_string_format( - str_format: str = "PascalCase", **kwargs -) -> Dict: +def get_dict_with_keys_converted_to_new_string_format(str_format: str = "PascalCase", **kwargs) -> Dict: if str_format in POSSIBLE_STRING_FORMATS and str_format == "PascalCase": return {to_pascal_key(key): value for key, value in kwargs.items()} else: - logging.error(( - "Unable to convert to new string format. " - "Format not in %s" - ) % POSSIBLE_STRING_FORMATS) + logger.error(f"Unable to convert to new string format. 
Format not in {POSSIBLE_STRING_FORMATS}") return None diff --git a/nck/utils/__init__.py b/ack/clients/google/__init__.py similarity index 100% rename from nck/utils/__init__.py rename to ack/clients/google/__init__.py diff --git a/nck/helpers/google_base.py b/ack/clients/google/client.py similarity index 68% rename from nck/helpers/google_base.py rename to ack/clients/google/client.py index c2524847..2120f802 100644 --- a/nck/helpers/google_base.py +++ b/ack/clients/google/client.py @@ -15,42 +15,39 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -""" - This was adapted from airflow google Base Hook. - A base hook for Google cloud-related hooks. Google cloud has a shared REST - API client that is built in the same way no matter which service you use. - This class helps construct and authorize the credentials needed to then - call googleapiclient.discovery.build() to actually discover and build a client - for a Google cloud service. +# This was adapted from Airflow Google Base Hook. - Three ways of authentication are supported: - Default credentials: Only the 'Project Id' is required. You'll need to - have set up default credentials, such as by the - ``GOOGLE_APPLICATION_DEFAULT`` environment variable or from the metadata - server on Google Compute Engine. - JSON key file: Specify 'Project Id', 'Keyfile Path' and 'Scope'. - Legacy P12 key files are not supported. - JSON data provided the parameters -""" +# A base hook for Google cloud-related hooks. Google cloud has a shared REST +# API client that is built in the same way no matter which service you use. +# This class helps construct and authorize the credentials needed to then +# call googleapiclient.discovery.build() to actually discover and build a client +# for a Google cloud service. 
+ +# Three ways of authentication are supported: +# - Default credentials: only the 'Project Id' is required. You'll need to +# have set up default credentials, such as by the +# - GOOGLE_APPLICATION_DEFAULT environment variable or from the metadata +# server on Google Compute Engine. +# - JSON key file: specify 'Project Id', 'Keyfile Path' and 'Scope'. +# - Legacy P12 key files are not supported. +# - JSON data provided the parameters -import logging import json import os +from typing import Dict, Optional, Sequence + +from ack.config import logger + import google.auth import google.oauth2.service_account -from typing import Dict, Optional, Sequence - -_DEFAULT_SCOPES = ( - "https://www.googleapis.com/auth/cloud-platform", -) # type: Sequence[str] +_DEFAULT_SCOPES = ("https://www.googleapis.com/auth/cloud-platform",) # type: Sequence[str] -class GoogleBaseClass: +class GoogleClient: scopes = _DEFAULT_SCOPES - log = logging.getLogger("Google_Base_Hook") def _get_credentials_and_project_id(self) -> google.auth.credentials.Credentials: """ @@ -59,7 +56,7 @@ def _get_credentials_and_project_id(self) -> google.auth.credentials.Credentials key_path = os.environ.get("GCP_KEY_PATH") # type: Optional[str] keyfile_dict = os.environ.get("GCP_KEY_JSON") # type: Optional[str] if not key_path and not keyfile_dict: - self.log.info( + logger.info( "Getting connection using `google.auth.default()` " "since no key file is defined for hook." "You can pass a key as json in GCP_KEY_JSON " @@ -69,15 +66,11 @@ def _get_credentials_and_project_id(self) -> google.auth.credentials.Credentials elif key_path: # Get credentials from a JSON file. 
if key_path.endswith(".json"): - self.log.debug("Getting connection using JSON key file %s" % key_path) - credentials = google.oauth2.service_account.Credentials.from_service_account_file( - key_path, scopes=self.scopes - ) + logger.debug(f"Getting connection using JSON key file {key_path}") + credentials = google.oauth2.service_account.Credentials.from_service_account_file(key_path, scopes=self.scopes) project_id = credentials.project_id elif key_path.endswith(".p12"): - raise Exception( - "Legacy P12 key file are not supported, " "use a JSON key file." - ) + raise Exception("Legacy P12 key file are not supported, " "use a JSON key file.") else: raise Exception("Unrecognised extension for key file.") else: @@ -88,9 +81,7 @@ def _get_credentials_and_project_id(self) -> google.auth.credentials.Credentials # Depending on how the JSON was formatted, it may contain # escaped newlines. Convert those to actual newlines. - keyfile_dict_json["private_key"] = keyfile_dict_json[ - "private_key" - ].replace("\\n", "\n") + keyfile_dict_json["private_key"] = keyfile_dict_json["private_key"].replace("\\n", "\n") credentials = google.oauth2.service_account.Credentials.from_service_account_info( keyfile_dict_json, scopes=self.scopes diff --git a/tests/helpers/__init__.py b/ack/clients/google_dcm/__init__.py similarity index 100% rename from tests/helpers/__init__.py rename to ack/clients/google_dcm/__init__.py diff --git a/nck/clients/dcm_client.py b/ack/clients/google_dcm/client.py similarity index 96% rename from nck/clients/dcm_client.py rename to ack/clients/google_dcm/client.py index a0d819f9..2bbc2a01 100644 --- a/nck/clients/dcm_client.py +++ b/ack/clients/google_dcm/client.py @@ -15,7 +15,8 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-import logging + +from ack.config import logger import httplib2 import requests @@ -23,12 +24,10 @@ from oauth2client import client, GOOGLE_TOKEN_URI from googleapiclient import discovery -logger = logging.getLogger("DCM_client") - DOWNLOAD_FORMAT = "CSV" -class DCMClient: +class GoogleDCMClient: API_NAME = "dfareporting" API_VERSION = "v3.3" @@ -44,9 +43,7 @@ def __init__(self, access_token, client_id, client_secret, refresh_token): ) http = self._credentials.authorize(httplib2.Http()) self._credentials.refresh(http) - self.auth = ( - f"{self._credentials.token_response['token_type']} {self._credentials.token_response['access_token']}" - ) + self.auth = f"{self._credentials.token_response['token_type']} {self._credentials.token_response['access_token']}" self._service = discovery.build(self.API_NAME, self.API_VERSION, http=http, cache_discovery=False) @staticmethod diff --git a/nck/helpers/dbm_helper.py b/ack/clients/google_sa360/__init__.py similarity index 83% rename from nck/helpers/dbm_helper.py rename to ack/clients/google_sa360/__init__.py index 8d321cd2..d46139b7 100644 --- a/nck/helpers/dbm_helper.py +++ b/ack/clients/google_sa360/__init__.py @@ -15,11 +15,3 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-POSSIBLE_REQUEST_TYPES = [ - "existing_query", - "custom_query", - "existing_query_report", - "custom_query_report", - "lineitems_objects", - "list_reports", -] diff --git a/nck/clients/sa360_client.py b/ack/clients/google_sa360/client.py similarity index 88% rename from nck/clients/sa360_client.py rename to ack/clients/google_sa360/client.py index fae755ab..7dc97042 100644 --- a/nck/clients/sa360_client.py +++ b/ack/clients/google_sa360/client.py @@ -15,7 +15,8 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -import logging + +from ack.config import logger import httplib2 import requests @@ -23,11 +24,10 @@ from oauth2client import client, GOOGLE_TOKEN_URI from googleapiclient import discovery -logger = logging.getLogger("SA360_client") DOWNLOAD_FORMAT = "CSV" -class SA360Client: +class GoogleSA360Client: API_NAME = "doubleclicksearch" API_VERSION = "v2" @@ -43,9 +43,7 @@ def __init__(self, access_token, client_id, client_secret, refresh_token): ) http = self._credentials.authorize(httplib2.Http()) self._credentials.refresh(http) - self.auth = ( - f"{self._credentials.token_response['token_type']} {self._credentials.token_response['access_token']}" - ) + self.auth = f"{self._credentials.token_response['token_type']} {self._credentials.token_response['access_token']}" self._service = discovery.build(self.API_NAME, self.API_VERSION, http=http, cache_discovery=False) def get_all_advertisers_of_agency(self, agency_id): @@ -61,12 +59,12 @@ def get_all_advertisers_of_agency(self, agency_id): @staticmethod def generate_report_body(agency_id, advertiser_id, report_type, columns, start_date, end_date, saved_columns): - all_columns = SA360Client.generate_columns(columns, saved_columns) + all_columns = GoogleSA360Client.generate_columns(columns, saved_columns) body = { "reportScope": {"agencyId": 
agency_id, "advertiserId": advertiser_id}, "reportType": report_type, "columns": all_columns, - "timeRange": SA360Client.get_date_range(start_date, end_date), + "timeRange": GoogleSA360Client.get_date_range(start_date, end_date), "downloadFormat": "csv", "maxRowsPerFile": 4000000, "statisticsCurrency": "usd", @@ -83,9 +81,9 @@ def request_report_id(self, body): def assert_report_file_ready(self, report_id): """Poll the API with the reportId until the report is ready, up to 100 times. - Args: - report_id: The ID SA360 has assigned to a report. - """ + Args: + report_id: The ID SA360 has assigned to a report. + """ request = self._service.reports().get(reportId=report_id) report_data = request.execute() if report_data["isReportReady"]: @@ -108,10 +106,10 @@ def download_report_files(self, json_data, report_id): def download_fragment(self, report_id, fragment): """Generate and convert to df a report fragment. - Args: - report_id: The ID SA360 has assigned to a report. - fragment: The 0-based index of the file fragment from the files array. - """ + Args: + report_id: The ID SA360 has assigned to a report. + fragment: The 0-based index of the file fragment from the files array. + """ request = self._service.reports().getFile(reportId=report_id, reportFragment=fragment) headers = request.headers headers.update({"Authorization": self.auth}) diff --git a/nck/utils/exceptions.py b/ack/clients/salesforce/__init__.py similarity index 79% rename from nck/utils/exceptions.py rename to ack/clients/salesforce/__init__.py index 495764d2..d46139b7 100644 --- a/nck/utils/exceptions.py +++ b/ack/clients/salesforce/__init__.py @@ -15,15 +15,3 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
- - -class RetryTimeoutError(Exception): - """Raised when a query exceeds it's time limit threshold.""" - - pass - - -class SdfOperationError(Exception): - """Raised when a sdf operation has failed.""" - - pass diff --git a/ack/clients/salesforce/client.py b/ack/clients/salesforce/client.py new file mode 100644 index 00000000..0e2c4a3f --- /dev/null +++ b/ack/clients/salesforce/client.py @@ -0,0 +1,116 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ +import urllib + +import requests +from ack.config import logger +from ack.readers.salesforce.config import ( + SALESFORCE_DESCRIBE_ENDPOINT, + SALESFORCE_LOGIN_ENDPOINT, + SALESFORCE_LOGIN_REDIRECT, + SALESFORCE_QUERY_ENDPOINT, +) + + +class SalesforceClient: + def __init__(self, user, password, consumer_key, consumer_secret): + self._user = user + self._password = password + self._consumer_key = consumer_key + self._consumer_secret = consumer_secret + + self._headers = None + self._access_token = None + self._instance_url = None + + @property + def headers(self): + return { + "Content-type": "application/json", + "Accept-Encoding": "gzip", + "Authorization": f"Bearer {self.access_token}", + } + + @property + def access_token(self): + if not self._access_token: + self._load_access_info() + + return self._access_token + + @property + def instance_url(self): + if not self._instance_url: + self._load_access_info() + + return self._instance_url + + def _load_access_info(self): + logger.info("Retrieving Salesforce access token") + + res = requests.post(SALESFORCE_LOGIN_ENDPOINT, params=self._get_login_params()) + + res.raise_for_status() + + self._access_token = res.json().get("access_token") + self._instance_url = res.json().get("instance_url") + + return self._access_token, self._instance_url + + def _get_login_params(self): + return { + "grant_type": "password", + "client_id": self._consumer_key, + "client_secret": self._consumer_secret, + "username": self._user, + "password": self._password, + "redirect_uri": SALESFORCE_LOGIN_REDIRECT, + } + + def _request_data(self, path, params=None): + + endpoint = urllib.parse.urljoin(self.instance_url, path) + response = requests.get(endpoint, headers=self.headers, params=params, timeout=30) + + response.raise_for_status() + + return response.json() + + def describe(self, object_type): + path = SALESFORCE_DESCRIBE_ENDPOINT.format(obj=object_type) + return self._request_data(path) + + def query(self, query): + + 
logger.info(f"Running Salesforce query: {query}") + + response = self._request_data(SALESFORCE_QUERY_ENDPOINT, {"q": query}) + + generating = True + + while generating: + + for rec in response["records"]: + yield rec + + if "nextRecordsUrl" in response: + logger.info("Fetching next page of Salesforce results") + response = self._request_data(response["nextRecordsUrl"]) + else: + generating = False diff --git a/nck/config.py b/ack/config.py similarity index 75% rename from nck/config.py rename to ack/config.py index 8b7e0741..9b9a2d4b 100644 --- a/nck/config.py +++ b/ack/config.py @@ -16,34 +16,19 @@ # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. import logging -import os import sys +import os +LEVEL = os.environ.get("LOGGING_LEVEL", logging.INFO) FORMAT = "%(asctime)s - (%(name)s) - %(levelname)s - %(message)s" -logging.basicConfig(format=FORMAT) -logger = logging.getLogger() -logger.setLevel(logging.INFO) - -handler = logging.StreamHandler(sys.stdout) - -logger.handlers = [handler] - - -def env(): - return os.environ.get("ENV", "dev") - - -def is_staging(): - return env() == "staging" - - -def is_dev(): - return env() == "dev" - - -def is_production(): - return env() == "production" +HANDLERS = [logging.StreamHandler(sys.stdout)] +logging.basicConfig(level=LEVEL, format=FORMAT, handlers=HANDLERS) +logger = logging.getLogger() +# The below snippet is used in the following modules: +# - ack/readers/objectstorage_reader.py +# - ack/writers/gcs_writer.py +# - ack/writers/bigquery_writer.py for key, var in os.environ.items(): locals()[key] = var diff --git a/ack/entrypoints/__init__.py b/ack/entrypoints/__init__.py new file mode 100644 index 00000000..d46139b7 --- /dev/null +++ b/ack/entrypoints/__init__.py @@ -0,0 +1,17 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; 
you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. diff --git a/ack/entrypoints/cli/__init__.py b/ack/entrypoints/cli/__init__.py new file mode 100644 index 00000000..d46139b7 --- /dev/null +++ b/ack/entrypoints/cli/__init__.py @@ -0,0 +1,17 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
diff --git a/ack/entrypoints/cli/main.py b/ack/entrypoints/cli/main.py new file mode 100644 index 00000000..ff4d08c7 --- /dev/null +++ b/ack/entrypoints/cli/main.py @@ -0,0 +1,73 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+import click + +from ack.writers.writer import Writer +from ack.entrypoints.cli.writers import writers +from ack.readers.reader import Reader +from ack.entrypoints.cli.readers import readers +from ack.streams.json_stream import JSONStream +from ack.streams.normalized_json_stream import NormalizedJSONStream + + +@click.group(chain=True) +@click.option( + "--normalize-keys", + default=False, + help="(Optional) If set to true, will normalize output keys, removing white spaces and special characters.", + type=bool, +) +def cli(normalize_keys): + pass + + +def build_commands(cli, available_commands): + for cmd in available_commands: + cli.add_command(cmd) + + +@cli.resultcallback() +def process_command_pipeline(provided_commands, normalize_keys): + cmd_instances = [cmd() for cmd in provided_commands] + provided_readers = list(filter(lambda o: isinstance(o, Reader), cmd_instances)) + provided_writers = list(filter(lambda o: isinstance(o, Writer), cmd_instances)) + + _validate_provided_commands(provided_readers, provided_writers) + + reader = provided_readers[0] + for stream in reader.read(): + for writer in provided_writers: + if normalize_keys and issubclass(stream.__class__, JSONStream): + writer.write(NormalizedJSONStream.create_from_stream(stream)) + else: + writer.write(stream) + + +def _validate_provided_commands(provided_readers, provided_writers): + if len(provided_readers) < 1: + raise click.BadParameter("You must specify a reader") + if len(provided_readers) > 1: + raise click.BadParameter("You cannot specify multiple readers") + if len(provided_writers) < 1: + raise click.BadParameter("You must specify at least one writer") + + +if __name__ == "__main__": + available_commands = readers + writers + build_commands(cli, available_commands) + cli() diff --git a/ack/entrypoints/cli/readers.py b/ack/entrypoints/cli/readers.py new file mode 100644 index 00000000..dc6bc84c --- /dev/null +++ b/ack/entrypoints/cli/readers.py @@ -0,0 +1,67 @@ +# GNU Lesser General 
Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +from ack.readers.adobe_analytics_1_4.cli import adobe_analytics_1_4 +from ack.readers.adobe_analytics_2_0.cli import adobe_analytics_2_0 +from ack.readers.amazon_s3.cli import amazon_s3 +from ack.readers.confluence.cli import confluence +from ack.readers.facebook.cli import facebook +from ack.readers.google_ads.cli import google_ads +from ack.readers.google_analytics.cli import google_analytics +from ack.readers.google_cloud_storage.cli import google_cloud_storage +from ack.readers.google_dbm.cli import google_dbm +from ack.readers.google_dcm.cli import google_dcm +from ack.readers.google_dv360.cli import google_dv360 +from ack.readers.google_sa360.cli import google_sa360 +from ack.readers.google_search_console.cli import google_search_console +from ack.readers.google_sheets.cli import google_sheets +from ack.readers.google_sheets_old.cli import google_sheets_old +from ack.readers.mysql.cli import mysql +from ack.readers.mytarget.cli import mytarget +from ack.readers.radarly.cli import radarly +from ack.readers.salesforce.cli import salesforce +from ack.readers.the_trade_desk.cli import the_trade_desk +from ack.readers.twitter.cli import twitter 
+from ack.readers.yandex_campaign.cli import yandex_campaigns +from ack.readers.yandex_statistics.cli import yandex_statistics + + +readers = [ + adobe_analytics_1_4, + adobe_analytics_2_0, + amazon_s3, + confluence, + facebook, + google_ads, + google_analytics, + google_cloud_storage, + google_dbm, + google_dcm, + google_dv360, + google_sa360, + google_search_console, + google_sheets, + google_sheets_old, + mysql, + mytarget, + radarly, + salesforce, + the_trade_desk, + twitter, + yandex_campaigns, + yandex_statistics, +] diff --git a/vendor/install.sh b/ack/entrypoints/cli/writers.py similarity index 69% rename from vendor/install.sh rename to ack/entrypoints/cli/writers.py index f01d6ee0..25c66770 100644 --- a/vendor/install.sh +++ b/ack/entrypoints/cli/writers.py @@ -15,8 +15,10 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-unzip ./instantclient-basic-linux.x64-12.1.0.2.0.zip -d /opt/oracle -unzip ./instantclient-sdk-linux.x64-12.1.0.2.0.zip -d /opt/oracle -ln -sfn /opt/oracle/instantclient_12_1 /opt/oracle/instantclient -ln -s /opt/oracle/instantclient/libclntsh.so.12.1 /opt/oracle/instantclient/libclntsh.so -ln -s /opt/oracle/instantclient/libocci.so.12.1 /opt/oracle/instantclient/libocci.so +from ack.writers.amazon_s3.cli import amazon_s3 +from ack.writers.console.cli import console +from ack.writers.google_bigquery.cli import google_bigquery +from ack.writers.google_cloud_storage.cli import google_cloud_storage +from ack.writers.local.cli import local + +writers = [amazon_s3, console, google_bigquery, google_cloud_storage, local] diff --git a/ack/entrypoints/json/__init__.py b/ack/entrypoints/json/__init__.py new file mode 100644 index 00000000..d46139b7 --- /dev/null +++ b/ack/entrypoints/json/__init__.py @@ -0,0 +1,17 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
diff --git a/ack/entrypoints/json/main.py b/ack/entrypoints/json/main.py new file mode 100644 index 00000000..92ff166f --- /dev/null +++ b/ack/entrypoints/json/main.py @@ -0,0 +1,47 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+import click + +from ack.streams.json_stream import JSONStream +from ack.streams.normalized_json_stream import NormalizedJSONStream +from ack.utils.file_reader import read_json +from ack.utils.formatter import format_reader, format_writers + + +@click.command() +@click.option( + "--config-file", help="Path of the json file used to build the command.", required=True, type=click.Path(exists=True), +) +def read_and_write(config_file): + data = read_json(config_file) + if "normalize_keys" not in data.keys(): + data["normalize_keys"] = False + + reader = format_reader(data["reader"]) + writers = format_writers(data["writers"]) + + for stream in reader.read(): + for writer in writers: + if data["normalize_keys"] and issubclass(stream.__class__, JSONStream): + writer.write(NormalizedJSONStream.create_from_stream(stream)) + else: + writer.write(stream) + + +if __name__ == "__main__": + read_and_write() diff --git a/ack/entrypoints/json/readers.py b/ack/entrypoints/json/readers.py new file mode 100644 index 00000000..81fd8841 --- /dev/null +++ b/ack/entrypoints/json/readers.py @@ -0,0 +1,90 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+from ack.readers.adobe_analytics_1_4.config import AdobeAnalytics14ReaderConfig +from ack.readers.adobe_analytics_2_0.config import AdobeAnalytics20ReaderConfig +from ack.readers.amazon_s3.config import AmazonS3ReaderConfig +from ack.readers.confluence.config import ConfluenceReaderConfig +from ack.readers.facebook.config import FacebookReaderConfig +from ack.readers.google_ads.config import GoogleAdsReaderConfig +from ack.readers.google_analytics.config import GoogleAnalyticsReaderConfig +from ack.readers.google_cloud_storage.config import GoogleCloudStorageReaderConfig +from ack.readers.google_dbm.config import GoogleDBMReaderConfig +from ack.readers.google_dcm.config import GoogleDCMReaderConfig +from ack.readers.google_dv360.config import GoogleDV360ReaderConfig +from ack.readers.google_sa360.config import GoogleSA360ReaderConfig +from ack.readers.google_search_console.config import GoogleSearchConsoleReaderConfig +from ack.readers.google_sheets.config import GoogleSheetsReaderConfig +from ack.readers.google_sheets_old.config import GoogleSheetsReaderOldConfig +from ack.readers.mysql.config import MySQLReaderConfig +from ack.readers.mytarget.config import MyTargetReaderConfig +from ack.readers.radarly.config import RadarlyReaderConfig +from ack.readers.adobe_analytics_1_4.reader import AdobeAnalytics14Reader +from ack.readers.adobe_analytics_2_0.reader import AdobeAnalytics20Reader +from ack.readers.amazon_s3.reader import AmazonS3Reader +from ack.readers.confluence.reader import ConfluenceReader +from ack.readers.facebook.reader import FacebookReader +from ack.readers.google_ads.reader import GoogleAdsReader +from ack.readers.google_analytics.reader import GoogleAnalyticsReader +from ack.readers.google_cloud_storage.reader import GoogleCloudStorageReader +from ack.readers.google_dbm.reader import GoogleDBMReader +from ack.readers.google_dcm.reader import GoogleDCMReader +from ack.readers.google_dv360.reader import GoogleDV360Reader +from 
ack.readers.google_sa360.reader import GoogleSA360Reader +from ack.readers.google_search_console.reader import GoogleSearchConsoleReader +from ack.readers.google_sheets.reader import GoogleSheetsReader +from ack.readers.google_sheets_old.reader import GoogleSheetsReaderOld +from ack.readers.mysql.reader import MySQLReader +from ack.readers.mytarget.reader import MyTargetReader +from ack.readers.radarly.reader import RadarlyReader +from ack.readers.salesforce.config import SalesforceReaderConfig +from ack.readers.salesforce.reader import SalesforceReader +from ack.readers.the_trade_desk.config import TheTradeDeskReaderConfig +from ack.readers.the_trade_desk.reader import TheTradeDeskReader +from ack.readers.twitter.config import TwitterReaderConfig +from ack.readers.twitter.reader import TwitterReader +from ack.readers.yandex_campaign.config import YandexCampaignReaderConfig +from ack.readers.yandex_campaign.reader import YandexCampaignReader +from ack.readers.yandex_statistics.config import YandexStatisticsReaderConfig +from ack.readers.yandex_statistics.reader import YandexStatisticsReader + + +readers_classes = { + "adobe_analytics_1_4": (AdobeAnalytics14Reader, AdobeAnalytics14ReaderConfig), + "adobe_analytics_2_0": (AdobeAnalytics20Reader, AdobeAnalytics20ReaderConfig), + "amazon_s3": (AmazonS3Reader, AmazonS3ReaderConfig), + "confluence": (ConfluenceReader, ConfluenceReaderConfig), + "facebook": (FacebookReader, FacebookReaderConfig), + "google_ads": (GoogleAdsReader, GoogleAdsReaderConfig), + "google_analytics": (GoogleAnalyticsReader, GoogleAnalyticsReaderConfig), + "google_cloud_storage": (GoogleCloudStorageReader, GoogleCloudStorageReaderConfig), + "google_dbm": (GoogleDBMReader, GoogleDBMReaderConfig), + "google_dcm": (GoogleDCMReader, GoogleDCMReaderConfig), + "google_dv360": (GoogleDV360Reader, GoogleDV360ReaderConfig), + "google_sa360": (GoogleSA360Reader, GoogleSA360ReaderConfig), + "google_search_console": (GoogleSearchConsoleReader, 
GoogleSearchConsoleReaderConfig), + "google_sheets": (GoogleSheetsReader, GoogleSheetsReaderConfig), + "google_sheets_old": (GoogleSheetsReaderOld, GoogleSheetsReaderOldConfig), + "mysql": (MySQLReader, MySQLReaderConfig), + "mytarget": (MyTargetReader, MyTargetReaderConfig), + "radarly": (RadarlyReader, RadarlyReaderConfig), + "salesforce": (SalesforceReader, SalesforceReaderConfig), + "the_trade_desk": (TheTradeDeskReader, TheTradeDeskReaderConfig), + "twitter": (TwitterReader, TwitterReaderConfig), + "yandex_campaign": (YandexCampaignReader, YandexCampaignReaderConfig), + "yandex_statistics": (YandexStatisticsReader, YandexStatisticsReaderConfig), +} diff --git a/ack/entrypoints/json/writers.py b/ack/entrypoints/json/writers.py new file mode 100644 index 00000000..0f6801d2 --- /dev/null +++ b/ack/entrypoints/json/writers.py @@ -0,0 +1,35 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+from ack.writers.amazon_s3.config import AmazonS3WriterConfig +from ack.writers.google_bigquery.config import GoogleBigQueryWriterConfig +from ack.writers.google_cloud_storage.config import GoogleCloudStorageWriterConfig +from ack.writers.local.config import LocalWriterConfig +from ack.writers.amazon_s3.writer import AmazonS3Writer +from ack.writers.console.writer import ConsoleWriter +from ack.writers.google_bigquery.writer import GoogleBigQueryWriter +from ack.writers.google_cloud_storage.writer import GoogleCloudStorageWriter +from ack.writers.local.writer import LocalWriter + + +writers_classes = { + "amazon_s3": (AmazonS3Writer, AmazonS3WriterConfig), + "console": (ConsoleWriter,), + "google_bigquery": (GoogleBigQueryWriter, GoogleBigQueryWriterConfig), + "google_cloud_storage": (GoogleCloudStorageWriter, GoogleCloudStorageWriterConfig), + "local": (LocalWriter, LocalWriterConfig), +} diff --git a/ack/readers/__init__.py b/ack/readers/__init__.py new file mode 100644 index 00000000..d46139b7 --- /dev/null +++ b/ack/readers/__init__.py @@ -0,0 +1,17 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
diff --git a/ack/readers/adobe_analytics_1_4/__init__.py b/ack/readers/adobe_analytics_1_4/__init__.py new file mode 100644 index 00000000..d46139b7 --- /dev/null +++ b/ack/readers/adobe_analytics_1_4/__init__.py @@ -0,0 +1,17 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. diff --git a/ack/readers/adobe_analytics_1_4/cli.py b/ack/readers/adobe_analytics_1_4/cli.py new file mode 100644 index 00000000..7af1f7dd --- /dev/null +++ b/ack/readers/adobe_analytics_1_4/cli.py @@ -0,0 +1,80 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +import click +from ack.readers.adobe_analytics_1_4.reader import AdobeAnalytics14Reader +from ack.utils.args import extract_args +from ack.utils.processor import processor + + +def format_key_if_needed(ctx, param, value): + """ + In some cases, newlines are escaped when passed as a click.option(). + This callback corrects this unexpected behaviour. + """ + return value.replace("\\n", "\n") + + +@click.command(name="read_adobe") +@click.option( + "--adobe-client-id", + required=True, + help="Client ID, that you can find in your integration section on Adobe Developper Console.", +) +@click.option( + "--adobe-client-secret", + required=True, + help="Client Secret, that you can find in your integration section on Adobe Developper Console.", +) +@click.option( + "--adobe-tech-account-id", + required=True, + help="Technical Account ID, that you can find in your integration section on Adobe Developper Console.", +) +@click.option( + "--adobe-org-id", + required=True, + help="Organization ID, that you can find in your integration section on Adobe Developper Console.", +) +@click.option( + "--adobe-private-key", + required=True, + callback=format_key_if_needed, + help="Content of the private.key file, that you had to provide to create the integration. " + "Make sure to enter the parameter in quotes, include headers, and indicate newlines as '\\n'.", +) +@click.option( + "--adobe-global-company-id", + required=True, + help="Global Company ID, to be requested to Discovery API. 
" + "Doc: https://www.adobe.io/apis/experiencecloud/analytics/docs.html#!AdobeDocs/analytics-2.0-apis/master/discovery.md)", +) +@click.option("--adobe-list-report-suite", type=click.BOOL, default=False) +@click.option("--adobe-report-suite-id") +@click.option("--adobe-report-element-id", multiple=True) +@click.option("--adobe-report-metric-id", multiple=True) +@click.option("--adobe-date-granularity", default=None) +@click.option( + "--adobe-day-range", type=click.Choice(["PREVIOUS_DAY", "LAST_30_DAYS", "LAST_7_DAYS", "LAST_90_DAYS"]), default=None, +) +@click.option("--adobe-start-date", type=click.DateTime()) +@click.option("--adobe-end-date", default=None, type=click.DateTime()) +@processor("adobe_password", "adobe_username") +def adobe_analytics_1_4(**kwargs): + # Should handle valid combinations dimensions/metrics in the API + return AdobeAnalytics14Reader(**extract_args("adobe_", kwargs)) diff --git a/ack/readers/adobe_analytics_1_4/config.py b/ack/readers/adobe_analytics_1_4/config.py new file mode 100644 index 00000000..16cf7f1f --- /dev/null +++ b/ack/readers/adobe_analytics_1_4/config.py @@ -0,0 +1,61 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+from datetime import datetime +from typing import List, Literal + +from pydantic import BaseModel, validator + +ADOBE_API_ENDPOINT = "https://api.omniture.com/admin/1.4/rest/" +LIMIT_NVIEWS_PER_REQ = 5 +MAX_WAIT_REPORT_DELAY = 4096 +DAY_RANGE = ("PREVIOUS_DAY", "LAST_30_DAYS", "LAST_7_DAYS", "LAST_90_DAYS") + + +class AdobeAnalytics14ReaderConfig(BaseModel): + client_id: str + client_secret: str + tech_account_id: str + org_id: str + private_key: str + global_company_id: str + list_report_suite: bool = False + report_suite_id: str = None + report_element_id: List[str] = [] + report_metric_id: List[str] = [] + date_granularity: str = None + day_range: Literal[DAY_RANGE] = None + start_date: datetime = None + end_date: datetime = None + + @validator("start_date", "end_date", pre=True) + def date_format(cls, v): + if isinstance(v, str): + try: + return datetime.strptime(v, "%Y-%m-%d") + except ValueError: + raise ValueError("Datetime format must follow 'YYYY-MM-DD'") + return v + + @validator("private_key") + def format_key_if_needed(cls, v): + """ + From the old Click behavior. In case if needed. + In some cases, newlines are escaped when passed as a click.option(). + This callback corrects this unexpected behaviour. + """ + return v.replace("\\n", "\n") diff --git a/nck/helpers/adobe_helper.py b/ack/readers/adobe_analytics_1_4/helper.py similarity index 82% rename from nck/helpers/adobe_helper.py rename to ack/readers/adobe_analytics_1_4/helper.py index 483b18d0..4510b935 100644 --- a/nck/helpers/adobe_helper.py +++ b/ack/readers/adobe_analytics_1_4/helper.py @@ -15,20 +15,19 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-import datetime -import more_itertools -import logging -from nck.utils.text import reformat_naming_for_bq - # Credit goes to Mr Martin Winkel for the base code provided : # github : https://github.com/SaturnFromTitan/adobe_analytics +import datetime + +import more_itertools +from ack.config import logger +from ack.utils.text import reformat_naming_for_bq + def _parse_header(report): - dimensions = [ - _classification_or_name(dimension) for dimension in report["elements"] - ] + dimensions = [_classification_or_name(dimension) for dimension in report["elements"]] metrics = [metric["name"] for metric in report["metrics"]] return dimensions, metrics @@ -53,7 +52,7 @@ def _parse_data(data, metric_count): :param metric_count: int, number of metrics in report :return: list of lists """ - logging.debug("Parsing report data (recursively).") + logger.debug("Parsing report data (recursively).") if len(data) > 0 and "breakdown" in data[0]: rows = list() for chunk in data: @@ -76,7 +75,7 @@ def _parse_most_granular(data, metric_count): :param metric_count: int, number of metrics in report :return: list of lists """ - logging.debug("Parsing most granular level of data.") + logger.debug("Parsing most granular level of data.") rows = list() for chunk in data: part_rows = [(val if val != "" else None) for val in chunk["counts"]] @@ -101,20 +100,11 @@ def _dimension_value(chunk): def _dimension_value_is_nan(chunk): - return ( - ("name" not in chunk) - or (chunk["name"] == "") - or (chunk["name"] == "::unspecified::") - ) + return ("name" not in chunk) or (chunk["name"] == "") or (chunk["name"] == "::unspecified::") def _to_datetime(chunk): - time_stamp = datetime.datetime( - year=chunk["year"], - month=chunk["month"], - day=chunk["day"], - hour=chunk.get("hour", 0), - ) + time_stamp = datetime.datetime(year=chunk["year"], month=chunk["month"], day=chunk["day"], hour=chunk.get("hour", 0),) return time_stamp.strftime("%Y-%m-%d %H:00:00") @@ -131,15 +121,3 @@ def parse(raw_response): 
yield {headers[i]: row[i] for i in range(len(headers))} else: yield {header: None for header in headers} - - -class ReportDescriptionError(Exception): - def __init__(self, message): - super().__init__(message) - logging.error(message) - - -class ReportNotReadyError(Exception): - def __init__(self, message): - super().__init__(message) - logging.error(message) diff --git a/nck/readers/adobe_reader.py b/ack/readers/adobe_analytics_1_4/reader.py similarity index 58% rename from nck/readers/adobe_reader.py rename to ack/readers/adobe_analytics_1_4/reader.py index 8d30f2fd..2d805983 100644 --- a/nck/readers/adobe_reader.py +++ b/ack/readers/adobe_analytics_1_4/reader.py @@ -15,119 +15,48 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -import click -import logging -import datetime -import json -import requests -from time import sleep -from itertools import chain - -from nck.commands.command import processor -from nck.readers.reader import Reader -from nck.utils.args import extract_args -from nck.utils.retry import retry -from nck.streams.json_stream import JSONStream -from nck.clients.adobe_client import AdobeClient -from nck.helpers.adobe_helper import ReportDescriptionError, ReportNotReadyError, parse - -from click import ClickException # Credit goes to Mr Martin Winkel for the base code provided : # github : https://github.com/SaturnFromTitan/adobe_analytics -LIMIT_NVIEWS_PER_REQ = 5 - -ADOBE_API_ENDPOINT = "https://api.omniture.com/admin/1.4/rest/" - -MAX_WAIT_REPORT_DELAY = 4096 - - -def format_key_if_needed(ctx, param, value): - """ - In some cases, newlines are escaped when passed as a click.option(). - This callback corrects this unexpected behaviour. 
- """ - return value.replace("\\n", "\n") - - -@click.command(name="read_adobe") -@click.option( - "--adobe-client-id", - required=True, - help="Client ID, that you can find in your integration section on Adobe Developper Console.", -) -@click.option( - "--adobe-client-secret", - required=True, - help="Client Secret, that you can find in your integration section on Adobe Developper Console.", -) -@click.option( - "--adobe-tech-account-id", - required=True, - help="Technical Account ID, that you can find in your integration section on Adobe Developper Console.", -) -@click.option( - "--adobe-org-id", - required=True, - help="Organization ID, that you can find in your integration section on Adobe Developper Console.", -) -@click.option( - "--adobe-private-key", - required=True, - callback=format_key_if_needed, - help="Content of the private.key file, that you had to provide to create the integration. " - "Make sure to enter the parameter in quotes, include headers, and indicate newlines as '\\n'.", -) -@click.option( - "--adobe-global-company-id", - required=True, - help="Global Company ID, to be requested to Discovery API. 
" - "Doc: https://www.adobe.io/apis/experiencecloud/analytics/docs.html#!AdobeDocs/analytics-2.0-apis/master/discovery.md)", -) -@click.option("--adobe-list-report-suite", type=click.BOOL, default=False) -@click.option("--adobe-report-suite-id") -@click.option("--adobe-report-element-id", multiple=True) -@click.option("--adobe-report-metric-id", multiple=True) -@click.option("--adobe-date-granularity", default=None) -@click.option( - "--adobe-day-range", - type=click.Choice(["PREVIOUS_DAY", "LAST_30_DAYS", "LAST_7_DAYS", "LAST_90_DAYS"]), - default=None, -) -@click.option("--adobe-start-date", type=click.DateTime()) -@click.option("--adobe-end-date", default=None, type=click.DateTime()) -@processor("adobe_password", "adobe_username") -def adobe(**kwargs): - # Should handle valid combinations dimensions/metrics in the API - return AdobeReader(**extract_args("adobe_", kwargs)) - +import datetime +import json +from itertools import chain +from time import sleep -class AdobeReader(Reader): +import requests +from click import ClickException +from ack.clients.adobe_analytics.client import AdobeAnalyticsClient +from ack.config import logger +from ack.readers.adobe_analytics_1_4.config import ADOBE_API_ENDPOINT, MAX_WAIT_REPORT_DELAY +from ack.readers.adobe_analytics_1_4.helper import parse +from ack.readers.reader import Reader +from ack.streams.json_stream import JSONStream +from ack.utils.date_handler import check_date_range_definition_conformity +from ack.utils.exceptions import ReportDescriptionError, ReportNotReadyError +from ack.utils.retry import retry + + +class AdobeAnalytics14Reader(Reader): def __init__( - self, - client_id, - client_secret, - tech_account_id, - org_id, - private_key, - global_company_id, - **kwargs, + self, client_id, client_secret, tech_account_id, org_id, private_key, global_company_id, **kwargs, ): - self.adobe_client = AdobeClient( - client_id, client_secret, tech_account_id, org_id, private_key - ) + self.adobe_client = 
AdobeAnalyticsClient(client_id, client_secret, tech_account_id, org_id, private_key) self.global_company_id = global_company_id self.kwargs = kwargs + check_date_range_definition_conformity( + self.kwargs.get("start_date"), self.kwargs.get("end_date"), self.kwargs.get("day_range") + ) + def request(self, api, method, data=None): """ Makes "raw" HTTP requests to Reporting API 1.4 (used within the query_report and get_report methods) API workflow: https://github.com/AdobeDocs/analytics-1.4-apis/blob/master/docs/reporting-api/get_started.md """ - api_method = "{0}.{1}".format(api, method) + api_method = f"{api}.{method}" data = data or dict() - logging.info("{}.{} {}".format(api, method, data)) + logger.info(f"{api}.{method} {data}") response = requests.post( ADOBE_API_ENDPOINT, params={"method": api_method}, @@ -135,7 +64,7 @@ def request(self, api, method, data=None): headers=self.adobe_client.build_request_headers(self.global_company_id), ) json_response = response.json() - logging.debug("Response: {}".format(json_response)) + logger.debug(f"Response: {json_response}") return json_response def build_report_description(self): @@ -148,17 +77,13 @@ def build_report_description(self): "reportDescription": { "source": "warehouse", "reportSuiteID": self.kwargs.get("report_suite_id"), - "elements": [ - {"id": el} for el in self.kwargs.get("report_element_id", []) - ], - "metrics": [ - {"id": mt} for mt in self.kwargs.get("report_metric_id", []) - ], + "elements": [{"id": el} for el in self.kwargs.get("report_element_id", [])], + "metrics": [{"id": mt} for mt in self.kwargs.get("report_metric_id", [])], } } self.set_date_gran_report_desc(report_description) self.set_date_range_report_desc(report_description) - logging.debug(f"report_description content {report_description}") + logger.debug(f"report_description content {report_description}") return report_description def get_days_delta(self): @@ -172,7 +97,7 @@ def get_days_delta(self): try: days_delta = 
delta_mapping[days_range] except KeyError: - raise ClickException("{} is not handled by the reader".format(days_range)) + raise ClickException(f"{days_range} is not handled by the reader") return days_delta def set_date_range_report_desc(self, report_description): @@ -185,21 +110,15 @@ def set_date_range_report_desc(self, report_description): else: end_date = datetime.datetime.now().date() start_date = end_date - datetime.timedelta(days=self.get_days_delta()) - report_description["reportDescription"]["dateFrom"] = start_date.strftime( - "%Y-%m-%d" - ) - report_description["reportDescription"]["dateTo"] = end_date.strftime( - "%Y-%m-%d" - ) + report_description["reportDescription"]["dateFrom"] = start_date.strftime("%Y-%m-%d") + report_description["reportDescription"]["dateTo"] = end_date.strftime("%Y-%m-%d") def set_date_gran_report_desc(self, report_description): """ Adds the dateGranularity parameter to a reportDescription. """ if self.kwargs.get("date_granularity", None) is not None: - report_description["reportDescription"][ - "dateGranularity" - ] = self.kwargs.get("date_granularity") + report_description["reportDescription"]["dateGranularity"] = self.kwargs.get("date_granularity") @retry def query_report(self): @@ -210,9 +129,7 @@ def query_report(self): - Output: reportID, to be passed to the Report.Get method - Doc: https://github.com/AdobeDocs/analytics-1.4-apis/blob/master/docs/reporting-api/methods/r_Queue.md """ - query_report = self.request( - api="Report", method="Queue", data=self.build_report_description() - ) + query_report = self.request(api="Report", method="Queue", data=self.build_report_description()) return query_report @retry @@ -224,15 +141,14 @@ def get_report(self, report_id, page_number=1): - Output: reportResponse containing the requested report data - Doc: https://github.com/AdobeDocs/analytics-1.4-apis/blob/master/docs/reporting-api/methods/r_Get.md """ - request_f = lambda: self.request( - api="Report", - method="Get", - 
data={"reportID": report_id, "page": page_number}, - ) + + def request_f(): + return self.request(api="Report", method="Get", data={"reportID": report_id, "page": page_number},) + response = request_f() idx = 1 while response.get("error") == "report_not_ready": - logging.info(f"waiting {idx} s for report to be ready") + logger.info(f"waiting {idx} s for report to be ready") sleep(idx + 1) if idx + 1 > MAX_WAIT_REPORT_DELAY: raise ReportNotReadyError("waited too long for report to be ready") @@ -249,8 +165,7 @@ def download_report(self, rep_id): all_responses = [parse(raw_response)] if "totalPages" in raw_response["report"]: all_responses = all_responses + [ - parse(self.get_report(rep_id, page_number=np)) - for np in range(2, raw_response["report"]["totalPages"] + 1) + parse(self.get_report(rep_id, page_number=np)) for np in range(2, raw_response["report"]["totalPages"] + 1) ] return chain(*all_responses) diff --git a/ack/readers/adobe_analytics_2_0/__init__.py b/ack/readers/adobe_analytics_2_0/__init__.py new file mode 100644 index 00000000..d46139b7 --- /dev/null +++ b/ack/readers/adobe_analytics_2_0/__init__.py @@ -0,0 +1,17 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. diff --git a/ack/readers/adobe_analytics_2_0/cli.py b/ack/readers/adobe_analytics_2_0/cli.py new file mode 100644 index 00000000..7d877623 --- /dev/null +++ b/ack/readers/adobe_analytics_2_0/cli.py @@ -0,0 +1,102 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +import click +from ack.readers.adobe_analytics_2_0.reader import AdobeAnalytics20Reader +from ack.utils.args import extract_args +from ack.utils.date_handler import DEFAULT_DATE_RANGE_FUNCTIONS +from ack.utils.processor import processor + + +def format_key_if_needed(ctx, param, value): + """ + In some cases, newlines are escaped when passed as a click.option(). + This callback corrects this unexpected behaviour. 
+ """ + return value.replace("\\n", "\n") + + +@click.command(name="read_adobe_2_0") +@click.option( + "--adobe-2-0-client-id", + required=True, + help="Client ID, that you can find in your integration section on Adobe Developper Console.", +) +@click.option( + "--adobe-2-0-client-secret", + required=True, + help="Client Secret, that you can find in your integration section on Adobe Developper Console.", +) +@click.option( + "--adobe-2-0-tech-account-id", + required=True, + help="Technical Account ID, that you can find in your integration section on Adobe Developper Console.", +) +@click.option( + "--adobe-2-0-org-id", + required=True, + help="Organization ID, that you can find in your integration section on Adobe Developper Console.", +) +@click.option( + "--adobe-2-0-private-key", + required=True, + callback=format_key_if_needed, + help="Content of the private.key file, that you had to provide to create the integration. " + "Make sure to enter the parameter in quotes, include headers, and indicate newlines as '\\n'.", +) +@click.option( + "--adobe-2-0-global-company-id", + required=True, + help="Global Company ID, to be requested to Discovery API. " + "Doc: https://www.adobe.io/apis/experiencecloud/analytics/docs.html#!AdobeDocs/analytics-2.0-apis/master/discovery.md)", +) +@click.option( + "--adobe-2-0-report-suite-id", required=True, help="ID of the requested Adobe Report Suite", +) +@click.option( + "--adobe-2-0-dimension", + required=True, + multiple=True, + help="To get dimension names, enable the Debugger feature in Adobe Analytics Workspace: " + "it will allow you to visualize the back-end JSON requests made by Adobe Analytics UI to Reporting API 2.0. 
" + "Doc: https://github.com/AdobeDocs/analytics-2.0-apis/blob/master/reporting-tricks.md", +) +@click.option( + "--adobe-2-0-metric", + required=True, + multiple=True, + help="To get metric names, enable the Debugger feature in Adobe Analytics Workspace: " + "it will allow you to visualize the back-end JSON requests made by Adobe Analytics UI to Reporting API 2.0. " + "Doc: https://github.com/AdobeDocs/analytics-2.0-apis/blob/master/reporting-tricks.md", +) +@click.option( + "--adobe-2-0-start-date", type=click.DateTime(), help="Start date of the report", +) +@click.option( + "--adobe-2-0-end-date", type=click.DateTime(), help="End date of the report", +) +@click.option( + "--adobe-2-0-date-range", + type=click.Choice(DEFAULT_DATE_RANGE_FUNCTIONS.keys()), + help=f"One of the available ACK default date ranges: {DEFAULT_DATE_RANGE_FUNCTIONS.keys()}", +) +@processor( + "adobe_2_0_client_id", "adobe_2_0_client_secret", "adobe_2_0_tech_account_id", "adobe_2_0_org_id", "adobe_2_0_private_key", +) +def adobe_analytics_2_0(**kwargs): + return AdobeAnalytics20Reader(**extract_args("adobe_2_0_", kwargs)) diff --git a/ack/readers/adobe_analytics_2_0/config.py b/ack/readers/adobe_analytics_2_0/config.py new file mode 100644 index 00000000..cf29c009 --- /dev/null +++ b/ack/readers/adobe_analytics_2_0/config.py @@ -0,0 +1,60 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +from datetime import datetime +from typing import List, Literal + +from pydantic import BaseModel, validator + +from ack.utils.date_handler import DEFAULT_DATE_RANGE_FUNCTIONS + +DATEFORMAT = "%Y-%m-%dT%H:%M:%S" +API_WINDOW_DURATION = 6 +API_REQUESTS_OVER_WINDOW_LIMIT = 12 + + +class AdobeAnalytics20ReaderConfig(BaseModel): + client_id: str + client_secret: str + tech_account_id: str + org_id: str + private_key: str + global_company_id: str + report_suite_id: str + dimension: List[str] + metric: List[str] + start_date: datetime = None + end_date: datetime = None + date_range: Literal[tuple(DEFAULT_DATE_RANGE_FUNCTIONS.keys())] = None + + @validator("start_date", "end_date", pre=True) + def date_format(cls, v): + if isinstance(v, str): + try: + return datetime.strptime(v, "%Y-%m-%d") + except ValueError: + raise ValueError("Datetime format must follow 'YYYY-MM-DD'") + return v + + @validator("private_key") + def format_key_if_needed(cls, v): + """ + From the old Click behavior. In case if needed. + In some cases, newlines are escaped when passed as a click.option(). + This callback corrects this unexpected behaviour. + """ + return v.replace("\\n", "\n") diff --git a/nck/helpers/adobe_helper_2_0.py b/ack/readers/adobe_analytics_2_0/helper.py similarity index 84% rename from nck/helpers/adobe_helper_2_0.py rename to ack/readers/adobe_analytics_2_0/helper.py index 40d10656..1f0c6a88 100644 --- a/nck/helpers/adobe_helper_2_0.py +++ b/ack/readers/adobe_analytics_2_0/helper.py @@ -16,19 +16,10 @@ # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-import logging from datetime import datetime -class APIRateLimitError(Exception): - def __init__(self, message): - super().__init__(message) - logging.error(message) - - -def add_metric_container_to_report_description( - rep_desc, dimensions, metrics, breakdown_item_ids -): +def add_metric_container_to_report_description(rep_desc, dimensions, metrics, breakdown_item_ids): """ Filling the metricContainer section of a report description: - Creates 1 filter per dimension breakdown x metric @@ -50,10 +41,7 @@ def add_metric_container_to_report_description( ] rep_desc["metricContainer"]["metrics"] = [ - { - "id": f"metrics/{metrics[j]}", - "filters": [i + j * nb_breakdowns for i in range(nb_breakdowns)], - } + {"id": f"metrics/{metrics[j]}", "filters": [i + j * nb_breakdowns for i in range(nb_breakdowns)]} for j in range(nb_metrics) ] @@ -106,8 +94,5 @@ def parse_response(response, metrics, parent_dim_parsed): dimension: row["value"], **parsed_row_metrics, } - parsed_row = { - k: (format_date(v) if k == "daterangeday" else v) - for k, v in parsed_row.items() - } + parsed_row = {k: (format_date(v) if k == "daterangeday" else v) for k, v in parsed_row.items()} yield parsed_row diff --git a/nck/readers/adobe_reader_2_0.py b/ack/readers/adobe_analytics_2_0/reader.py similarity index 59% rename from nck/readers/adobe_reader_2_0.py rename to ack/readers/adobe_analytics_2_0/reader.py index 734f227b..f112bc39 100644 --- a/nck/readers/adobe_reader_2_0.py +++ b/ack/readers/adobe_analytics_2_0/reader.py @@ -16,120 +16,29 @@ # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-import logging -import click import json -import requests import time -from itertools import chain from datetime import timedelta +from itertools import chain -from nck.utils.retry import retry -from nck.utils.args import extract_args -from nck.commands.command import processor -from nck.readers.reader import Reader -from nck.clients.adobe_client import AdobeClient -from nck.streams.json_stream import JSONStream -from nck.helpers.adobe_helper_2_0 import ( - APIRateLimitError, +import requests +from ack.clients.adobe_analytics.client import AdobeAnalyticsClient +from ack.config import logger +from ack.readers.adobe_analytics_2_0.config import API_REQUESTS_OVER_WINDOW_LIMIT, API_WINDOW_DURATION, DATEFORMAT +from ack.readers.adobe_analytics_2_0.helper import ( add_metric_container_to_report_description, - get_node_values_from_response, get_item_ids_from_nodes, + get_node_values_from_response, parse_response, ) - -DATEFORMAT = "%Y-%m-%dT%H:%M:%S" -API_WINDOW_DURATION = 6 -API_REQUESTS_OVER_WINDOW_LIMIT = 12 +from ack.readers.reader import Reader +from ack.streams.json_stream import JSONStream +from ack.utils.date_handler import build_date_range +from ack.utils.exceptions import APIRateLimitError +from ack.utils.retry import retry -def format_key_if_needed(ctx, param, value): - """ - In some cases, newlines are escaped when passed as a click.option(). - This callback corrects this unexpected behaviour. 
- """ - return value.replace("\\n", "\n") - - -@click.command(name="read_adobe_2_0") -@click.option( - "--adobe-2-0-client-id", - required=True, - help="Client ID, that you can find in your integration section on Adobe Developper Console.", -) -@click.option( - "--adobe-2-0-client-secret", - required=True, - help="Client Secret, that you can find in your integration section on Adobe Developper Console.", -) -@click.option( - "--adobe-2-0-tech-account-id", - required=True, - help="Technical Account ID, that you can find in your integration section on Adobe Developper Console.", -) -@click.option( - "--adobe-2-0-org-id", - required=True, - help="Organization ID, that you can find in your integration section on Adobe Developper Console.", -) -@click.option( - "--adobe-2-0-private-key", - required=True, - callback=format_key_if_needed, - help="Content of the private.key file, that you had to provide to create the integration. " - "Make sure to enter the parameter in quotes, include headers, and indicate newlines as '\\n'.", -) -@click.option( - "--adobe-2-0-global-company-id", - required=True, - help="Global Company ID, to be requested to Discovery API. " - "Doc: https://www.adobe.io/apis/experiencecloud/analytics/docs.html#!AdobeDocs/analytics-2.0-apis/master/discovery.md)", -) -@click.option( - "--adobe-2-0-report-suite-id", - required=True, - help="ID of the requested Adobe Report Suite", -) -@click.option( - "--adobe-2-0-dimension", - required=True, - multiple=True, - help="To get dimension names, enable the Debugger feature in Adobe Analytics Workspace: " - "it will allow you to visualize the back-end JSON requests made by Adobe Analytics UI to Reporting API 2.0. 
" - "Doc: https://github.com/AdobeDocs/analytics-2.0-apis/blob/master/reporting-tricks.md", -) -@click.option( - "--adobe-2-0-metric", - required=True, - multiple=True, - help="To get metric names, enable the Debugger feature in Adobe Analytics Workspace: " - "it will allow you to visualize the back-end JSON requests made by Adobe Analytics UI to Reporting API 2.0. " - "Doc: https://github.com/AdobeDocs/analytics-2.0-apis/blob/master/reporting-tricks.md", -) -@click.option( - "--adobe-2-0-start-date", - required=True, - type=click.DateTime(), - help="Start date of the report", -) -@click.option( - "--adobe-2-0-end-date", - required=True, - type=click.DateTime(), - help="End date of the report", -) -@processor( - "adobe_2_0_client_id", - "adobe_2_0_client_secret", - "adobe_2_0_tech_account_id", - "adobe_2_0_org_id", - "adobe_2_0_private_key", -) -def adobe_2_0(**kwargs): - return AdobeReader_2_0(**extract_args("adobe_2_0_", kwargs)) - - -class AdobeReader_2_0(Reader): +class AdobeAnalytics20Reader(Reader): def __init__( self, client_id, @@ -143,20 +52,19 @@ def __init__( metric, start_date, end_date, + date_range, ): - self.adobe_client = AdobeClient( - client_id, client_secret, tech_account_id, org_id, private_key - ) + self.adobe_client = AdobeAnalyticsClient(client_id, client_secret, tech_account_id, org_id, private_key) self.global_company_id = global_company_id self.report_suite_id = report_suite_id self.dimensions = list(dimension) self.metrics = list(metric) - self.start_date = start_date - self.end_date = end_date + timedelta(days=1) + self.start_date, self.end_date = build_date_range(start_date, end_date, date_range) + self.end_date = self.end_date + timedelta(days=1) self.ingestion_tracker = [] self.node_values = {} - def build_date_range(self): + def format_date_range(self): return f"{self.start_date.strftime(DATEFORMAT)}/{self.end_date.strftime(DATEFORMAT)}" def build_report_description(self, metrics, breakdown_item_ids=[]): @@ -169,19 +77,14 @@ def 
build_report_description(self, metrics, breakdown_item_ids=[]): rep_desc = { "rsid": self.report_suite_id, - "globalFilters": [ - {"type": "dateRange", "dateRange": self.build_date_range()} - ], + "globalFilters": [{"type": "dateRange", "dateRange": self.format_date_range()}], "metricContainer": {}, "dimension": f"variables/{self.dimensions[len(breakdown_item_ids)]}", "settings": {"countRepeatInstances": "true", "limit": "5000"}, } rep_desc = add_metric_container_to_report_description( - rep_desc=rep_desc, - dimensions=self.dimensions, - metrics=metrics, - breakdown_item_ids=breakdown_item_ids, + rep_desc=rep_desc, dimensions=self.dimensions, metrics=metrics, breakdown_item_ids=breakdown_item_ids, ) return rep_desc @@ -193,19 +96,11 @@ def throttle(self): current_time = time.time() self.ingestion_tracker.append(current_time) - window_ingestion_tracker = [ - t - for t in self.ingestion_tracker - if t >= (current_time - API_WINDOW_DURATION) - ] + window_ingestion_tracker = [t for t in self.ingestion_tracker if t >= (current_time - API_WINDOW_DURATION)] if len(window_ingestion_tracker) >= API_REQUESTS_OVER_WINDOW_LIMIT: - sleep_time = ( - window_ingestion_tracker[0] + API_WINDOW_DURATION - current_time - ) - logging.warning( - f"Throttling activated: sleeping for {sleep_time} seconds..." 
- ) + sleep_time = window_ingestion_tracker[0] + API_WINDOW_DURATION - current_time + logger.warning(f"Throttling activated: sleeping for {sleep_time} seconds...") time.sleep(sleep_time) @retry @@ -243,7 +138,7 @@ def get_parsed_report(self, rep_desc, metrics, parent_dim_parsed={}): "dim": rep_desc["dimension"].split("variables/")[1], "metrics": metrics, } - logging.info(f"Getting report: {report_info}") + logger.info(f"Getting report: {report_info}") first_response = self.get_report_page(rep_desc) all_responses = [parse_response(first_response, metrics, parent_dim_parsed)] @@ -251,9 +146,7 @@ def get_parsed_report(self, rep_desc, metrics, parent_dim_parsed={}): if first_response["totalPages"] > 1: for page_nb in range(1, first_response["totalPages"]): next_response = self.get_report_page(rep_desc, page_nb) - all_responses += [ - parse_response(next_response, metrics, parent_dim_parsed) - ] + all_responses += [parse_response(next_response, metrics, parent_dim_parsed)] return chain(*all_responses) @@ -264,17 +157,13 @@ def get_node_values(self, breakdown_item_ids): For instance: {'daterangeday_1200001': 'Jan 1, 2020'} """ - rep_desc = self.build_report_description( - metrics=["visits"], breakdown_item_ids=breakdown_item_ids - ) + rep_desc = self.build_report_description(metrics=["visits"], breakdown_item_ids=breakdown_item_ids) first_response = self.get_report_page(rep_desc) node_values = get_node_values_from_response(first_response) if first_response["totalPages"] > 1: for page_nb in range(1, first_response["totalPages"]): - next_node_values = get_node_values_from_response( - self.get_report_page(rep_desc, page_nb) - ) + next_node_values = get_node_values_from_response(self.get_report_page(rep_desc, page_nb)) node_values.update(next_node_values) return node_values @@ -288,7 +177,7 @@ def add_child_nodes_to_graph(self, graph, node, path_to_node): child_node_2: [] """ - logging.info(f"Adding child nodes of '{node}' to graph.") + logger.info(f"Adding child nodes of 
'{node}' to graph.") breakdown_item_ids = get_item_ids_from_nodes(path_to_node) child_node_values = self.get_node_values(breakdown_item_ids) @@ -333,13 +222,9 @@ def read_through_graph(self, graph=None, node=None): # If no remaining node children to explore: get report if len(path_to_node) == len(self.dimensions) - 1: - parent_dim_parsed = { - node.split("_")[0]: self.node_values[node] for node in path_to_node - } + parent_dim_parsed = {node.split("_")[0]: self.node_values[node] for node in path_to_node} breakdown_item_ids = get_item_ids_from_nodes(path_to_node) - rep_desc = self.build_report_description( - self.metrics, breakdown_item_ids - ) + rep_desc = self.build_report_description(self.metrics, breakdown_item_ids) data = self.get_parsed_report(rep_desc, self.metrics, parent_dim_parsed) yield from self.result_generator(data) @@ -348,9 +233,7 @@ def read_through_graph(self, graph=None, node=None): visited.append(node) # Update unvisited_childs - unvisited_childs = [ - child_node for child_node in graph[node] if child_node not in visited - ] + unvisited_childs = [child_node for child_node in graph[node] if child_node not in visited] # Read through child node children for child_node in unvisited_childs: @@ -366,10 +249,6 @@ def read_through_graph(self, graph=None, node=None): def read(self): if len(self.dimensions) == 1: - yield JSONStream( - "results_" + self.report_suite_id, self.read_one_dimension() - ) + yield JSONStream("results_" + self.report_suite_id, self.read_one_dimension()) elif len(self.dimensions) > 1: - yield JSONStream( - "results_" + self.report_suite_id, self.read_through_graph() - ) + yield JSONStream("results_" + self.report_suite_id, self.read_through_graph()) diff --git a/ack/readers/amazon_s3/__init__.py b/ack/readers/amazon_s3/__init__.py new file mode 100644 index 00000000..d46139b7 --- /dev/null +++ b/ack/readers/amazon_s3/__init__.py @@ -0,0 +1,17 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# 
licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. diff --git a/ack/readers/amazon_s3/cli.py b/ack/readers/amazon_s3/cli.py new file mode 100644 index 00000000..442b5061 --- /dev/null +++ b/ack/readers/amazon_s3/cli.py @@ -0,0 +1,34 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ +import click +from ack.readers.amazon_s3.reader import AmazonS3Reader +from ack.utils.args import extract_args +from ack.utils.processor import processor + + +@click.command(name="read_s3") +@click.option("--s3-bucket", required=True) +@click.option("--s3-prefix", required=True, multiple=True) +@click.option("--s3-format", required=True, type=click.Choice(["csv", "gz", "njson"])) +@click.option("--s3-dest-key-split", default=-1, type=int) +@click.option("--s3-csv-delimiter", default=",") +@click.option("--s3-csv-fieldnames", default=None) +@processor() +def amazon_s3(**kwargs): + return AmazonS3Reader(**extract_args("s3_", kwargs)) diff --git a/ack/readers/amazon_s3/config.py b/ack/readers/amazon_s3/config.py new file mode 100644 index 00000000..ab698ea5 --- /dev/null +++ b/ack/readers/amazon_s3/config.py @@ -0,0 +1,15 @@ +from typing import List, Literal + +from pydantic import BaseModel + + +FORMATS = ("csv", "gz", "njson") + + +class AmazonS3ReaderConfig(BaseModel): + bucket: str + prefix: List[str] + format: Literal[FORMATS] + dest_key_split: int = 1 + csv_delimiter: str = "," + csv_fieldnames: str = None diff --git a/nck/readers/s3_reader.py b/ack/readers/amazon_s3/reader.py similarity index 69% rename from nck/readers/s3_reader.py rename to ack/readers/amazon_s3/reader.py index 8a0c085d..c89ac8a8 100644 --- a/nck/readers/s3_reader.py +++ b/ack/readers/amazon_s3/reader.py @@ -15,31 +15,14 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-import click import boto3 -from nck.commands.command import processor -from nck.readers.objectstorage_reader import ObjectStorageReader -from nck.utils.args import extract_args +from ack.readers.object_storage.reader import ObjectStorageReader -@click.command(name="read_s3") -@click.option("--s3-bucket", required=True) -@click.option("--s3-prefix", required=True, multiple=True) -@click.option("--s3-format", required=True, type=click.Choice(["csv", "gz"])) -@click.option("--s3-dest-key-split", default=-1, type=int) -@click.option("--s3-csv-delimiter", default=",") -@click.option("--s3-csv-fieldnames", default=None) -@processor() -def s3(**kwargs): - return S3Reader(**extract_args("s3_", kwargs)) - - -class S3Reader(ObjectStorageReader): +class AmazonS3Reader(ObjectStorageReader): def __init__(self, bucket, prefix, format, dest_key_split=-1, **kwargs): - super().__init__( - bucket, prefix, format, dest_key_split, platform="S3", **kwargs - ) + super().__init__(bucket, prefix, format, dest_key_split, platform="S3", **kwargs) def create_client(self, config): boto_config = { diff --git a/ack/readers/confluence/__init__.py b/ack/readers/confluence/__init__.py new file mode 100644 index 00000000..d46139b7 --- /dev/null +++ b/ack/readers/confluence/__init__.py @@ -0,0 +1,17 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. diff --git a/ack/readers/confluence/cli.py b/ack/readers/confluence/cli.py new file mode 100644 index 00000000..643ee1c6 --- /dev/null +++ b/ack/readers/confluence/cli.py @@ -0,0 +1,46 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ +import click +from ack.readers.confluence.reader import ConfluenceReader +from ack.utils.args import extract_args +from ack.utils.processor import processor + + +@click.command(name="read_confluence") +@click.option("--confluence-user-login", required=True, help="User login associated with your Atlassian account") +@click.option("--confluence-api-token", required=True, help="API token associated with your Atlassian account") +@click.option( + "--confluence-atlassian-domain", required=True, help="Atlassian domain under which the content to request is located", +) +@click.option( + "--confluence-content-type", + type=click.Choice(["page", "blogpost"]), + default="page", + help="Type of content on which the report should be filtered", +) +@click.option("--confluence-spacekey", multiple=True, help="Space keys on which the report should be filtered") +@click.option( + "--confluence-field", + required=True, + multiple=True, + help="Fields that should be included in the report (path.to.field.value or custom_field)", +) +@processor("confluence_user_login", "confluence_api_token") +def confluence(**kwargs): + return ConfluenceReader(**extract_args("confluence_", kwargs)) diff --git a/ack/readers/confluence/config.py b/ack/readers/confluence/config.py new file mode 100644 index 00000000..47934e5b --- /dev/null +++ b/ack/readers/confluence/config.py @@ -0,0 +1,33 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +from typing import Literal, List + +from pydantic import BaseModel + +RECORDS_PER_PAGE = 100 +CONTENT_ENDPOINT = "wiki/rest/api/content" +CONTENT_TYPES = ("page", "blogpost") + + +class ConfluenceReaderConfig(BaseModel): + user_login: str + api_token: str + atlassian_domain: str + content_type: Literal[CONTENT_TYPES] = "page" + spacekey: List[str] = [] + field: List[str] diff --git a/ack/readers/confluence/helper.py b/ack/readers/confluence/helper.py new file mode 100644 index 00000000..310d2384 --- /dev/null +++ b/ack/readers/confluence/helper.py @@ -0,0 +1,228 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ +import re +from typing import Dict, List, Optional + +from bs4 import BeautifulSoup +from bs4.element import Tag +from unidecode import unidecode + + +def parse_response(raw_response, fields): + for content_dct in raw_response["results"]: + content_record = {} + for field in fields: + field_path = _get_field_path(field) + field_value = _get_field_value(content_dct, field_path) + field_as_dct = _format_field_as_dct(field, field_value) + content_record.update(field_as_dct) + yield content_record + + +# PARSE RESPONSE: Helpers + + +def _get_field_path(field): + if field in CUSTOM_FIELDS: + return CUSTOM_FIELDS[field]["source_field"].split(".") + else: + return field.split(".") + + +def _get_field_value(content_dct, field_path, visited=[]): + path_item = field_path[0] + remaining_path_items = len(field_path) - 1 + visited.append(path_item) + if path_item in content_dct: + if remaining_path_items == 0: + return content_dct[path_item] + else: + return _get_field_value(content_dct[path_item], field_path[1:], visited) + + +def _format_field_as_dct(field, field_value): + if field not in CUSTOM_FIELDS: + field_as_dct = {field: field_value} + else: + format_function = CUSTOM_FIELDS[field]["format_function"] + kwargs = CUSTOM_FIELDS[field]["format_function_kwargs"] + formatted_object = format_function(field_value, **kwargs) + if CUSTOM_FIELDS[field]["formatted_object_type"] == dict: + field_as_dct = formatted_object + else: + field_as_dct = {field: formatted_object} + return {_decode(key): _decode(value) for key, value in field_as_dct.items()} + + +def _decode(raw_value): + if isinstance(raw_value, str): + decoded_emoji = raw_value.encode("utf-16", "surrogatepass").decode("utf-16") + return unidecode(decoded_emoji).replace(" ", " ").strip() + else: + return raw_value + + +# CUSTOM FIELDS: format functions + + +def _get_tiny_link(field_value: str) -> str: + atlassian_domain = field_value["self"].split("/wiki")[0] + shortened_path = field_value["tinyui"] + return 
f"{atlassian_domain}/wiki{shortened_path}" + + +def _get_key_values_from_list_of_dct(field_value: List[dict], key: str) -> str: + key_values = [dct.get(key, "") for dct in field_value] + return "|".join(key_values) + + +def _get_client_properties(field_value: str) -> Optional[Dict[str, str]]: + client_properties_dct = {} + html_soup = BeautifulSoup(field_value, "lxml") + DEFAULT_PROPERTIES = [ + "SALESFORCE ID", + "CONFIDENTIALITY", + "ARTICLE STATUS", + "INDUSTRY", + "CLIENT COMPANY", + "SCOPE", + "MISSION START DATE", + "MISSION END DATE", + "AMOUNT SOLD", + "MISSION TOPIC", + "COMMERCIAL PROPOSAL", + "ONE PAGER", + "ARCHITECTURE", + ] + + properties_section = _get_section_by_title(html_soup, "CASE ID CARD") + if properties_section is not None: + + table = properties_section.table + rows = table.find_all("tr") + + first_row_headers = rows[0].find_all("th") + first_row_datas = rows[0].find_all("td") + if len(first_row_headers) == 1 and len(first_row_datas) == 1: + + for row in rows: + + key = _decode(row.th.text).upper() + text = _decode(row.td.text) + links = [elt["href"] for elt in row.find_all("a")] + + if key in ["COMMERCIAL PROPOSAL", "ONE PAGER", "ARCHITECTURE"]: + client_properties_dct[key] = "|".join(links) + elif key in ["CONFIDENTIALITY", "ARTICLE STATUS"]: + client_properties_dct[key] = re.sub(r"Green|Yellow|Red", "", text).upper() + else: + client_properties_dct[key] = text + + return DictToClean(client_properties_dct, DEFAULT_PROPERTIES, "", "client_property_").clean() + + +def _get_client_completion(field_value: str) -> Optional[Dict[str, int]]: + client_completion_dct = {} + html_soup = BeautifulSoup(field_value, "lxml") + DEFAULT_SECTIONS_LENGTH = {"KEY LEARNINGS": 195, "CONTEXT": 117, "APPROACH": 232, "CONCLUSION": 83} + + for required_title in DEFAULT_SECTIONS_LENGTH: + section = _get_section_by_title(html_soup, required_title) + if section is not None: + text = _decode(section.text) + section_is_completed = len(text) > 
DEFAULT_SECTIONS_LENGTH[required_title] + client_completion_dct[required_title] = int(section_is_completed) + + return DictToClean(client_completion_dct, DEFAULT_SECTIONS_LENGTH.keys(), 0, "client_completion_").clean() + + +CUSTOM_FIELDS = { + "tiny_link": { + "source_field": "_links", + "format_function": _get_tiny_link, + "format_function_kwargs": {}, + "formatted_object_type": str, + }, + "label_names": { + "source_field": "metadata.labels.results", + "format_function": _get_key_values_from_list_of_dct, + "format_function_kwargs": {"key": "name"}, + "formatted_object_type": str, + }, + "children_page_id": { + "source_field": "children.page.results", + "format_function": _get_key_values_from_list_of_dct, + "format_function_kwargs": {"key": "id"}, + "formatted_object_type": str, + }, + "children_page_title": { + "source_field": "children.page.results", + "format_function": _get_key_values_from_list_of_dct, + "format_function_kwargs": {"key": "title"}, + "formatted_object_type": str, + }, + "client_properties": { + "source_field": "body.storage.value", + "format_function": _get_client_properties, + "format_function_kwargs": {}, + "formatted_object_type": dict, + "specific_to_spacekeys": ["KA"], + }, + "client_completion": { + "source_field": "body.storage.value", + "format_function": _get_client_completion, + "format_function_kwargs": {}, + "formatted_object_type": dict, + "specific_to_spacekeys": ["KA"], + }, +} + + +# CUSTOM FIELDS: helpers + + +def _get_section_by_title(html_soup: BeautifulSoup, searched_title: str) -> Tag: + for section in html_soup.find_all("ac:layout-section"): + + h1_elements = [_decode(h1.text).upper() for h1 in section.find_all("h1")] + strong_elements = [_decode(strong.text).upper() for strong in section.find_all("strong")] + section_titles = list(set(h1_elements + strong_elements)) + + for title in section_titles: + if searched_title in title: + return section + + +class DictToClean: + def __init__(self, dct, expected_keys, 
default_value, prefix): + self.dct = dct + self.expected_keys = expected_keys + self.default_value = default_value + self.prefix = prefix + + def clean(self): + self._keep_expected_keys_only() + self._add_prefix() + return self.dct + + def _keep_expected_keys_only(self): + self.dct = {key: self.dct[key] if key in self.dct else self.default_value for key in self.expected_keys} + + def _add_prefix(self): + self.dct = {f"{self.prefix}{key}": value for key, value in self.dct.items()} diff --git a/ack/readers/confluence/reader.py b/ack/readers/confluence/reader.py new file mode 100644 index 00000000..a282fc1c --- /dev/null +++ b/ack/readers/confluence/reader.py @@ -0,0 +1,101 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ +import base64 +from itertools import chain + +import requests +from click import ClickException +from ack.readers.confluence.config import CONTENT_ENDPOINT, RECORDS_PER_PAGE +from ack.readers.confluence.helper import CUSTOM_FIELDS, parse_response +from ack.readers.reader import Reader +from ack.streams.json_stream import JSONStream + + +class ConfluenceReader(Reader): + def __init__(self, user_login, api_token, atlassian_domain, content_type, spacekey, field): + self.user_login = user_login + self.api_token = api_token + self._build_headers() + self.atlassian_domain = atlassian_domain + self.content_type = content_type + self.spacekeys = list(spacekey) + self.fields = list(field) + + self._validate_spacekeys() + + def _validate_spacekeys(self): + requirements = [ + CUSTOM_FIELDS[field]["specific_to_spacekeys"] + for field in self.fields + if field in CUSTOM_FIELDS and "specific_to_spacekeys" in CUSTOM_FIELDS[field] + ] + if len(requirements) > 0: + inter_requirements = ( + requirements[0] if len(requirements) == 1 else list(set(requirements[0]).intersection(*requirements[1:])) + ) + if len(inter_requirements) == 0: + raise ClickException("Invalid request. No intersection found between spacekey requirements.") + elif self.spacekeys != inter_requirements: + raise ClickException(f"Invalid request. 
Spacekeys should be set to '{inter_requirements}'.") + + def _build_headers(self): + api_login = f"{self.user_login}:{self.api_token}" + encoded_bytes = base64.b64encode(api_login.encode("utf-8")) + encoded_string = str(encoded_bytes, "utf-8") + self.headers = {"Authorization": f"Basic {encoded_string}", "Content-Type": "application/json"} + + def _build_params(self): + api_fields = [CUSTOM_FIELDS[field]["source_field"] if field in CUSTOM_FIELDS else field for field in self.fields] + return {"type": self.content_type, "expand": ",".join(api_fields)} + + def _get_raw_response(self, page_nb, spacekey=None): + params = self._build_params() + params["start"] = page_nb * RECORDS_PER_PAGE + params["limit"] = RECORDS_PER_PAGE + if spacekey is not None: + params["spaceKey"] = spacekey + + url = f"{self.atlassian_domain}/{CONTENT_ENDPOINT}" + response = requests.get(url, headers=self.headers, params=params) + if response.ok: + return response.json() + else: + response.raise_for_status() + + def _get_report_generator(self, spacekey=None): + page_nb = 0 + raw_response = self._get_raw_response(page_nb, spacekey) + all_responses = [parse_response(raw_response, self.fields)] + + while raw_response["_links"].get("next"): + page_nb += 1 + raw_response = self._get_raw_response(page_nb, spacekey) + all_responses.append(parse_response(raw_response, self.fields)) + + return chain(*all_responses) + + def _get_aggregated_report_generator(self): + if self.spacekeys: + for spacekey in self.spacekeys: + yield from self._get_report_generator(spacekey) + else: + yield from self._get_report_generator() + + def read(self): + yield JSONStream("results_", self._get_aggregated_report_generator()) diff --git a/ack/readers/facebook/__init__.py b/ack/readers/facebook/__init__.py new file mode 100644 index 00000000..d46139b7 --- /dev/null +++ b/ack/readers/facebook/__init__.py @@ -0,0 +1,17 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# 
licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. diff --git a/ack/readers/facebook/cli.py b/ack/readers/facebook/cli.py new file mode 100644 index 00000000..5019ae3f --- /dev/null +++ b/ack/readers/facebook/cli.py @@ -0,0 +1,73 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ +import click +from click import ClickException +from ack.readers.facebook.config import ACTION_BREAKDOWNS, BREAKDOWNS, DATE_PRESETS, FACEBOOK_OBJECTS +from ack.readers.facebook.reader import FacebookReader +from ack.utils.args import extract_args +from ack.utils.processor import processor + + +def check_object_id(ctx, param, values): + try: + [int(value) for value in values] + return values + except ValueError: + raise ClickException("Wrong format. Ad object IDs should only contains digits.") + + +@click.command(name="read_facebook") +@click.option("--facebook-app-id", default="", help="Not mandatory for AdsInsights reporting if access-token provided") +@click.option("--facebook-app-secret", default="", help="Not mandatory for AdsInsights reporting if access-token provided") +@click.option("--facebook-access-token", required=True) +@click.option("--facebook-object-id", required=True, multiple=True, callback=check_object_id) +@click.option("--facebook-object-type", type=click.Choice(FACEBOOK_OBJECTS), default="account") +@click.option("--facebook-level", type=click.Choice(FACEBOOK_OBJECTS), default="ad", help="Granularity of result") +@click.option( + "--facebook-ad-insights", + type=click.BOOL, + default=True, + help="https://developers.facebook.com/docs/marketing-api/insights", +) +@click.option( + "--facebook-breakdown", + multiple=True, + type=click.Choice(BREAKDOWNS), + help="https://developers.facebook.com/docs/marketing-api/insights/breakdowns/", +) +@click.option( + "--facebook-action-breakdown", + multiple=True, + type=click.Choice(ACTION_BREAKDOWNS), + help="https://developers.facebook.com/docs/marketing-api/insights/breakdowns#actionsbreakdown", +) +@click.option("--facebook-field", multiple=True, help="API fields, following Artefact format") +@click.option("--facebook-time-increment") +@click.option("--facebook-start-date", type=click.DateTime()) +@click.option("--facebook-end-date", type=click.DateTime()) +@click.option("--facebook-date-preset", 
type=click.Choice(DATE_PRESETS)) +@click.option( + "--facebook-add-date-to-report", + type=click.BOOL, + default=False, + help="If set to true, the date of the request will appear in the report", +) +@processor("facebook_app_secret", "facebook_access_token") +def facebook(**kwargs): + return FacebookReader(**extract_args("facebook_", kwargs)) diff --git a/ack/readers/facebook/config.py b/ack/readers/facebook/config.py new file mode 100644 index 00000000..55f88b2b --- /dev/null +++ b/ack/readers/facebook/config.py @@ -0,0 +1,87 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+from datetime import datetime +from typing import List, Literal + +from facebook_business.adobjects.ad import Ad +from facebook_business.adobjects.adaccount import AdAccount +from facebook_business.adobjects.adcreative import AdCreative +from facebook_business.adobjects.adset import AdSet +from facebook_business.adobjects.adsinsights import AdsInsights +from facebook_business.adobjects.adspixel import AdsPixel +from facebook_business.adobjects.campaign import Campaign +from pydantic import BaseModel, validator + +DATEFORMAT = "%Y-%m-%d" +BATCH_SIZE_LIMIT = 50 + +FACEBOOK_OBJECTS = ["pixel", "creative", "ad", "adset", "campaign", "account"] +DATE_PRESETS = [v for k, v in AdsInsights.DatePreset.__dict__.items() if not k.startswith("__")] +BREAKDOWNS = [v for k, v in AdsInsights.Breakdowns.__dict__.items() if not k.startswith("__")] +ACTION_BREAKDOWNS = [v for k, v in AdsInsights.ActionBreakdowns.__dict__.items() if not k.startswith("__")] + +OBJECT_CREATION_MAPPING = { + "account": AdAccount, + "campaign": Campaign, + "adset": AdSet, + "ad": Ad, + "creative": AdCreative, + "pixel": AdsPixel, +} + +EDGE_MAPPING = { + "account": ["campaign", "adset", "ad", "creative", "pixel"], + "campaign": ["adset", "ad"], + "adset": ["ad", "creative"], + "ad": ["creative"], +} + +EDGE_QUERY_MAPPING = { + "campaign": lambda obj: obj.get_campaigns(), + "adset": lambda obj: obj.get_ad_sets(), + "ad": lambda obj: obj.get_ads(), + "creative": lambda obj: obj.get_ad_creatives(), + "pixel": lambda obj: obj.get_ads_pixels(), +} + + +class FacebookReaderConfig(BaseModel): + app_id: str = "" + app_secret: str = "" + access_token: str + object_id: List[str] + object_type: Literal[tuple(FACEBOOK_OBJECTS)] = "account" + level: Literal[tuple(FACEBOOK_OBJECTS)] = "ad" + ad_insights: bool = True + breakdown: List[Literal[tuple(BREAKDOWNS)]] = [] + action_breakdown: List[Literal[tuple(ACTION_BREAKDOWNS)]] = [] + field: List[str] = [] + time_increment: str = None + start_date: datetime = None + 
end_date: datetime = None + date_preset: Literal[tuple(DATE_PRESETS)] = None + add_date_to_report: bool = False + + @validator("start_date", "end_date", pre=True) + def date_format(cls, v): + if isinstance(v, str): + try: + return datetime.strptime(v, "%Y-%m-%d") + except ValueError: + raise ValueError("Datetime format must follow 'YYYY-MM-DD'") + return v diff --git a/nck/helpers/facebook_helper.py b/ack/readers/facebook/helper.py similarity index 70% rename from nck/helpers/facebook_helper.py rename to ack/readers/facebook/helper.py index b070bcaf..da6e5c15 100644 --- a/nck/helpers/facebook_helper.py +++ b/ack/readers/facebook/helper.py @@ -16,27 +16,10 @@ # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -import logging import json from time import sleep -from facebook_business.adobjects.adsinsights import AdsInsights - -FACEBOOK_OBJECTS = ["creative", "ad", "adset", "campaign", "account"] - -DATE_PRESETS = [ - v for k, v in AdsInsights.DatePreset.__dict__.items() if not k.startswith("__") -] - -BREAKDOWNS = [ - v for k, v in AdsInsights.Breakdowns.__dict__.items() if not k.startswith("__") -] - -ACTION_BREAKDOWNS = [ - v - for k, v in AdsInsights.ActionBreakdowns.__dict__.items() - if not k.startswith("__") -] +from ack.config import logger def get_action_breakdown_filters(field_path): @@ -73,7 +56,26 @@ def format_field_path(field_path): return "".join([field_path[0]] + [f"[{element}]" for element in field_path[1:]]) -def check_if_obj_meets_action_breakdown_filters(obj, filters): +def obj_follows_action_breakdown_pattern(obj): + """ + Checks wether obj is a list of dictionnaries, in which + each dictionnary has at least one key starting with 'action_' + """ + return ( + isinstance(obj, list) + and all(isinstance(elt, dict) for elt in obj) + and all(any(key.startswith("action_") for key in elt.keys()) for elt in obj) + ) + + +def obj_is_list_of_single_values(obj): + """ 
+ Checks wether obj is a list of strings, integers or floats. + """ + return isinstance(obj, list) and all(isinstance(elt, (str, int, float)) for elt in obj) + + +def obj_meets_action_breakdown_filters(obj, filters): """ Checks if a nested action breakdown object meets the conditions defined by action breakdown filters. @@ -105,9 +107,7 @@ def get_action_breakdown_value(obj, visited, action_breakdowns): {'actions[action_type:video_view][action_device:iphone]': '12'} """ obj_action_breakdown = [ - f"{action_breakdown}:{obj[action_breakdown]}" - for action_breakdown in action_breakdowns - if action_breakdown in obj + f"{action_breakdown}:{obj[action_breakdown]}" for action_breakdown in action_breakdowns if action_breakdown in obj ] return {format_field_path(visited + obj_action_breakdown): obj["value"]} @@ -120,17 +120,21 @@ def get_all_action_breakdown_values(resp_obj, visited, action_breakdowns, filter action_breakdown_values = {} for obj in resp_obj: if filters != {}: - if check_if_obj_meets_action_breakdown_filters(obj, filters): - action_breakdown_values.update( - get_action_breakdown_value(obj, visited, action_breakdowns) - ) + if obj_meets_action_breakdown_filters(obj, filters): + action_breakdown_values.update(get_action_breakdown_value(obj, visited, action_breakdowns)) else: - action_breakdown_values.update( - get_action_breakdown_value(obj, visited, action_breakdowns) - ) + action_breakdown_values.update(get_action_breakdown_value(obj, visited, action_breakdowns)) return action_breakdown_values +def get_obj_data(obj): + """ + If obj is a Facebook Object: returns associated data + If obj is a standard Python object: returns obj itself + """ + return obj._data if hasattr(obj, "_data") else obj + + def get_field_values(resp_obj, field_path, action_breakdowns, visited=[]): """ Recursive function extracting (and formating) the values @@ -142,23 +146,20 @@ def get_field_values(resp_obj, field_path, action_breakdowns, visited=[]): visited.append(path_item) if 
path_item in resp_obj: + current_obj = get_obj_data(resp_obj[path_item]) if remaining_path_items == 0: - if isinstance(resp_obj[path_item], str): - return {format_field_path(visited): resp_obj[path_item]} - if isinstance(resp_obj[path_item], list): - return get_all_action_breakdown_values( - resp_obj[path_item], visited, action_breakdowns - ) + if obj_follows_action_breakdown_pattern(current_obj): + return get_all_action_breakdown_values(current_obj, visited, action_breakdowns) + elif obj_is_list_of_single_values(current_obj): + return {format_field_path(visited): ", ".join(map(str, current_obj))} + else: + return {format_field_path(visited): str(current_obj)} else: - return get_field_values( - resp_obj[path_item], field_path[1:], action_breakdowns, visited - ) + return get_field_values(current_obj, field_path[1:], action_breakdowns, visited) else: if all(":" in f for f in field_path): filters = get_action_breakdown_filters(field_path) - return get_all_action_breakdown_values( - resp_obj, visited[:-1], action_breakdowns, filters - ) + return get_all_action_breakdown_values(resp_obj, visited[:-1], action_breakdowns, filters) def generate_batches(iterable, batch_size): @@ -190,12 +191,8 @@ def monitor_usage(response): if header["name"] == "X-Business-Use-Case-Usage": usage_header = json.loads(header["value"]) usage_header_values = list(usage_header.values())[0][0] - usage_rates = [ - v - for k, v in usage_header_values.items() - if k in ["call_count", "total_cputime", "total_time"] - ] + usage_rates = [v for k, v in usage_header_values.items() if k in ["call_count", "total_cputime", "total_time"]] if max(usage_rates) > 75: - logging.info("75% rate limit reached. Sleeping for 5 minutes...") + logger.info("75% rate limit reached. 
Sleeping for 5 minutes...") sleep(300) diff --git a/nck/readers/facebook_reader.py b/ack/readers/facebook/reader.py similarity index 59% rename from nck/readers/facebook_reader.py rename to ack/readers/facebook/reader.py index dcf934c9..a81a746e 100644 --- a/nck/readers/facebook_reader.py +++ b/ack/readers/facebook/reader.py @@ -17,127 +17,28 @@ # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. import logging -import click import re -from math import ceil -from click import ClickException from datetime import datetime +from math import ceil -from nck.readers.reader import Reader -from nck.utils.args import extract_args -from nck.commands.command import processor -from nck.streams.normalized_json_stream import NormalizedJSONStream -from nck.helpers.facebook_helper import ( - FACEBOOK_OBJECTS, - DATE_PRESETS, - BREAKDOWNS, - ACTION_BREAKDOWNS, - get_action_breakdown_filters, - get_field_values, - generate_batches, - monitor_usage, -) - +from click import ClickException +from facebook_business.adobjects.adreportrun import AdReportRun from facebook_business.api import FacebookAdsApi -from facebook_business.adobjects.adaccount import AdAccount -from facebook_business.adobjects.campaign import Campaign -from facebook_business.adobjects.adset import AdSet -from facebook_business.adobjects.ad import Ad -from facebook_business.adobjects.adcreative import AdCreative - -DATEFORMAT = "%Y-%m-%d" - -OBJECT_CREATION_MAPPING = { - "account": AdAccount, - "campaign": Campaign, - "adset": AdSet, - "ad": Ad, - "creative": AdCreative, -} - -EDGE_MAPPING = { - "account": ["campaign", "adset", "ad", "creative"], - "campaign": ["adset", "ad"], - "adset": ["ad", "creative"], - "ad": ["creative"], -} - -EDGE_QUERY_MAPPING = { - "campaign": lambda obj: obj.get_campaigns(), - "adset": lambda obj: obj.get_ad_sets(), - "ad": lambda obj: obj.get_ads(), - "creative": lambda obj: obj.get_ad_creatives(), -} - -BATCH_SIZE_LIMIT = 50 - - -def check_object_id(ctx, param, values): - 
try: - [int(value) for value in values] - return values - except ValueError: - raise ClickException("Wrong format. Ad object IDs should only contains digits.") - - -@click.command(name="read_facebook") -@click.option( - "--facebook-app-id", - default="", - help="Not mandatory for AdsInsights reporting if access-token provided", -) -@click.option( - "--facebook-app-secret", - default="", - help="Not mandatory for AdsInsights reporting if access-token provided", -) -@click.option("--facebook-access-token", required=True) -@click.option( - "--facebook-object-id", required=True, multiple=True, callback=check_object_id -) -@click.option( - "--facebook-object-type", type=click.Choice(FACEBOOK_OBJECTS), default="account" -) -@click.option( - "--facebook-level", - type=click.Choice(FACEBOOK_OBJECTS), - default="ad", - help="Granularity of result", -) -@click.option( - "--facebook-ad-insights", - type=click.BOOL, - default=True, - help="https://developers.facebook.com/docs/marketing-api/insights", -) -@click.option( - "--facebook-breakdown", - multiple=True, - type=click.Choice(BREAKDOWNS), - help="https://developers.facebook.com/docs/marketing-api/insights/breakdowns/", -) -@click.option( - "--facebook-action-breakdown", - multiple=True, - type=click.Choice(ACTION_BREAKDOWNS), - help="https://developers.facebook.com/docs/marketing-api/insights/breakdowns#actionsbreakdown", -) -@click.option( - "--facebook-field", multiple=True, help="API fields, following Artefact format" -) -@click.option("--facebook-time-increment") -@click.option("--facebook-start-date", type=click.DateTime()) -@click.option("--facebook-end-date", type=click.DateTime()) -@click.option("--facebook-date-preset", type=click.Choice(DATE_PRESETS)) -@click.option( - "--facebook-add-date-to-report", - type=click.BOOL, - default=False, - help="If set to true, the date of the request will appear in the report", +from ack.config import logger +from ack.readers.facebook.config import ( + BATCH_SIZE_LIMIT, + 
BREAKDOWNS, + DATEFORMAT, + EDGE_MAPPING, + EDGE_QUERY_MAPPING, + FACEBOOK_OBJECTS, + OBJECT_CREATION_MAPPING, ) -@processor("facebook_app_secret", "facebook_access_token") -def facebook(**kwargs): - return FacebookReader(**extract_args("facebook_", kwargs)) +from ack.readers.facebook.helper import generate_batches, get_action_breakdown_filters, get_field_values, monitor_usage +from ack.readers.reader import Reader +from ack.streams.json_stream import JSONStream +from ack.utils.date_handler import check_date_range_definition_conformity +from tenacity import retry, stop_after_attempt, stop_after_delay, wait_exponential, wait_none class FacebookReader(Reader): @@ -176,9 +77,7 @@ def __init__( self.action_breakdowns = list(action_breakdown) self.fields = list(field) self._field_paths = [re.split(r"[\]\[]+", f.strip("]")) for f in self.fields] - self._api_fields = list( - {f[0] for f in self._field_paths if f[0] not in self.breakdowns} - ) + self._api_fields = list({f[0] for f in self._field_paths if f[0] not in self.breakdowns}) # Date inputs self.time_increment = time_increment or False @@ -189,6 +88,7 @@ def __init__( # Validate inputs self.validate_inputs() + check_date_range_definition_conformity(self.start_date, self.end_date, self.date_preset) def validate_inputs(self): """ @@ -202,9 +102,7 @@ def validate_inputs(self): def validate_object_type_and_level_combination(self): - if (self.level != self.object_type) and ( - self.level not in EDGE_MAPPING[self.object_type] - ): + if (self.level != self.object_type) and (self.level not in EDGE_MAPPING[self.object_type]): raise ClickException( f"Wrong query. 
Asked level ({self.level}) is not compatible with object type ({self.object_type}).\ Please choose level from: {[self.object_type] + EDGE_MAPPING[self.object_type]}" @@ -222,42 +120,27 @@ def validate_ad_insights_level(self): def validate_ad_insights_breakdowns(self): if self.ad_insights: - missing_breakdowns = { - f[0] - for f in self._field_paths - if (f[0] in BREAKDOWNS) and (f[0] not in self.breakdowns) - } + missing_breakdowns = {f[0] for f in self._field_paths if (f[0] in BREAKDOWNS) and (f[0] not in self.breakdowns)} if missing_breakdowns != set(): - raise ClickException( - f"Wrong query. Please add to Breakdowns: {missing_breakdowns}" - ) + raise ClickException(f"Wrong query. Please add to Breakdowns: {missing_breakdowns}") def validate_ad_insights_action_breakdowns(self): if self.ad_insights: missing_action_breakdowns = { - flt - for f in self._field_paths - for flt in get_action_breakdown_filters(f) - if flt not in self.action_breakdowns + flt for f in self._field_paths for flt in get_action_breakdown_filters(f) if flt not in self.action_breakdowns } if missing_action_breakdowns != set(): - raise ClickException( - f"Wrong query. Please add to Action Breakdowns: {missing_action_breakdowns}" - ) + raise ClickException(f"Wrong query. Please add to Action Breakdowns: {missing_action_breakdowns}") def validate_ad_management_inputs(self): if not self.ad_insights: if self.breakdowns != [] or self.action_breakdowns != []: - raise ClickException( - "Wrong query. Ad Management queries do not accept Breakdowns nor Action Breakdowns." - ) + raise ClickException("Wrong query. Ad Management queries do not accept Breakdowns nor Action Breakdowns.") if self.time_increment: - raise ClickException( - "Wrong query. Ad Management queries do not accept the time_increment parameter." - ) + raise ClickException("Wrong query. 
Ad Management queries do not accept the time_increment parameter.")
 
     def get_params(self):
         """
@@ -292,22 +175,17 @@ def add_period_to_params(self, params):
 
         if self.ad_insights or self.level in ["campaign", "adset", "ad"]:
             if self.start_date and self.end_date:
-                logging.info("Date format used for request: start_date and end_date")
+                logger.info("Date format used for request: start_date and end_date")
                 params["time_range"] = self.create_time_range()
             elif self.date_preset:
-                logging.info("Date format used for request: date_preset")
+                logger.info("Date format used for request: date_preset")
                 params["date_preset"] = self.date_preset
             else:
-                logging.warning("No date range provided - Last 30 days by default")
-                logging.warning(
-                    "https://developers.facebook.com/docs/marketing-api/reference/ad-account/insights#parameters"
-                )
+                logger.warning("No date range provided - Last 30 days by default")
+                logger.warning("https://developers.facebook.com/docs/marketing-api/reference/ad-account/insights#parameters")
 
     def create_time_range(self):
-        return {
-            "since": self.start_date.strftime(DATEFORMAT),
-            "until": self.end_date.strftime(DATEFORMAT),
-        }
+        return {"since": self.start_date.strftime(DATEFORMAT), "until": self.end_date.strftime(DATEFORMAT)}
 
     def create_object(self, object_id):
         """
@@ -325,15 +207,40 @@ def query_ad_insights(self, fields, params, object_id):
 
         https://developers.facebook.com/docs/marketing-api/insights
         """
-        logging.info(
-            f"Running Facebook Ad Insights query on {self.object_type}_id: {object_id}"
-        )
+        logger.info(f"Running Facebook Ad Insights query on {self.object_type}_id: {object_id}")
 
         # Step 1 - Create Facebook object
        obj = self.create_object(object_id)
 
-        # Step 2 - Run Ad Insights query on Facebook object
-        yield from obj.get_insights(fields=fields, params=params)
+        report_job = self._get_report(obj, fields, params)
+
+        yield from 
report_job.get_result() + + @retry(wait=wait_none(), stop=stop_after_attempt(3)) + def _get_report(self, obj, fields, params): + async_job = obj.get_insights(fields=fields, params=params, is_async=True) + self._wait_for_100_percent_completion(async_job) + self._wait_for_complete_report(async_job) + return async_job + + @retry(wait=wait_exponential(multiplier=5, max=300), stop=stop_after_delay(2400)) + def _wait_for_100_percent_completion(self, async_job): + async_job.api_get() + percent_completion = async_job[AdReportRun.Field.async_percent_completion] + status = async_job[AdReportRun.Field.async_status] + logger.info(f"{status}: {percent_completion}%") + if status == "Job Failed": + logger.info(status) + elif percent_completion < 100: + raise Exception(f"{status}: {percent_completion}") + + @retry(wait=wait_exponential(multiplier=10, max=60), stop=stop_after_delay(300)) + def _wait_for_complete_report(self, async_job): + async_job.api_get() + status = async_job[AdReportRun.Field.async_status] + if status == "Job Running": + raise Exception(status) + logger.info(status) def query_ad_management(self, fields, params, object_id): """ @@ -342,9 +249,7 @@ def query_ad_management(self, fields, params, object_id): Supported object nodes: AdAccount, Campaign, AdSet, Ad and AdCreative """ - logging.info( - f"Running Ad Management query on {self.object_type}_id: {object_id}" - ) + logger.info(f"Running Ad Management query on {self.object_type}_id: {object_id}") # Step 1 - Create Facebook object obj = self.create_object(object_id) @@ -365,9 +270,7 @@ def get_edge_objs_records(self, edge_objs, fields, params): total_edge_objs = edge_objs._total_count total_batches = ceil(total_edge_objs / BATCH_SIZE_LIMIT) - logging.info( - f"Making {total_batches} batch requests on a total of {total_edge_objs} {self.level}s" - ) + logger.info(f"Making {total_batches} batch requests on a total of {total_edge_objs} {self.level}s") for batch in generate_batches(edge_objs, BATCH_SIZE_LIMIT): @@ 
-385,13 +288,7 @@ def callback_success(response): def callback_failure(response): raise response.error() - obj.api_get( - fields=fields, - params=params, - batch=api_batch, - success=callback_success, - failure=callback_failure, - ) + obj.api_get(fields=fields, params=params, batch=api_batch, success=callback_success, failure=callback_failure) # Execute batch api_batch.execute() @@ -400,14 +297,12 @@ def callback_failure(response): def format_and_yield(self, record): """ - Parse a single record into an {item: value} dictionnary. + Parse a single record into an {item: value} dictionary. """ report = {} for field_path in self._field_paths: - field_values = get_field_values( - record, field_path, self.action_breakdowns, visited=[] - ) + field_values = get_field_values(record, field_path, self.action_breakdowns, visited=[]) if field_values: report.update(field_values) @@ -418,7 +313,7 @@ def format_and_yield(self, record): def result_generator(self, data): """ - Parse all records into an {item: value} dictionnary. + Parse all records into an {item: value} dictionary. 
""" for record in data: yield from self.format_and_yield(record) @@ -445,7 +340,4 @@ def get_data(self): def read(self): - yield NormalizedJSONStream( - "results_" + self.object_type + "_" + "_".join(self.object_ids), - self.get_data(), - ) + yield JSONStream("results_" + self.object_type + "_" + "_".join(self.object_ids), self.get_data()) diff --git a/ack/readers/google_ads/__init__.py b/ack/readers/google_ads/__init__.py new file mode 100644 index 00000000..d46139b7 --- /dev/null +++ b/ack/readers/google_ads/__init__.py @@ -0,0 +1,17 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. diff --git a/ack/readers/google_ads/cli.py b/ack/readers/google_ads/cli.py new file mode 100644 index 00000000..d37d50ca --- /dev/null +++ b/ack/readers/google_ads/cli.py @@ -0,0 +1,92 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +import click +from ack.readers.google_ads.config import DATE_RANGE_TYPE_POSSIBLE_VALUES, REPORT_TYPE_POSSIBLE_VALUES +from ack.readers.google_ads.reader import GoogleAdsReader +from ack.utils.args import extract_args +from ack.utils.processor import processor + + +@click.command(name="read_googleads") +@click.option("--googleads-developer-token", required=True) +@click.option("--googleads-client-id", required=True) +@click.option("--googleads-client-secret", required=True) +@click.option("--googleads-refresh-token", required=True) +@click.option( + "--googleads-manager-id", + help="Google Ads Manager Account. 
" "Optional: can be used to get the reports from all accounts in hierarchy", +) +@click.option( + "--googleads-client-customer-id", + "googleads_client_customer_ids", + multiple=True, + help="Google Ads Client Account(s) to be called, thanks to their IDs.\n " + "This field is ignored if manager_id is specified (replaced by the accounts linked to the MCC)", +) +@click.option("--googleads-report-name", default="CustomReport", help="Name given to your Report") +@click.option( + "--googleads-report-type", + type=click.Choice(REPORT_TYPE_POSSIBLE_VALUES), + default=REPORT_TYPE_POSSIBLE_VALUES[0], + help="Desired Report Type to fetch\n" "https://developers.google.com/adwords/api/docs/appendix/reports#available-reports", +) +@click.option( + "--googleads-date-range-type", + type=click.Choice(DATE_RANGE_TYPE_POSSIBLE_VALUES), + help="Desired Date Range Type to fetch\n" "https://developers.google.com/adwords/api/docs/guides/reporting#date_ranges", +) +@click.option("--googleads-start-date", type=click.DateTime()) +@click.option("--googleads-end-date", type=click.DateTime()) +@click.option( + "--googleads-field", + "googleads_fields", + multiple=True, + help="Google Ads API fields for the request\n" + "https://developers.google.com/adwords/api/docs/appendix/reports#available-reports", +) +@click.option( + "--googleads-report-filter", + default="{}", + help="A filter can be applied on a chosen field, " + "in the form of a String containing a Dictionary \"{'field','operator','values'}\"\n" + "https://developers.google.com/adwords/api/docs/guides/reporting#create_a_report_definition", +) +@click.option( + "--googleads-include-zero-impressions", + default=True, + type=click.BOOL, + help="A boolean indicating whether the report should show rows with zero impressions", +) +@click.option( + "--googleads-filter-on-video-campaigns", + default=False, + type=click.BOOL, + help="A boolean indicating whether the report should return only Video campaigns\n" + "Only available if CampaignId 
is requested as a report field", +) +@click.option( + "--googleads-include-client-customer-id", + default=False, + type=click.BOOL, + help="A boolean indicating whether the Account ID should be included as a field in the output stream\n" + "(because AccountId is not available as a report field in the API)", +) +@processor("googleads_developer_token", "googleads_app_secret", "googleads_refresh_token") +def google_ads(**kwargs): + return GoogleAdsReader(**extract_args("googleads_", kwargs)) diff --git a/nck/helpers/googleads_helper.py b/ack/readers/google_ads/config.py similarity index 73% rename from nck/helpers/googleads_helper.py rename to ack/readers/google_ads/config.py index 4961a67c..a868eb7c 100644 --- a/nck/helpers/googleads_helper.py +++ b/ack/readers/google_ads/config.py @@ -15,6 +15,14 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+from datetime import datetime +from typing import List, Literal + +from pydantic import BaseModel, validator + +DATEFORMAT = "%Y%m%d" +ENCODING = "utf-8" + # https://developers.google.com/adwords/api/docs/appendix/reports#available-reports REPORT_TYPE_POSSIBLE_VALUES = [ "KEYWORDS_PERFORMANCE_REPORT", @@ -84,5 +92,30 @@ "CUSTOM_DATE", ] -# Encoding for Stream Reader -ENCODING = "utf-8" + +class GoogleAdsReaderConfig(BaseModel): + developer_token: str + client_id: str + client_secret: str + refresh_token: str + manager_id: str = None + client_customers_ids: List[str] = [] + report_name: str = "CustomReport" + report_type: Literal[tuple(REPORT_TYPE_POSSIBLE_VALUES)] = REPORT_TYPE_POSSIBLE_VALUES[0] + date_range_type: Literal[tuple(DATE_RANGE_TYPE_POSSIBLE_VALUES)] = None + start_date: datetime = None + end_date: datetime = None + fields: List[str] = [] + report_filter: str = "{}" + include_zero_impressions: bool = True + filter_on_video_campaigns: bool = False + include_client_customer_id: bool = False + + @validator("start_date", "end_date", pre=True) + def date_format(cls, v): + if isinstance(v, str): + try: + return datetime.strptime(v, "%Y-%m-%d") + except ValueError: + raise ValueError("Datetime format must follow 'YYYY-MM-DD'") + return v diff --git a/nck/readers/googleads_reader.py b/ack/readers/google_ads/reader.py similarity index 60% rename from nck/readers/googleads_reader.py rename to ack/readers/google_ads/reader.py index a331f6c5..8b897e91 100644 --- a/nck/readers/googleads_reader.py +++ b/ack/readers/google_ads/reader.py @@ -15,108 +15,24 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ +import ast import codecs -import logging -import click -import re import csv -import ast - +import re from io import StringIO + from click import ClickException +from ack.config import logger +from ack.readers.google_ads.config import DATEFORMAT, ENCODING +from ack.readers.reader import Reader +from ack.streams.json_stream import JSONStream +from ack.utils.exceptions import InconsistentDateDefinitionException, NoDateDefinitionException +from ack.utils.retry import retry + from googleads import adwords -from googleads.oauth2 import GoogleRefreshTokenClient from googleads.errors import AdWordsReportBadRequestError - -from nck.readers.reader import Reader -from nck.utils.args import extract_args -from nck.utils.retry import retry -from nck.commands.command import processor -from nck.streams.normalized_json_stream import NormalizedJSONStream -from nck.helpers.googleads_helper import ( - REPORT_TYPE_POSSIBLE_VALUES, - DATE_RANGE_TYPE_POSSIBLE_VALUES, - ENCODING, -) - -DATEFORMAT = "%Y%m%d" - - -@click.command(name="read_googleads") -@click.option("--googleads-developer-token", required=True) -@click.option("--googleads-client-id", required=True) -@click.option("--googleads-client-secret", required=True) -@click.option("--googleads-refresh-token", required=True) -@click.option( - "--googleads-manager-id", - help="Google Ads Manager Account. 
" - "Optional: can be used to get the reports from all accounts in hierarchy", -) -@click.option( - "--googleads-client-customer-id", - "googleads_client_customer_ids", - multiple=True, - help="Google Ads Client Account(s) to be called, thanks to their IDs.\n " - "This field is ignored if manager_id is specified (replaced by the accounts linked to the MCC)", -) -@click.option( - "--googleads-report-name", default="CustomReport", help="Name given to your Report" -) -@click.option( - "--googleads-report-type", - type=click.Choice(REPORT_TYPE_POSSIBLE_VALUES), - default=REPORT_TYPE_POSSIBLE_VALUES[0], - help="Desired Report Type to fetch\n" - "https://developers.google.com/adwords/api/docs/appendix/reports#available-reports", -) -@click.option( - "--googleads-date-range-type", - type=click.Choice(DATE_RANGE_TYPE_POSSIBLE_VALUES), - default=DATE_RANGE_TYPE_POSSIBLE_VALUES[0], - help="Desired Date Range Type to fetch\n" - "https://developers.google.com/adwords/api/docs/guides/reporting#date_ranges", -) -@click.option("--googleads-start-date", type=click.DateTime()) -@click.option("--googleads-end-date", type=click.DateTime()) -@click.option( - "--googleads-field", - "googleads_fields", - multiple=True, - help="Google Ads API fields for the request\n" - "https://developers.google.com/adwords/api/docs/appendix/reports#available-reports", -) -@click.option( - "--googleads-report-filter", - default="{}", - help="A filter can be applied on a chosen field, " - "in the form of a String containing a Dictionary \"{'field','operator','values'}\"\n" - "https://developers.google.com/adwords/api/docs/guides/reporting#create_a_report_definition", -) -@click.option( - "--googleads-include-zero-impressions", - default=True, - type=click.BOOL, - help="A boolean indicating whether the report should show rows with zero impressions", -) -@click.option( - "--googleads-filter-on-video-campaigns", - default=False, - type=click.BOOL, - help="A boolean indicating whether the report should 
return only Video campaigns\n" - "Only available if CampaignId is requested as a report field", -) -@click.option( - "--googleads-include-client-customer-id", - default=False, - type=click.BOOL, - help="A boolean indicating whether the Account ID should be included as a field in the output stream\n" - "(because AccountId is not available as a report field in the API)", -) -@processor( - "googleads_developer_token", "googleads_app_secret", "googleads_refresh_token" -) -def google_ads(**kwargs): - return GoogleAdsReader(**extract_args("googleads_", kwargs)) +from googleads.oauth2 import GoogleRefreshTokenClient class GoogleAdsReader(Reader): @@ -143,9 +59,7 @@ def __init__( self.client_id = client_id self.client_secret = client_secret self.refresh_token = refresh_token - self.oauth2_client = GoogleRefreshTokenClient( - self.client_id, self.client_secret, self.refresh_token - ) + self.oauth2_client = GoogleRefreshTokenClient(self.client_id, self.client_secret, self.refresh_token) self.manager_id = manager_id self.client_customer_ids = list(client_customer_ids) self.report_name = report_name @@ -161,24 +75,21 @@ def __init__( self.download_format = "CSV" def init_adwords_client(self, id): - return adwords.AdWordsClient( - self.developer_token, self.oauth2_client, client_customer_id=id - ) + return adwords.AdWordsClient(self.developer_token, self.oauth2_client, client_customer_id=id) @staticmethod def valid_client_customer_id(client_customer_id): return re.match(r"\d{3}-\d{3}-\d{4}", client_customer_id) @retry - def fetch_report_from_gads_client_customer_obj( - self, report_definition, client_customer_id - ): + def fetch_report_from_gads_client_customer_obj(self, report_definition, client_customer_id): if not self.valid_client_customer_id(client_customer_id): raise ClickException( - f"Wrong format: {client_customer_id}. Client customer ID should be in the form 123-456-7890." + f"Invalid format: {client_customer_id}. 
Client customer ID should respect the following format 123-456-7890." ) else: try: + adwords_client = self.init_adwords_client(client_customer_id) report_downloader = adwords_client.GetReportDownloader() customer_report = report_downloader.DownloadReportAsStream( @@ -192,9 +103,7 @@ def fetch_report_from_gads_client_customer_obj( return customer_report except AdWordsReportBadRequestError as e: if e.type == "AuthorizationError.CUSTOMER_NOT_ACTIVE": - logging.warning( - f"Skipping clientCustomerId {client_customer_id} (inactive)." - ) + logger.warning(f"Skipping clientCustomerId {client_customer_id} (inactive).") else: raise Exception(f"Wrong request. Error type: {e.type}") @@ -211,18 +120,14 @@ def get_customer_ids(self, manager_id): """ adwords_client = self.init_adwords_client(manager_id) - managed_customer_service = adwords_client.GetService( - "ManagedCustomerService", version="v201809" - ) + managed_customer_service = adwords_client.GetService("ManagedCustomerService", version="v201809") offset = 0 PAGE_SIZE = 500 # Get the account hierarchy for this account. 
selector = { "fields": ["CustomerId"], - "predicates": [ - {"field": "CanManageClients", "operator": "EQUALS", "values": [False]}, - ], + "predicates": [{"field": "CanManageClients", "operator": "EQUALS", "values": [False]}], "paging": {"startIndex": str(offset), "numberResults": str(PAGE_SIZE)}, } @@ -234,9 +139,7 @@ def get_customer_ids(self, manager_id): if page and "entries" in page and page["entries"]: for entry in page["entries"]: - client_customer_ids.append( - self.format_customer_id(entry["customerId"]) - ) + client_customer_ids.append(self.format_customer_id(entry["customerId"])) else: raise Exception("Can't retrieve any customer ID.") offset += PAGE_SIZE @@ -268,34 +171,26 @@ def get_report_definition(self): def add_period_to_report_definition(self, report_definition): """Add Date period from provided start date and end date, when CUSTOM DATE range is called""" - if (self.date_range_type == "CUSTOM_DATE") & ( - not self.start_date or not self.end_date - ): - logging.warning( - "Custom Date Range selected but no date range provided :" - + DATE_RANGE_TYPE_POSSIBLE_VALUES[0] - + " by default" - ) - logging.warning( - "https://developers.google.com/adwords/api/docs/guides/reporting#custom_date_ranges" + if (self.date_range_type == "CUSTOM_DATE") & (not self.start_date or not self.end_date): + raise NoDateDefinitionException( + """You must define a couple + start-date/end-date when using a custom_date""" ) - report_definition["dateRangeType"] = DATE_RANGE_TYPE_POSSIBLE_VALUES[0] elif self.date_range_type == "CUSTOM_DATE": - logging.info( - "Date format used for request : Custom Date Range with start_date and end_date provided" - ) - report_definition["selector"]["dateRange"] = self.create_date_range( - self.start_date, self.end_date + logger.info("Date format used for request : Custom Date Range with start_date and end_date provided") + report_definition["selector"]["dateRange"] = self.create_date_range(self.start_date, self.end_date) + elif self.start_date 
is not None and self.end_date is not None and self.date_range_type != "CUSTOM_DATE": + raise InconsistentDateDefinitionException( + "You must define either the couple start_date and end_date or a date_range, \ + different from CUSTOM_DATE, but not both" ) def add_report_filter(self, report_definition): """Check if a filter was provided and contains the necessary information""" if not self.report_filter: - logging.info("No filter provided by user") - elif all( - required_param in self.report_filter.keys() - for required_param in ("field", "operator", "values") - ): + logger.info("No filter provided by user") + + elif all(required_param in self.report_filter.keys() for required_param in ("field", "operator", "values")): report_definition["selector"]["predicates"] = { "field": self.report_filter["field"], "operator": self.report_filter["operator"], @@ -309,10 +204,7 @@ def add_report_filter(self, report_definition): @staticmethod def create_date_range(start_date, end_date): - return { - "min": start_date.strftime(DATEFORMAT), - "max": end_date.strftime(DATEFORMAT), - } + return {"min": start_date.strftime(DATEFORMAT), "max": end_date.strftime(DATEFORMAT)} def list_video_campaign_ids(self): video_campaign_report_definition = self.get_video_campaign_report_definition() @@ -330,9 +222,7 @@ def list_video_campaign_ids(self): def get_video_campaign_report_definition(self): if "CampaignId" not in self.fields: - raise ClickException( - "Filter On Video Campaigns is only available if 'CampaignId' is requested as a report field" - ) + raise ClickException("Filter On Video Campaigns is only available if 'CampaignId' is requested as a report field") video_campaigns_report = { "reportName": "video campaigns ids", "dateRangeType": self.date_range_type, @@ -350,9 +240,7 @@ def format_and_yield(self): video_campaign_ids = self.list_video_campaign_ids() for googleads_account_id in self.client_customer_ids: - customer_report = self.fetch_report_from_gads_client_customer_obj( - 
report_definition, googleads_account_id - ) + customer_report = self.fetch_report_from_gads_client_customer_obj(report_definition, googleads_account_id) if customer_report: customer_report = stream_reader(customer_report) for row in customer_report: @@ -370,7 +258,4 @@ def read(self): if self.manager_id: self.client_customer_ids = self.get_customer_ids(self.manager_id) - yield NormalizedJSONStream( - "results_" + self.report_name + "_" + "_".join(self.client_customer_ids), - self.format_and_yield(), - ) + yield JSONStream("results_" + self.report_name + "_" + "_".join(self.client_customer_ids), self.format_and_yield()) diff --git a/ack/readers/google_analytics/__init__.py b/ack/readers/google_analytics/__init__.py new file mode 100644 index 00000000..d46139b7 --- /dev/null +++ b/ack/readers/google_analytics/__init__.py @@ -0,0 +1,17 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
diff --git a/ack/readers/google_analytics/cli.py b/ack/readers/google_analytics/cli.py new file mode 100644 index 00000000..b6a69f18 --- /dev/null +++ b/ack/readers/google_analytics/cli.py @@ -0,0 +1,46 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ +import click +from ack.readers.google_analytics.reader import GoogleAnalyticsReader +from ack.utils.args import extract_args +from ack.utils.processor import processor + + +@click.command(name="read_ga") +@click.option("--ga-access-token", default=None) +@click.option("--ga-refresh-token", required=True) +@click.option("--ga-client-id", required=True) +@click.option("--ga-client-secret", required=True) +@click.option("--ga-view-id", default="", multiple=True) +@click.option("--ga-account-id", default=[], multiple=True) +@click.option("--ga-dimension", multiple=True) +@click.option("--ga-metric", multiple=True) +@click.option("--ga-segment-id", multiple=True) +@click.option("--ga-start-date", type=click.DateTime(), default=None) +@click.option("--ga-end-date", type=click.DateTime(), default=None) +@click.option("--ga-date-range", nargs=2, type=click.DateTime(), default=None) +@click.option( + "--ga-day-range", type=click.Choice(["PREVIOUS_DAY", "LAST_30_DAYS", "LAST_7_DAYS", "LAST_90_DAYS"]), default=None +) +@click.option("--ga-sampling-level", type=click.Choice(["SMALL", "DEFAULT", "LARGE"]), default="LARGE") +@click.option("--ga-add-view", is_flag=True) +@processor("ga_access_token", "ga_refresh_token", "ga_client_secret") +def google_analytics(**kwargs): + # Should handle valid combinations dimensions/metrics in the API + return GoogleAnalyticsReader(**extract_args("ga_", kwargs)) diff --git a/ack/readers/google_analytics/config.py b/ack/readers/google_analytics/config.py new file mode 100644 index 00000000..68db7343 --- /dev/null +++ b/ack/readers/google_analytics/config.py @@ -0,0 +1,56 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later 
version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +from datetime import datetime +from typing import List, Tuple, Literal + +from pydantic import BaseModel, validator + +GOOGLE_TOKEN_URI = "https://accounts.google.com/o/oauth2/token" +DISCOVERY_URI = "https://analyticsreporting.googleapis.com/$discovery/rest" +DATEFORMAT = "%Y-%m-%d" +DAY_RANGES = ("PREVIOUS_DAY", "LAST_30_DAYS", "LAST_7_DAYS", "LAST_90_DAYS") +SAMPLING_LEVELS = ("SMALL", "DEFAULT", "LARGE") + +PREFIX = "^ga:" + + +class GoogleAnalyticsReaderConfig(BaseModel): + access_token: str = None + refresh_token: str + client_id: str + client_secret: str + view_id: List[str] = [""] + account_id: List[str] = [] + dimension: List[str] = [] + metric: List[str] = [] + segment_id: List[str] = [] + start_date: datetime = None + end_date: datetime = None + date_range: Tuple[datetime, datetime] = None + day_range: Literal[DAY_RANGES] = None + sampling_level: Literal[SAMPLING_LEVELS] = "LARGE" + add_view: bool = False + + @validator("start_date", "end_date", pre=True) + def date_format(cls, v): + if isinstance(v, str): + try: + return datetime.strptime(v, "%Y-%m-%d") + except ValueError: + raise ValueError("Datetime format must follow 'YYYY-MM-DD'") + return v diff --git a/nck/readers/ga_reader.py b/ack/readers/google_analytics/reader.py similarity index 65% rename from nck/readers/ga_reader.py rename to ack/readers/google_analytics/reader.py index 80cd663f..6f32ad76 100644 --- a/nck/readers/ga_reader.py +++ b/ack/readers/google_analytics/reader.py @@ -15,51 +15,22 @@ # You should have 
received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -import click -import httplib2 -import logging -import re from datetime import datetime, timedelta + +import httplib2 from click import ClickException from googleapiclient import discovery -from oauth2client import client, GOOGLE_REVOKE_URI - -from nck.commands.command import processor -from nck.readers.reader import Reader -from nck.utils.args import extract_args -from nck.utils.retry import retry -from nck.streams.normalized_json_stream import NormalizedJSONStream - -DISCOVERY_URI = "https://analyticsreporting.googleapis.com/$discovery/rest" -DATEFORMAT = "%Y-%m-%d" - - -@click.command(name="read_ga") -@click.option("--ga-access-token", default=None) -@click.option("--ga-refresh-token", required=True) -@click.option("--ga-client-id", required=True) -@click.option("--ga-client-secret", required=True) -@click.option("--ga-view-id", default="", multiple=True) -@click.option("--ga-account-id", default=[], multiple=True) -@click.option("--ga-dimension", multiple=True) -@click.option("--ga-metric", multiple=True) -@click.option("--ga-segment-id", multiple=True) -@click.option("--ga-start-date", type=click.DateTime(), default=None) -@click.option("--ga-end-date", type=click.DateTime(), default=None) -@click.option("--ga-date-range", nargs=2, type=click.DateTime(), default=None) -@click.option( - "--ga-day-range", type=click.Choice(["PREVIOUS_DAY", "LAST_30_DAYS", "LAST_7_DAYS", "LAST_90_DAYS"]), default=None -) -@click.option("--ga-sampling-level", type=click.Choice(["SMALL", "DEFAULT", "LARGE"]), default="LARGE") -@click.option("--ga-add-view", is_flag=True) -@processor("ga_access_token", "ga_refresh_token", "ga_client_secret") -def ga(**kwargs): - # Should handle valid combinations dimensions/metrics in the API - return GaReader(**extract_args("ga_", kwargs)) - - -class 
GaReader(Reader): +from ack.config import logger +from ack.readers.google_analytics.config import DATEFORMAT, DISCOVERY_URI, GOOGLE_TOKEN_URI, PREFIX +from ack.readers.reader import Reader +from ack.streams.json_stream import JSONStream +from ack.utils.retry import retry +from ack.utils.text import strip_prefix +from oauth2client import GOOGLE_REVOKE_URI, client + + +class GoogleAnalyticsReader(Reader): def __init__(self, access_token, refresh_token, client_id, client_secret, **kwargs): credentials = client.GoogleCredentials( access_token=access_token, @@ -67,7 +38,7 @@ def __init__(self, access_token, refresh_token, client_id, client_secret, **kwar client_secret=client_secret, refresh_token=refresh_token, token_expiry=None, - token_uri="https://accounts.google.com/o/oauth2/token", + token_uri=GOOGLE_TOKEN_URI, user_agent=None, revoke_uri=GOOGLE_REVOKE_URI, ) @@ -93,16 +64,16 @@ def get_date_range_for_ga_request(self): day_range = self.kwargs.get("day_range") if start_date and end_date: - logging.info("Date format used for request : startDate and endDate") + logger.info("Date format used for request : startDate and endDate") return self.create_date_range(start_date, end_date) elif date_range: - logging.info("Date format used for request : dateRange") + logger.info("Date format used for request : dateRange") return self.create_date_range(date_range[0], date_range[1]) elif day_range: - logging.info("Date format used for request : dayRange") + logger.info("Date format used for request : dayRange") return self.generate_date_range_with_day_range(day_range) else: - logging.warning("No date range provided - Last 7 days by default") + logger.warning("No date range provided - Last 7 days by default") return [] def generate_date_range_with_day_range(self, day_range): @@ -122,7 +93,7 @@ def get_days_delta(day_range): try: days_delta = delta_mapping[day_range] except KeyError: - raise ClickException("{} is not handled by the reader".format(day_range)) + raise 
ClickException(f"{day_range} is not handled by the reader") return days_delta def get_view_id_report_request(self, view_id): @@ -147,15 +118,15 @@ def log_sampling(report): data = report.get("data", {}) if data.get("samplesReadCounts") is not None: - logging.warning("☝️Report has been sampled.") + logger.warning("☝️Report has been sampled.") sample_reads = data["samplesReadCounts"][0] sample_space = data["samplingSpaceSizes"][0] - logging.warning(f"sample reads : {sample_reads}") - logging.warning(f"sample space :{sample_space}") + logger.warning(f"sample reads : {sample_reads}") + logger.warning(f"sample space :{sample_space}") - logging.warning(f"sample percent :{100 * int(sample_reads) / int(sample_space)}%") + logger.warning(f"sample percent :{100 * int(sample_reads) / int(sample_space)}%") else: - logging.info("Report is not sampled.") + logger.info("Report is not sampled.") @staticmethod def format_date(dateYYYYMMDD): @@ -176,18 +147,21 @@ def _run_query(self, view_id): report_page = self.client_v4.reports().batchGet(body=body).execute() yield report_page["reports"][0] except Exception as e: - raise ClickException("failed while requesting pages of the report: {}".format(e)) + raise ClickException(f"failed while requesting pages of the report: {e}") def format_and_yield(self, view_id, report): - dimension_names = report["columnHeader"]["dimensions"] - metric_names = [m["name"] for m in report["columnHeader"]["metricHeader"]["metricHeaderEntries"]] + dimension_names = [strip_prefix(dim, PREFIX) for dim in report["columnHeader"]["dimensions"]] + metric_names = [ + strip_prefix(met["name"], PREFIX) for met in report["columnHeader"]["metricHeader"]["metricHeaderEntries"] + ] + for row in report["data"].get("rows", []): row_dimension_values = row["dimensions"] row_metric_values = row["metrics"][0]["values"] formatted_response = {} if self.add_view: - formatted_response["ga:viewId"] = view_id + formatted_response["viewId"] = view_id for dim, value in 
zip(dimension_names, row_dimension_values): formatted_response[dim] = value @@ -195,8 +169,8 @@ def format_and_yield(self, view_id, report): for metric, metric_value in zip(metric_names, row_metric_values): formatted_response[metric] = metric_value - if "ga:date" in formatted_response: - formatted_response["ga:date"] = GaReader.format_date(formatted_response["ga:date"]) + if "date" in formatted_response: + formatted_response["date"] = GoogleAnalyticsReader.format_date(formatted_response["date"]) yield formatted_response @@ -207,14 +181,4 @@ def result_generator(self): yield from self.format_and_yield(view_id, report) def read(self): - yield GaStream( - "result_view_" + "_".join(self.view_ids), self.result_generator() - ) - - -class GaStream(NormalizedJSONStream): - GA_PREFIX = "^ga:" - - @staticmethod - def _normalize_key(key): - return re.split(GaStream.GA_PREFIX, key)[-1].replace(" ", "_").replace("-", "_") + yield JSONStream("result_view_" + "_".join(self.view_ids), self.result_generator()) diff --git a/ack/readers/google_cloud_storage/__init__.py b/ack/readers/google_cloud_storage/__init__.py new file mode 100644 index 00000000..d46139b7 --- /dev/null +++ b/ack/readers/google_cloud_storage/__init__.py @@ -0,0 +1,17 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. diff --git a/ack/readers/google_cloud_storage/cli.py b/ack/readers/google_cloud_storage/cli.py new file mode 100644 index 00000000..22da8338 --- /dev/null +++ b/ack/readers/google_cloud_storage/cli.py @@ -0,0 +1,34 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ +import click +from ack.readers.google_cloud_storage.reader import GoogleCloudStorageReader +from ack.utils.args import extract_args +from ack.utils.processor import processor + + +@click.command(name="read_gcs") +@click.option("--gcs-bucket", required=True) +@click.option("--gcs-prefix", required=True, multiple=True) +@click.option("--gcs-format", required=True, type=click.Choice(["csv", "gz", "njson"])) +@click.option("--gcs-dest-key-split", default=-1, type=int) +@click.option("--gcs-csv-delimiter", default=",") +@click.option("--gcs-csv-fieldnames", default=None) +@processor() +def google_cloud_storage(**kwargs): + return GoogleCloudStorageReader(**extract_args("gcs_", kwargs)) diff --git a/ack/readers/google_cloud_storage/config.py b/ack/readers/google_cloud_storage/config.py new file mode 100644 index 00000000..b423eed2 --- /dev/null +++ b/ack/readers/google_cloud_storage/config.py @@ -0,0 +1,15 @@ +from typing import List, Literal + +from pydantic import BaseModel + + +FORMATS = ("csv", "gz", "njson") + + +class GoogleCloudStorageReaderConfig(BaseModel): + bucket: str + prefix: List[str] + format: Literal[FORMATS] + dest_key_split: int = -1 + csv_delimiter: str = "," + fieldnames: str = None diff --git a/nck/readers/gcs_reader.py b/ack/readers/google_cloud_storage/reader.py similarity index 61% rename from nck/readers/gcs_reader.py rename to ack/readers/google_cloud_storage/reader.py index be64859e..837600a1 100644 --- a/nck/readers/gcs_reader.py +++ b/ack/readers/google_cloud_storage/reader.py @@ -15,38 +15,20 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-import click -from google.cloud import storage -from nck.commands.command import processor -from nck.readers.objectstorage_reader import ObjectStorageReader -from nck.utils.args import extract_args -from nck.helpers.google_base import GoogleBaseClass import urllib - -@click.command(name="read_gcs") -@click.option("--gcs-bucket", required=True) -@click.option("--gcs-prefix", required=True, multiple=True) -@click.option("--gcs-format", required=True, type=click.Choice(["csv", "gz"])) -@click.option("--gcs-dest-key-split", default=-1, type=int) -@click.option("--gcs-csv-delimiter", default=",") -@click.option("--gcs-csv-fieldnames", default=None) -@processor() -def gcs(**kwargs): - return GCSReader(**extract_args("gcs_", kwargs)) +from google.cloud import storage +from ack.clients.google.client import GoogleClient +from ack.readers.object_storage.reader import ObjectStorageReader -class GCSReader(ObjectStorageReader, GoogleBaseClass): +class GoogleCloudStorageReader(ObjectStorageReader, GoogleClient): def __init__(self, bucket, prefix, format, dest_key_split=-1, **kwargs): - super().__init__( - bucket, prefix, format, dest_key_split, platform="GCS", **kwargs - ) + super().__init__(bucket, prefix, format, dest_key_split, platform="GCS", **kwargs) def create_client(self, config): - return storage.Client( - credentials=self._get_credentials(), project=config.project_id - ) + return storage.Client(credentials=self._get_credentials(), project=config.project_id) def create_bucket(self, client, bucket): return client.bucket(bucket) diff --git a/ack/readers/google_dbm/__init__.py b/ack/readers/google_dbm/__init__.py new file mode 100644 index 00000000..d46139b7 --- /dev/null +++ b/ack/readers/google_dbm/__init__.py @@ -0,0 +1,17 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# 
License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. diff --git a/ack/readers/google_dbm/cli.py b/ack/readers/google_dbm/cli.py new file mode 100644 index 00000000..ae2a393f --- /dev/null +++ b/ack/readers/google_dbm/cli.py @@ -0,0 +1,64 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ +import click +from ack.readers.google_dbm.config import POSSIBLE_REQUEST_TYPES +from ack.readers.google_dbm.reader import GoogleDBMReader +from ack.utils.args import extract_args +from ack.utils.processor import processor + + +@click.command(name="read_dbm") +@click.option("--dbm-access-token", default=None) +@click.option("--dbm-refresh-token", required=True) +@click.option("--dbm-client-id", required=True) +@click.option("--dbm-client-secret", required=True) +@click.option("--dbm-query-metric", multiple=True) +@click.option("--dbm-query-dimension", multiple=True) +@click.option("--dbm-request-type", type=click.Choice(POSSIBLE_REQUEST_TYPES), required=True) +@click.option("--dbm-query-id") +@click.option("--dbm-query-title") +@click.option("--dbm-query-frequency", default="ONE_TIME") +@click.option("--dbm-query-param-type", default="TYPE_TRUEVIEW") +@click.option("--dbm-start-date", type=click.DateTime()) +@click.option("--dbm-end-date", type=click.DateTime()) +@click.option( + "--dbm-add-date-to-report", + type=click.BOOL, + default=False, + help=( + "Sometimes the date range on which metrics are computed is missing from the report. " + "If this option is set to True, this range will be added." + ), +) +@click.option("--dbm-filter", type=click.Tuple([str, str]), multiple=True) +@click.option("--dbm-file-type", multiple=True) +@click.option( + "--dbm-date-format", + default="%Y-%m-%d", + help="An optional date format for the output stream. 
" + "Follow the syntax of https://docs.python.org/3.8/library/datetime.html#strftime-strptime-behavior", +) +@click.option( + "--dbm-day-range", + type=click.Choice(["PREVIOUS_DAY", "LAST_30_DAYS", "LAST_90_DAYS", "LAST_7_DAYS", "PREVIOUS_MONTH", "PREVIOUS_WEEK"]), +) +@processor("dbm_access_token", "dbm_refresh_token", "dbm_client_secret") +def google_dbm(**kwargs): + # Should add validation argument in function of request_type + return GoogleDBMReader(**extract_args("dbm_", kwargs)) diff --git a/ack/readers/google_dbm/config.py b/ack/readers/google_dbm/config.py new file mode 100644 index 00000000..93e963cc --- /dev/null +++ b/ack/readers/google_dbm/config.py @@ -0,0 +1,63 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+from datetime import datetime +from typing import List, Literal, Tuple + +from pydantic import BaseModel, validator + +GOOGLE_TOKEN_URI = "https://accounts.google.com/o/oauth2/token" + +DAY_RANGES = ("PREVIOUS_DAY", "LAST_30_DAYS", "LAST_90_DAYS", "LAST_7_DAYS", "PREVIOUS_MONTH", "PREVIOUS_WEEK") +POSSIBLE_REQUEST_TYPES = [ + "existing_query", + "custom_query", + "existing_query_report", + "custom_query_report", + "lineitems_objects", + "list_reports", +] + + +class GoogleDBMReaderConfig(BaseModel): + access_token: str = None + refresh_token: str + client_id: str + client_secret: str + query_metric: List[str] = [] + query_dimension: List[str] = [] + request_type: Literal[tuple(POSSIBLE_REQUEST_TYPES)] + query_id: str = None + query_title: str = None + query_frequency: str = "ONE_TIME" + query_param_type: str = "TYPE_TRUEVIEW" + start_date: datetime = None + end_date: datetime = None + add_date_to_report: bool = False + filter: List[Tuple[str, str]] = [] + file_type: List[str] = [] + date_format: str = "%Y-%m-%d" + day_range: Literal[DAY_RANGES] = None + + @validator("start_date", "end_date", pre=True) + def date_format(cls, v): + if isinstance(v, str): + try: + return datetime.strptime(v, "%Y-%m-%d") + except ValueError: + raise ValueError("Datetime format must follow 'YYYY-MM-DD'") + return v diff --git a/nck/readers/dbm_reader.py b/ack/readers/google_dbm/reader.py similarity index 70% rename from nck/readers/dbm_reader.py rename to ack/readers/google_dbm/reader.py index cb7aa80a..2caf1d9f 100644 --- a/nck/readers/dbm_reader.py +++ b/ack/readers/google_dbm/reader.py @@ -15,80 +15,24 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-import click -import logging -import httplib2 -import requests import datetime -from googleapiclient import discovery -from oauth2client import client, GOOGLE_REVOKE_URI -from tenacity import retry, wait_exponential, stop_after_delay +import httplib2 +import requests from click import ClickException +from googleapiclient import discovery +from ack.config import logger +from ack.readers.google_dbm.config import GOOGLE_TOKEN_URI +from ack.readers.reader import Reader +from ack.streams.format_date_stream import FormatDateStream +from ack.utils.date_handler import check_date_range_definition_conformity, get_date_start_and_date_stop_from_date_range +from ack.utils.text import get_report_generator_from_flat_file, skip_last +from oauth2client import GOOGLE_REVOKE_URI, client +from tenacity import retry, stop_after_delay, wait_exponential -from nck.commands.command import processor -from nck.readers.reader import Reader -from nck.utils.args import extract_args -from nck.streams.format_date_stream import FormatDateStream - -from nck.utils.text import get_report_generator_from_flat_file, skip_last -from nck.utils.date_handler import get_date_start_and_date_stop_from_range - -from nck.helpers.dbm_helper import POSSIBLE_REQUEST_TYPES - -DISCOVERY_URI = "https://analyticsreporting.googleapis.com/$discovery/rest" - -default_start_date = datetime.date.today() - datetime.timedelta(days=2) -default_end_date = datetime.date.today() - - -@click.command(name="read_dbm") -@click.option("--dbm-access-token", default=None) -@click.option("--dbm-refresh-token", required=True) -@click.option("--dbm-client-id", required=True) -@click.option("--dbm-client-secret", required=True) -@click.option("--dbm-query-metric", multiple=True) -@click.option("--dbm-query-dimension", multiple=True) -@click.option("--dbm-request-type", type=click.Choice(POSSIBLE_REQUEST_TYPES), required=True) -@click.option("--dbm-query-id") -@click.option("--dbm-query-title") -@click.option("--dbm-query-frequency", 
default="ONE_TIME") -@click.option("--dbm-query-param-type", default="TYPE_TRUEVIEW") -@click.option("--dbm-start-date", type=click.DateTime()) -@click.option("--dbm-end-date", type=click.DateTime()) -@click.option( - "--dbm-add-date-to-report", - type=click.BOOL, - default=False, - help=( - "Sometimes the date range on which metrics are computed is missing from the report. " - "If this option is set to True, this range will be added." - ), -) -@click.option("--dbm-filter", type=click.Tuple([str, str]), multiple=True) -@click.option("--dbm-file-type", multiple=True) -@click.option( - "--dbm-date-format", - default="%Y-%m-%d", - help="And optional date format for the output stream. " - "Follow the syntax of https://docs.python.org/3.8/library/datetime.html#strftime-strptime-behavior", -) -@click.option( - "--dbm-day-range", - required=True, - default="LAST_7_DAYS", - type=click.Choice( - ["PREVIOUS_DAY", "LAST_30_DAYS", "LAST_90_DAYS", "LAST_7_DAYS", "PREVIOUS_MONTH", "PREVIOUS_WEEK"] - ), -) -@processor("dbm_access_token", "dbm_refresh_token", "dbm_client_secret") -def dbm(**kwargs): - # Should add validation argument in function of request_type - return DbmReader(**extract_args("dbm_", kwargs)) - - -class DbmReader(Reader): + +class GoogleDBMReader(Reader): API_NAME = "doubleclickbidmanager" API_VERSION = "v1.1" @@ -99,7 +43,7 @@ def __init__(self, access_token, refresh_token, client_secret, client_id, **kwar client_secret=client_secret, refresh_token=refresh_token, token_expiry=None, - token_uri="https://accounts.google.com/o/oauth2/token", + token_uri=GOOGLE_TOKEN_URI, user_agent=None, revoke_uri=GOOGLE_REVOKE_URI, ) @@ -111,6 +55,10 @@ def __init__(self, access_token, refresh_token, client_secret, client_id, **kwar self.kwargs = kwargs + check_date_range_definition_conformity( + self.kwargs.get("start_date"), self.kwargs.get("end_date"), self.kwargs.get("day_range") + ) + def get_query(self, query_id): if query_id: return 
self._client.queries().getquery(queryId=query_id).execute() @@ -146,9 +94,7 @@ def get_query_body(self): body_q["reportDataStartTimeMs"] = 1000 * int( (self.kwargs.get("start_date") + datetime.timedelta(days=1)).timestamp() ) - body_q["reportDataEndTimeMs"] = 1000 * int( - (self.kwargs.get("end_date") + datetime.timedelta(days=1)).timestamp() - ) + body_q["reportDataEndTimeMs"] = 1000 * int((self.kwargs.get("end_date") + datetime.timedelta(days=1)).timestamp()) return body_q def create_and_get_query(self): @@ -158,7 +104,7 @@ def create_and_get_query(self): @retry(wait=wait_exponential(multiplier=1, min=60, max=3600), stop=stop_after_delay(36000)) def _wait_for_query(self, query_id): - logging.info("waiting for query of id : {} to complete running".format(query_id)) + logger.info(f"waiting for query of id : {query_id} to complete running") query_infos = self.get_query(query_id) if query_infos["metadata"]["running"] or ( "googleCloudStoragePathForLatestReport" not in query_infos["metadata"] @@ -190,14 +136,12 @@ def get_query_report(self, existing_query=True): url = self.get_query_report_url(existing_query) report = requests.get(url, stream=True) if self.kwargs["query_param_type"] == "TYPE_REACH_AND_FREQUENCY" and self.kwargs["add_date_to_report"]: - start, stop = get_date_start_and_date_stop_from_range(self.kwargs["day_range"]) + start, stop = get_date_start_and_date_stop_from_date_range(self.kwargs["day_range"]) column_dict = { "date_start": start.strftime(self.kwargs.get("date_format")), "date_stop": stop.strftime(self.kwargs.get("date_format")), } - report_gen = get_report_generator_from_flat_file( - report.iter_lines(), add_column=True, column_dict=column_dict - ) + report_gen = get_report_generator_from_flat_file(report.iter_lines(), add_column=True, column_dict=column_dict) return skip_last(report_gen, 1) else: report_gen = get_report_generator_from_flat_file(report.iter_lines()) diff --git a/ack/readers/google_dcm/__init__.py 
b/ack/readers/google_dcm/__init__.py new file mode 100644 index 00000000..d46139b7 --- /dev/null +++ b/ack/readers/google_dcm/__init__.py @@ -0,0 +1,17 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. diff --git a/ack/readers/google_dcm/cli.py b/ack/readers/google_dcm/cli.py new file mode 100644 index 00000000..396ccf73 --- /dev/null +++ b/ack/readers/google_dcm/cli.py @@ -0,0 +1,64 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +import click +from ack.readers.google_dcm.config import REPORT_TYPES +from ack.readers.google_dcm.reader import GoogleDCMReader +from ack.utils.args import extract_args +from ack.utils.date_handler import DEFAULT_DATE_RANGE_FUNCTIONS +from ack.utils.processor import processor + + +@click.command(name="read_dcm") +@click.option("--dcm-access-token", default=None) +@click.option("--dcm-client-id", required=True) +@click.option("--dcm-client-secret", required=True) +@click.option("--dcm-refresh-token", required=True) +@click.option("--dcm-profile-id", "dcm_profile_ids", required=True, multiple=True) +@click.option("--dcm-report-name", default="DCM Report") +@click.option("--dcm-report-type", type=click.Choice(REPORT_TYPES), default=REPORT_TYPES[0]) +@click.option( + "--dcm-metric", + "dcm_metrics", + multiple=True, + help="https://developers.google.com/doubleclick-advertisers/v3.3/dimensions/#standard-metrics", +) +@click.option( + "--dcm-dimension", + "dcm_dimensions", + multiple=True, + help="https://developers.google.com/doubleclick-advertisers/v3.3/dimensions/#standard-dimensions", +) +@click.option("--dcm-start-date", type=click.DateTime(), help="Start date of the report") +@click.option("--dcm-end-date", type=click.DateTime(), help="End date of the report") +@click.option( + "--dcm-filter", + "dcm_filters", + type=click.Tuple([str, str]), + multiple=True, + help="A filter is a tuple following this pattern: (dimensionName, dimensionValue). 
" + "https://developers.google.com/doubleclick-advertisers/v3.3/dimensions/#standard-filters", +) +@click.option( + "--dcm-date-range", + type=click.Choice(DEFAULT_DATE_RANGE_FUNCTIONS.keys()), + help=f"One of the available ACK default date ranges: {DEFAULT_DATE_RANGE_FUNCTIONS.keys()}", +) +@processor("dcm_access_token", "dcm_refresh_token", "dcm_client_secret") +def google_dcm(**kwargs): + return GoogleDCMReader(**extract_args("dcm_", kwargs)) diff --git a/nck/helpers/dcm_helper.py b/ack/readers/google_dcm/config.py similarity index 59% rename from nck/helpers/dcm_helper.py rename to ack/readers/google_dcm/config.py index 05f64a7f..6ecbc690 100644 --- a/nck/helpers/dcm_helper.py +++ b/ack/readers/google_dcm/config.py @@ -15,6 +15,16 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +from datetime import datetime +from typing import List, Literal, Tuple + +from pydantic import BaseModel, validator + +from ack.utils.date_handler import DEFAULT_DATE_RANGE_FUNCTIONS + +ENCODING = "utf-8" +PREFIX = "^dfa:" + CRITERIA_MAPPING = { "STANDARD": "criteria", "REACH": "reachCriteria", @@ -44,3 +54,28 @@ "YEAR_TO_DATE", "YESTERDAY", ] + + +class GoogleDCMReaderConfig(BaseModel): + access_token: str = None + client_id: str + client_secret: str + refresh_token: str + profile_ids: List[str] + report_name: str = "DCM Report" + report_type: Literal[tuple(REPORT_TYPES)] = REPORT_TYPES[0] + metrics: List[str] = [] + dimensions: List[str] = [] + start_date: datetime = None + end_date: datetime = None + filters: List[Tuple[str, str]] = [] + date_range: Literal[tuple(DEFAULT_DATE_RANGE_FUNCTIONS.keys())] = None + + @validator("start_date", "end_date", pre=True) + def date_format(cls, v): + if isinstance(v, str): + try: + return datetime.strptime(v, "%Y-%m-%d") + except ValueError: + raise ValueError("Datetime format 
must follow 'YYYY-MM-DD'") + return v diff --git a/nck/readers/dcm_reader.py b/ack/readers/google_dcm/reader.py similarity index 54% rename from nck/readers/dcm_reader.py rename to ack/readers/google_dcm/reader.py index 2b565727..462ce00f 100644 --- a/nck/readers/dcm_reader.py +++ b/ack/readers/google_dcm/reader.py @@ -15,59 +15,19 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -import csv -import re -import click +import csv from io import StringIO -from nck.commands.command import processor -from nck.readers.reader import Reader -from nck.utils.args import extract_args -from nck.streams.normalized_json_stream import NormalizedJSONStream -from nck.clients.dcm_client import DCMClient -from nck.helpers.dcm_helper import REPORT_TYPES - -DATEFORMAT = "%Y-%m-%d" -ENCODING = "utf-8" - +from ack.clients.google_dcm.client import GoogleDCMClient +from ack.readers.google_dcm.config import ENCODING, PREFIX +from ack.readers.reader import Reader +from ack.streams.json_stream import JSONStream +from ack.utils.date_handler import build_date_range +from ack.utils.text import strip_prefix -@click.command(name="read_dcm") -@click.option("--dcm-access-token", default=None) -@click.option("--dcm-client-id", required=True) -@click.option("--dcm-client-secret", required=True) -@click.option("--dcm-refresh-token", required=True) -@click.option("--dcm-profile-id", "dcm_profile_ids", required=True, multiple=True) -@click.option("--dcm-report-name", default="DCM Report") -@click.option("--dcm-report-type", type=click.Choice(REPORT_TYPES), default=REPORT_TYPES[0]) -@click.option( - "--dcm-metric", - "dcm_metrics", - multiple=True, - help="https://developers.google.com/doubleclick-advertisers/v3.3/dimensions/#standard-metrics", -) -@click.option( - "--dcm-dimension", - "dcm_dimensions", - multiple=True, - 
help="https://developers.google.com/doubleclick-advertisers/v3.3/dimensions/#standard-dimensions", -) -@click.option("--dcm-start-date", type=click.DateTime(), required=True) -@click.option("--dcm-end-date", type=click.DateTime(), required=True) -@click.option( - "--dcm-filter", - "dcm_filters", - type=click.Tuple([str, str]), - multiple=True, - help="A filter is a tuple following this pattern: (dimensionName, dimensionValue). " - "https://developers.google.com/doubleclick-advertisers/v3.3/dimensions/#standard-filters", -) -@processor("dcm_access_token", "dcm_refresh_token", "dcm_client_secret") -def dcm(**kwargs): - return DcmReader(**extract_args("dcm_", kwargs)) - -class DcmReader(Reader): +class GoogleDCMReader(Reader): def __init__( self, access_token, @@ -82,15 +42,15 @@ def __init__( start_date, end_date, filters, + date_range, ): - self.dcm_client = DCMClient(access_token, client_id, client_secret, refresh_token) + self.dcm_client = GoogleDCMClient(access_token, client_id, client_secret, refresh_token) self.profile_ids = list(profile_ids) self.report_name = report_name self.report_type = report_type self.metrics = list(metrics) self.dimensions = list(dimensions) - self.start_date = start_date - self.end_date = end_date + self.start_date, self.end_date = build_date_range(start_date, end_date, date_range) self.filters = list(filters) def format_response(self, report_generator): @@ -106,7 +66,8 @@ def format_response(self, report_generator): is_main_data = False if is_main_data: - csv_reader = csv.DictReader(StringIO(decoded_row), self.dimensions + self.metrics) + formatted_keys = [strip_prefix(key, PREFIX) for key in self.dimensions + self.metrics] + csv_reader = csv.DictReader(StringIO(decoded_row), formatted_keys) yield next(csv_reader) def result_generator(self): @@ -124,12 +85,4 @@ def result_generator(self): yield from self.format_response(report_generator) def read(self): - yield DCMStream("results" + "_".join(self.profile_ids), self.result_generator()) 
- - -class DCMStream(NormalizedJSONStream): - DCM_PREFIX = "^dfa:" - - @staticmethod - def _normalize_key(key): - return re.split(DCMStream.DCM_PREFIX, key)[-1].replace(" ", "_").replace("-", "_") + yield JSONStream("results" + "_".join(self.profile_ids), self.result_generator()) diff --git a/ack/readers/google_dv360/__init__.py b/ack/readers/google_dv360/__init__.py new file mode 100644 index 00000000..d46139b7 --- /dev/null +++ b/ack/readers/google_dv360/__init__.py @@ -0,0 +1,17 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. diff --git a/ack/readers/google_dv360/cli.py b/ack/readers/google_dv360/cli.py new file mode 100644 index 00000000..5cdbd6cb --- /dev/null +++ b/ack/readers/google_dv360/cli.py @@ -0,0 +1,37 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +import click +from ack.readers.google_dv360.config import FILE_TYPES, FILTER_TYPES, REQUEST_TYPES +from ack.readers.google_dv360.reader import GoogleDV360Reader +from ack.utils.args import extract_args +from ack.utils.processor import processor + + +@click.command(name="read_dv360") +@click.option("--dv360-access-token", default=None, required=True) +@click.option("--dv360-refresh-token", required=True) +@click.option("--dv360-client-id", required=True) +@click.option("--dv360-client-secret", required=True) +@click.option("--dv360-advertiser-id", required=True) +@click.option("--dv360-request-type", type=click.Choice(REQUEST_TYPES), required=True) +@click.option("--dv360-file-type", type=click.Choice(FILE_TYPES), multiple=True) +@click.option("--dv360-filter-type", type=click.Choice(FILTER_TYPES)) +@processor("dv360_access_token", "dv360_refresh_token", "dv360_client_secret") +def google_dv360(**kwargs): + return GoogleDV360Reader(**extract_args("dv360_", kwargs)) diff --git a/ack/readers/google_dv360/config.py b/ack/readers/google_dv360/config.py new file mode 100644 index 00000000..17df0248 --- /dev/null +++ b/ack/readers/google_dv360/config.py @@ -0,0 +1,54 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the 
License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +from typing import Literal, List + +from pydantic import BaseModel + +FILE_NAMES = { + "FILE_TYPE_INSERTION_ORDER": "InsertionOrders", + "FILE_TYPE_CAMPAIGN": "Campaigns", + "FILE_TYPE_MEDIA_PRODUCT": "MediaProducts", + "FILE_TYPE_LINE_ITEM": "LineItems", + "FILE_TYPE_AD_GROUP": "AdGroups", + "FILE_TYPE_AD": "AdGroupAds", +} + +FILE_TYPES = FILE_NAMES.keys() + +FILTER_TYPES = [ + "FILTER_TYPE_UNSPECIFIED", + "FILTER_TYPE_NONE", + "FILTER_TYPE_ADVERTISER_ID", + "FILTER_TYPE_CAMPAIGN_ID", + "FILTER_TYPE_MEDIA_PRODUCT_ID", + "FILTER_TYPE_INSERTION_ORDER_ID", + "FILTER_TYPE_LINE_ITEM_ID", +] + +REQUEST_TYPES = ["sdf_request", "creative_request"] + + +class GoogleDV360ReaderConfig(BaseModel): + access_token: str = None + refresh_token: str + client_id: str + client_secret: str + advertiser_id: str + request_type: Literal[tuple(REQUEST_TYPES)] + file_type: List[Literal[tuple(FILE_TYPES)]] = [] + filter_type: Literal[tuple(FILTER_TYPES)] = None diff --git a/nck/readers/dv360_reader.py b/ack/readers/google_dv360/reader.py similarity index 51% rename from nck/readers/dv360_reader.py rename to ack/readers/google_dv360/reader.py index 960f01c8..bf0c9060 100644 --- a/nck/readers/dv360_reader.py +++ b/ack/readers/google_dv360/reader.py @@ -15,42 +15,27 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 
02110-1301, USA. -import click -import logging -import io -import httplib2 +import io from itertools import chain from typing import List +import httplib2 from googleapiclient import discovery from googleapiclient.http import MediaIoBaseDownload -from oauth2client import client, GOOGLE_REVOKE_URI -from tenacity import retry, wait_exponential, stop_after_delay - -from nck.helpers.dv360_helper import FILE_NAMES, FILE_TYPES, FILTER_TYPES -from nck.utils.exceptions import RetryTimeoutError, SdfOperationError -from nck.commands.command import processor -from nck.readers.reader import Reader -from nck.utils.file_reader import sdf_to_njson_generator, unzip -from nck.utils.args import extract_args -from nck.streams.format_date_stream import FormatDateStream - +from ack.config import logger +from ack.readers.google_dv360.config import FILE_NAMES +from ack.readers.reader import Reader +from ack.streams.format_date_stream import FormatDateStream +from ack.streams.json_stream import JSONStream +from ack.utils.exceptions import RetryTimeoutError, SdfOperationError +from ack.utils.file_reader import sdf_to_njson_generator, unzip +from ack.utils.stdout_to_log import http_log, http_log_for_init +from oauth2client import GOOGLE_REVOKE_URI, client +from tenacity import retry, stop_after_delay, wait_exponential -@click.command(name="read_dv360") -@click.option("--dv360-access-token", default=None, required=True) -@click.option("--dv360-refresh-token", required=True) -@click.option("--dv360-client-id", required=True) -@click.option("--dv360-client-secret", required=True) -@click.option("--dv360-advertiser-id", required=True) -@click.option("--dv360-file-type", type=click.Choice(FILE_TYPES), multiple=True, required=True) -@click.option("--dv360-filter-type", type=click.Choice(FILTER_TYPES), required=True) -@processor("dbm_access_token", "dbm_refresh_token", "dbm_client_secret") -def dv360(**kwargs): - return DV360Reader(**extract_args("dv360_", kwargs)) - -class DV360Reader(Reader): 
+class GoogleDV360Reader(Reader): API_NAME = "displayvideo" API_VERSION = "v1" @@ -63,14 +48,8 @@ class DV360Reader(Reader): # if more than one file type where to be provided. ARCHIVE_NAME = "sdf" - def __init__( - self, - access_token: str, - refresh_token: str, - client_id: str, - client_secret: str, - **kwargs - ): + @http_log_for_init("dv360_reader") + def __init__(self, access_token: str, refresh_token: str, client_id: str, client_secret: str, **kwargs): credentials = client.GoogleCredentials( access_token, @@ -80,19 +59,17 @@ def __init__( token_expiry=None, token_uri="https://www.googleapis.com/oauth2/v4/token", user_agent=None, - revoke_uri=GOOGLE_REVOKE_URI + revoke_uri=GOOGLE_REVOKE_URI, ) http = credentials.authorize(httplib2.Http()) credentials.refresh(http) - self._client = discovery.build( - self.API_NAME , self.API_VERSION, http=http, cache_discovery=False - ) + self._client = discovery.build(self.API_NAME, self.API_VERSION, http=http, cache_discovery=False) self.kwargs = kwargs - self.file_names = self.get_file_names() + self.file_names = self.__get_file_names() - def get_file_names(self) -> List[str]: + def __get_file_names(self) -> List[str]: """ DV360 api creates one file per file_type. map file_type with the name of the generated file. @@ -100,10 +77,9 @@ def get_file_names(self) -> List[str]: return [f"SDF-{FILE_NAMES[file_type]}" for file_type in self.kwargs.get("file_type")] @retry( - wait=wait_exponential(multiplier=1, min=60, max=3600), - stop=stop_after_delay(36000), + wait=wait_exponential(multiplier=1, min=60, max=3600), stop=stop_after_delay(36000), ) - def _wait_sdf_download_request(self, operation): + def __wait_sdf_download_request(self, operation): """ Wait for a sdf task to be completed. ie. (file ready for download) Args: @@ -111,16 +87,14 @@ def _wait_sdf_download_request(self, operation): Returns: operation (dict): task metadata updated with resource location. 
""" - logging.info( - f"waiting for SDF operation: {operation['name']} to complete running." - ) + logger.info(f"waiting for SDF operation: {operation['name']} to complete running.") get_request = self._client.sdfdownloadtasks().operations().get(name=operation["name"]) operation = get_request.execute() if "done" not in operation: raise RetryTimeoutError("The operation has taken more than 10 hours to complete.\n") return operation - def create_sdf_task(self, body): + def __create_sdf_task(self, body): """ Create a sdf asynchronous task of type googleapiclient.discovery.Resource Args: @@ -130,10 +104,10 @@ def create_sdf_task(self, body): """ operation = self._client.sdfdownloadtasks().create(body=body).execute() - logging.info("Operation %s was created." % operation["name"]) + logger.info(f"Operation {operation['name']} was created.") return operation - def download_sdf(self, operation): + def __download_sdf(self, operation): request = self._client.media().download(resourceName=operation["response"]["resourceName"]) request.uri = request.uri.replace("?alt=json", "?alt=media") sdf = io.FileIO(f"{self.BASE}/{self.ARCHIVE_NAME}.zip", mode="wb") @@ -141,41 +115,54 @@ def download_sdf(self, operation): done = False while done is False: status, done = downloader.next_chunk() - logging.info(f"Download {int(status.progress() * 100)}%.") + logger.info(f"Download {int(status.progress() * 100)}%.") - def get_sdf_body(self): + def __get_sdf_body(self): return { - "parentEntityFilter": { - "fileType": self.kwargs.get("file_type"), - "filterType": self.kwargs.get("filter_type") - }, + "parentEntityFilter": {"fileType": self.kwargs.get("file_type"), "filterType": self.kwargs.get("filter_type")}, "version": self.SDF_VERSION, - "advertiserId": self.kwargs.get("advertiser_id") + "advertiserId": self.kwargs.get("advertiser_id"), } - def get_sdf_objects(self): - body = self.get_sdf_body() - init_operation = self.create_sdf_task(body=body) - created_operation = 
self._wait_sdf_download_request(init_operation) + def __get_sdf_objects(self): + body = self.__get_sdf_body() + init_operation = self.__create_sdf_task(body=body) + created_operation = self.__wait_sdf_download_request(init_operation) if "error" in created_operation: - raise SdfOperationError("The operation finished in error with code %s: %s" % ( - created_operation["error"]["code"], - created_operation["error"]["message"])) - self.download_sdf(created_operation) + raise SdfOperationError( + "The operation finished in error with code " + f"{created_operation['error']['code']}: {created_operation['error']['message']}" + ) + self.__download_sdf(created_operation) unzip(f"{self.BASE}/{self.ARCHIVE_NAME}.zip", output_path=self.BASE) # We chain operation if many file_types were to be provided. - return chain( - *[ - sdf_to_njson_generator(f"{self.BASE}/{file_name}.csv") - for file_name in self.file_names - ] - ) - + return chain(*[sdf_to_njson_generator(f"{self.BASE}/{file_name}.csv") for file_name in self.file_names]) + + def __get_creatives(self): + response = self._client.advertisers().creatives().list(advertiserId=self.kwargs.get("advertiser_id")).execute() + if len(response.keys()) == 0: # no data returned + return {} + else: + all_creatives = response["creatives"] + while "nextPageToken" in response: + token = response["nextPageToken"] + logger.info(f"Query a new page of creatives. 
Page id: {token}") + response = ( + self._client.advertisers() + .creatives() + .list(advertiserId=self.kwargs.get("advertiser_id"), pageToken=token) + .execute() + ) + all_creatives.extend(response["creatives"]) + yield from all_creatives + + @http_log("dv360_reader") def read(self): - yield FormatDateStream( - "sdf", - self.get_sdf_objects(), - keys=["Date"], - date_format=self.kwargs.get("date_format"), - ) + request_type = self.kwargs.get("request_type") + if request_type == "sdf_request": + yield FormatDateStream( + "sdf", self.__get_sdf_objects(), keys=["Date"], date_format=self.kwargs.get("date_format"), + ) + elif request_type == "creative_request": + yield JSONStream("advertiser_creatives", self.__get_creatives()) diff --git a/ack/readers/google_sa360/__init__.py b/ack/readers/google_sa360/__init__.py new file mode 100644 index 00000000..d46139b7 --- /dev/null +++ b/ack/readers/google_sa360/__init__.py @@ -0,0 +1,17 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
diff --git a/ack/readers/google_sa360/cli.py b/ack/readers/google_sa360/cli.py new file mode 100644 index 00000000..b41d76d4 --- /dev/null +++ b/ack/readers/google_sa360/cli.py @@ -0,0 +1,59 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ +import click +from ack.readers.google_sa360.config import REPORT_TYPES +from ack.readers.google_sa360.reader import GoogleSA360Reader +from ack.utils.args import extract_args +from ack.utils.date_handler import DEFAULT_DATE_RANGE_FUNCTIONS +from ack.utils.processor import processor + + +@click.command(name="read_sa360") +@click.option("--sa360-access-token", default=None) +@click.option("--sa360-client-id", required=True) +@click.option("--sa360-client-secret", required=True) +@click.option("--sa360-refresh-token", required=True) +@click.option("--sa360-agency-id", required=True) +@click.option( + "--sa360-advertiser-id", + "sa360_advertiser_ids", + multiple=True, + help="If empty, all advertisers from agency will be requested", +) +@click.option("--sa360-report-name", default="SA360 Report") +@click.option("--sa360-report-type", type=click.Choice(REPORT_TYPES), default=REPORT_TYPES[0]) +@click.option( + "--sa360-column", "sa360_columns", multiple=True, help="https://developers.google.com/search-ads/v2/report-types", +) +@click.option( + "--sa360-saved-column", + "sa360_saved_columns", + multiple=True, + help="https://developers.google.com/search-ads/v2/how-tos/reporting/saved-columns", +) +@click.option("--sa360-start-date", type=click.DateTime(), help="Start date of the report") +@click.option("--sa360-end-date", type=click.DateTime(), help="End date of the report") +@click.option( + "--sa360-date-range", + type=click.Choice(DEFAULT_DATE_RANGE_FUNCTIONS.keys()), + help=f"One of the available ACK default date ranges: {DEFAULT_DATE_RANGE_FUNCTIONS.keys()}", +) +@processor("sa360_access_token", "sa360_refresh_token", "sa360_client_secret") +def google_sa360(**kwargs): + return GoogleSA360Reader(**extract_args("sa360_", kwargs)) diff --git a/nck/helpers/sa360_helper.py b/ack/readers/google_sa360/config.py similarity index 57% rename from nck/helpers/sa360_helper.py rename to ack/readers/google_sa360/config.py index 7375996c..f2b56718 100644 --- 
a/nck/helpers/sa360_helper.py +++ b/ack/readers/google_sa360/config.py @@ -15,6 +15,13 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +from datetime import datetime +from typing import List, Literal + +from pydantic import BaseModel, validator + +from ack.utils.date_handler import DEFAULT_DATE_RANGE_FUNCTIONS + REPORT_TYPES = [ "advertiser", "account", @@ -39,3 +46,28 @@ "productTarget", "visit", ] + + +class GoogleSA360ReaderConfig(BaseModel): + access_token: str = None + refresh_token: str + client_id: str + client_secret: str + agency_id: str + advertiser_ids: List[str] = [] + report_name: str = "SA360 Report" + report_type: Literal[tuple(REPORT_TYPES)] = REPORT_TYPES[0] + columns: List[str] = [] + saved_columns: List[str] = [] + start_date: datetime = None + end_date: datetime = None + date_range: Literal[tuple(DEFAULT_DATE_RANGE_FUNCTIONS.keys())] = None + + @validator("start_date", "end_date", pre=True) + def date_format(cls, v): + if isinstance(v, str): + try: + return datetime.strptime(v, "%Y-%m-%d") + except ValueError: + raise ValueError("Datetime format must follow 'YYYY-MM-DD'") + return v diff --git a/nck/readers/sa360_reader.py b/ack/readers/google_sa360/reader.py similarity index 51% rename from nck/readers/sa360_reader.py rename to ack/readers/google_sa360/reader.py index 3ff2d16f..876956ff 100644 --- a/nck/readers/sa360_reader.py +++ b/ack/readers/google_sa360/reader.py @@ -15,56 +15,15 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-import click -from nck.commands.command import processor -from nck.readers.reader import Reader -from nck.streams.normalized_json_stream import NormalizedJSONStream -from nck.clients.sa360_client import SA360Client -from nck.helpers.sa360_helper import REPORT_TYPES -from nck.utils.args import extract_args -from nck.utils.text import get_report_generator_from_flat_file +from ack.clients.google_sa360.client import GoogleSA360Client +from ack.readers.reader import Reader +from ack.streams.json_stream import JSONStream +from ack.utils.date_handler import build_date_range +from ack.utils.text import get_report_generator_from_flat_file -DATEFORMAT = "%Y-%m-%d" -ENCODING = "utf-8" - -@click.command(name="read_sa360") -@click.option("--sa360-access-token", default=None) -@click.option("--sa360-client-id", required=True) -@click.option("--sa360-client-secret", required=True) -@click.option("--sa360-refresh-token", required=True) -@click.option("--sa360-agency-id", required=True) -@click.option( - "--sa360-advertiser-id", - "sa360_advertiser_ids", - multiple=True, - help="If empty, all advertisers from agency will be requested", -) -@click.option("--sa360-report-name", default="SA360 Report") -@click.option( - "--sa360-report-type", type=click.Choice(REPORT_TYPES), default=REPORT_TYPES[0] -) -@click.option( - "--sa360-column", - "sa360_columns", - multiple=True, - help="https://developers.google.com/search-ads/v2/report-types", -) -@click.option( - "--sa360-saved-column", - "sa360_saved_columns", - multiple=True, - help="https://developers.google.com/search-ads/v2/how-tos/reporting/saved-columns", -) -@click.option("--sa360-start-date", type=click.DateTime(), required=True) -@click.option("--sa360-end-date", type=click.DateTime(), required=True) -@processor("sa360_access_token", "sa360_refresh_token", "sa360_client_secret") -def sa360_reader(**kwargs): - return SA360Reader(**extract_args("sa360_", kwargs)) - - -class SA360Reader(Reader): +class GoogleSA360Reader(Reader): 
def __init__( self, access_token, @@ -79,10 +38,9 @@ def __init__( saved_columns, start_date, end_date, + date_range, ): - self.sa360_client = SA360Client( - access_token, client_id, client_secret, refresh_token - ) + self.sa360_client = GoogleSA360Client(access_token, client_id, client_secret, refresh_token) self.agency_id = agency_id self.advertiser_ids = list(advertiser_ids) self.report_name = report_name @@ -90,8 +48,7 @@ def __init__( self.columns = list(columns) self.saved_columns = list(saved_columns) self.all_columns = self.columns + self.saved_columns - self.start_date = start_date - self.end_date = end_date + self.start_date, self.end_date = build_date_range(start_date, end_date, date_range) def result_generator(self): for advertiser_id in self.advertiser_ids: @@ -109,17 +66,11 @@ def result_generator(self): report_data = self.sa360_client.assert_report_file_ready(report_id) - for line_iterator in self.sa360_client.download_report_files( - report_data, report_id - ): + for line_iterator in self.sa360_client.download_report_files(report_data, report_id): yield from get_report_generator_from_flat_file(line_iterator) def read(self): if not self.advertiser_ids: - self.advertiser_ids = self.sa360_client.get_all_advertisers_of_agency( - self.agency_id - ) + self.advertiser_ids = self.sa360_client.get_all_advertisers_of_agency(self.agency_id) - yield NormalizedJSONStream( - "results" + "_".join(self.advertiser_ids), self.result_generator() - ) + yield JSONStream("results" + "_".join(self.advertiser_ids), self.result_generator()) diff --git a/ack/readers/google_search_console/__init__.py b/ack/readers/google_search_console/__init__.py new file mode 100644 index 00000000..d46139b7 --- /dev/null +++ b/ack/readers/google_search_console/__init__.py @@ -0,0 +1,17 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the 
terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. diff --git a/ack/readers/google_search_console/cli.py b/ack/readers/google_search_console/cli.py new file mode 100644 index 00000000..7a3bef32 --- /dev/null +++ b/ack/readers/google_search_console/cli.py @@ -0,0 +1,44 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ +import click +from ack.readers.google_search_console.reader import GoogleSearchConsoleReader +from ack.utils.args import extract_args +from ack.utils.date_handler import DEFAULT_DATE_RANGE_FUNCTIONS +from ack.utils.processor import processor + + +@click.command(name="read_search_console") +@click.option("--search-console-client-id", required=True) +@click.option("--search-console-client-secret", required=True) +@click.option("--search-console-access-token", default="") +@click.option("--search-console-refresh-token", required=True) +@click.option("--search-console-dimensions", required=True, multiple=True) +@click.option("--search-console-site-url", required=True) +@click.option("--search-console-start-date", type=click.DateTime(), default=None) +@click.option("--search-console-end-date", type=click.DateTime(), default=None) +@click.option("--search-console-date-column", type=click.BOOL, default=False) +@click.option("--search-console-row-limit", type=click.INT, default=25000) +@click.option( + "--search-console-date-range", + type=click.Choice(DEFAULT_DATE_RANGE_FUNCTIONS.keys()), + help=f"One of the available ACK default date ranges: {DEFAULT_DATE_RANGE_FUNCTIONS.keys()}", +) +@processor() +def google_search_console(**params): + return GoogleSearchConsoleReader(**extract_args("search_console_", params)) diff --git a/ack/readers/google_search_console/config.py b/ack/readers/google_search_console/config.py new file mode 100644 index 00000000..bfb2cdf8 --- /dev/null +++ b/ack/readers/google_search_console/config.py @@ -0,0 +1,49 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +from datetime import datetime +from typing import List, Literal + +from pydantic import BaseModel, validator + +from ack.utils.date_handler import DEFAULT_DATE_RANGE_FUNCTIONS + +DATEFORMAT = "%Y-%m-%d" +GOOGLE_TOKEN_URI = "https://accounts.google.com/o/oauth2/token" + + +class GoogleSearchConsoleReaderConfig(BaseModel): + client_id: str + client_secret: str + access_token: str = "" + refresh_token: str + dimensions: List[str] + site_url: str + start_date: datetime = None + end_date: datetime = None + date_column: bool = False + row_limit: int = 25000 + date_range: Literal[tuple(DEFAULT_DATE_RANGE_FUNCTIONS.keys())] = None + + @validator("start_date", "end_date", pre=True) + def date_format(cls, v): + if isinstance(v, str): + try: + return datetime.strptime(v, "%Y-%m-%d") + except ValueError: + raise ValueError("Datetime format must follow 'YYYY-MM-DD'") + return v diff --git a/nck/readers/search_console_reader.py b/ack/readers/google_search_console/reader.py similarity index 65% rename from nck/readers/search_console_reader.py rename to ack/readers/google_search_console/reader.py index b346cae9..89c253a2 100644 --- a/nck/readers/search_console_reader.py +++ b/ack/readers/google_search_console/reader.py @@ -15,44 +15,25 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-from oauth2client.client import GoogleCredentials -from oauth2client import GOOGLE_REVOKE_URI -from googleapiclient.discovery import build -import httplib2 + from datetime import datetime, timedelta -import click -import logging - -from nck.commands.command import processor -from nck.readers.reader import Reader -from nck.streams.normalized_json_stream import NormalizedJSONStream -from nck.utils.args import extract_args -from nck.utils.retry import retry - - -@click.command(name="read_search_console") -@click.option("--search-console-client-id", required=True) -@click.option("--search-console-client-secret", required=True) -@click.option("--search-console-access-token", default="") -@click.option("--search-console-refresh-token", required=True) -@click.option("--search-console-dimensions", required=True, multiple=True) -@click.option("--search-console-site-url", required=True) -@click.option("--search-console-start-date", type=click.DateTime(), default=None) -@click.option("--search-console-end-date", type=click.DateTime(), default=None) -@click.option("--search-console-date-column", type=click.BOOL, default=False) -@click.option("--search-console-row-limit", type=click.INT, default=25000) -@processor() -def search_console(**params): - return SearchConsoleReader(**extract_args("search_console_", params)) - - -DATEFORMAT = "%Y-%m-%d" -# most recent data available is often 2 days ago. 
+import httplib2 +from googleapiclient.discovery import build +from ack.config import logger +from ack.readers.google_search_console.config import DATEFORMAT, GOOGLE_TOKEN_URI +from ack.readers.reader import Reader +from ack.streams.json_stream import JSONStream +from ack.utils.date_handler import build_date_range +from ack.utils.retry import retry +from oauth2client import GOOGLE_REVOKE_URI +from oauth2client.client import GoogleCredentials + +# Most recent data available is often 2 days ago MAX_END_DATE = datetime.today() - timedelta(days=2) -class SearchConsoleReader(Reader): +class GoogleSearchConsoleReader(Reader): def __init__( self, client_id, @@ -65,6 +46,7 @@ def __init__( end_date, date_column, row_limit, + date_range, ): self.client_id = client_id self.client_secret = client_secret @@ -72,8 +54,7 @@ def __init__( self.refresh_token = refresh_token self.dimensions = list(dimensions) self.site_url = site_url - self.start_date = datetime.strftime(start_date, DATEFORMAT) - self.end_date = datetime.strftime(self.check_end_date(end_date), DATEFORMAT) + self.start_date, self.end_date = build_date_range(start_date, end_date, date_range) self.with_date_column = date_column self.row_limit = row_limit @@ -89,7 +70,7 @@ def initialize_analyticsreporting(self): client_secret=self.client_secret, refresh_token=self.refresh_token, token_expiry=None, - token_uri="https://accounts.google.com/o/oauth2/token", + token_uri=GOOGLE_TOKEN_URI, user_agent=None, revoke_uri=GOOGLE_REVOKE_URI, ) @@ -97,21 +78,19 @@ def initialize_analyticsreporting(self): http = credentials.authorize(httplib2.Http()) credentials.refresh(http) - self._service = build( - serviceName="webmasters", version="v3", credentials=credentials, cache_discovery=False - ) + self._service = build(serviceName="webmasters", version="v3", credentials=credentials, cache_discovery=False) @staticmethod def check_end_date(end_date): if end_date > MAX_END_DATE: - logging.warning(f"The most recent date you can request is 
{datetime.strftime(MAX_END_DATE, DATEFORMAT)}") + logger.warning(f"The most recent date you can request is {datetime.strftime(MAX_END_DATE, DATEFORMAT)}") return end_date def build_query(self): query = { - "startDate": self.start_date, - "endDate": self.end_date, + "startDate": datetime.strftime(self.start_date, DATEFORMAT), + "endDate": datetime.strftime(self.check_end_date(self.end_date), DATEFORMAT), "dimensions": self.dimensions, "startRow": self.start_row, "rowLimit": self.row_limit, @@ -130,7 +109,7 @@ def _run_query(self): # Pagination while len(response.get("rows", [])) != 0: - logging.info("{} lines successfully processed...".format(len(response.get("rows")) + self.start_row)) + logger.info(f"{len(response.get('rows')) + self.start_row} lines successfully processed...") self.start_row += self.row_limit response = self._service.searchanalytics().query(siteUrl=self.site_url, body=self.build_query()).execute() yield response @@ -144,7 +123,7 @@ def format_and_yield(self, data): for dimension, key in zip(self.dimensions, keys): if self.with_date_column: - record["date"] = self.start_date + record["date"] = datetime.strftime(self.start_date, DATEFORMAT) record[dimension] = key for metric in metric_names: @@ -160,4 +139,4 @@ def result_generator(self): return None def read(self): - yield NormalizedJSONStream("search_console_results", self.result_generator()) + yield JSONStream("search_console_results", self.result_generator()) diff --git a/ack/readers/google_sheets/__init__.py b/ack/readers/google_sheets/__init__.py new file mode 100644 index 00000000..d46139b7 --- /dev/null +++ b/ack/readers/google_sheets/__init__.py @@ -0,0 +1,17 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the 
License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. diff --git a/ack/readers/google_sheets/cli.py b/ack/readers/google_sheets/cli.py new file mode 100644 index 00000000..5acc1ae1 --- /dev/null +++ b/ack/readers/google_sheets/cli.py @@ -0,0 +1,73 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +import click +from ack.readers.google_sheets.reader import GoogleSheetsReader +from ack.utils.args import extract_args +from ack.utils.processor import processor + + +@click.command(name="read_gs") +@click.option( + "--gs-project-id", + required=True, + help="Project ID that is given by Google services once you have \ + created your project in the google cloud console. 
You can retrieve it in the JSON credential file", +) +@click.option( + "--gs-private-key-id", + required=True, + help="Private key ID given by Google services once you have added credentials \ + to the project. You can retrieve it in the JSON credential file", +) +@click.option( + "--gs-private-key", + required=True, + help="The private key given by Google services once you have added credentials \ + to the project. \ + You can retrieve it first in the JSON credential file", +) +@click.option( + "--gs-client-email", + required=True, + help="Client e-mail given by Google services once you have added credentials \ + to the project. You can retrieve it in the JSON credential file", +) +@click.option( + "--gs-client-id", + required=True, + help="Client ID given by Google services once you have added credentials \ + to the project. You can retrieve it in the JSON credential file", +) +@click.option( + "--gs-client-cert", + required=True, + help="Client certificate given by Google services once you have added credentials \ + to the project. 
You can retrieve it in the JSON credential file", +) +@click.option("--gs-sheet-key", required=True, help="Google spreadsheet key that is availbale in the url") +@click.option( + "--gs-page-number", + default=0, + type=click.INT, + help="The page number you want to access.\ + The number pages starts at 0", +) +@processor("gs_private_key_id", "gs_private_key", "gs_client_id", "gs_client_cert") +def google_sheets(**kwargs): + return GoogleSheetsReader(**extract_args("gs_", kwargs)) diff --git a/ack/readers/google_sheets/config.py b/ack/readers/google_sheets/config.py new file mode 100644 index 00000000..15ea5707 --- /dev/null +++ b/ack/readers/google_sheets/config.py @@ -0,0 +1,12 @@ +from pydantic import BaseModel + + +class GoogleSheetsReaderConfig(BaseModel): + project_id: str + private_key_id: str + private_key: str + client_email: str + client_id: str + client_cert: str + sheet_key: str + page_number: int = 0 diff --git a/nck/readers/gs_reader.py b/ack/readers/google_sheets/reader.py similarity index 55% rename from nck/readers/gs_reader.py rename to ack/readers/google_sheets/reader.py index 9b553814..6713526a 100644 --- a/nck/readers/gs_reader.py +++ b/ack/readers/google_sheets/reader.py @@ -15,69 +15,15 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -import click + import gspread from google.auth.transport.requests import AuthorizedSession from google.oauth2 import service_account - -from nck.commands.command import processor -from nck.readers.reader import Reader -from nck.utils.args import extract_args -from nck.streams.json_stream import JSONStream - - -@click.command(name="read_gs") -@click.option( - "--gs-project-id", - required=True, - help="Project ID that is given by Google services once you have \ - created your project in the google cloud console. 
You can retrieve it in the JSON credential file", -) -@click.option( - "--gs-private-key-id", - required=True, - help="Private key ID given by Google services once you have added credentials \ - to the project. You can retrieve it in the JSON credential file", -) -@click.option( - "--gs-private-key", - required=True, - help="The private key given by Google services once you have added credentials \ - to the project. \ - You can retrieve it first in the JSON credential file", -) -@click.option( - "--gs-client-email", - required=True, - help="Client e-mail given by Google services once you have added credentials \ - to the project. You can retrieve it in the JSON credential file", -) -@click.option( - "--gs-client-id", - required=True, - help="Client ID given by Google services once you have added credentials \ - to the project. You can retrieve it in the JSON credential file", -) -@click.option( - "--gs-client-cert", - required=True, - help="Client certificate given by Google services once you have added credentials \ - to the project. 
You can retrieve it in the JSON credential file", -) -@click.option("--gs-sheet-key", required=True, help="Google spreadsheet key that is availbale in the url") -@click.option( - "--gs-page-number", - default=0, - type=click.INT, - help="The page number you want to access.\ - The number pages starts at 0", -) -@processor("gs_private_key_id", "gs_private_key", "gs_client_id", "gs_client_cert") -def google_sheets(**kwargs): - return GSheetsReader(**extract_args("gs_", kwargs)) +from ack.readers.reader import Reader +from ack.streams.json_stream import JSONStream -class GSheetsReader(Reader): +class GoogleSheetsReader(Reader): _scopes = [ "https://www.googleapis.com/auth/spreadsheets.readonly", "https://www.googleapis.com/auth/spreadsheets", @@ -98,9 +44,7 @@ def __init__( ): self._sheet_key = sheet_key self._page_number = page_number - credentials = self.__init_credentials( - project_id, private_key_id, private_key, client_email, client_id, client_cert - ) + credentials = self.__init_credentials(project_id, private_key_id, private_key, client_email, client_id, client_cert) scoped_credentials = credentials.with_scopes(self._scopes) self._gc = gspread.Client(auth=scoped_credentials) self._gc.session = AuthorizedSession(scoped_credentials) diff --git a/ack/readers/google_sheets_old/__init__.py b/ack/readers/google_sheets_old/__init__.py new file mode 100644 index 00000000..d46139b7 --- /dev/null +++ b/ack/readers/google_sheets_old/__init__.py @@ -0,0 +1,17 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. diff --git a/ack/readers/google_sheets_old/cli.py b/ack/readers/google_sheets_old/cli.py new file mode 100644 index 00000000..3c7da103 --- /dev/null +++ b/ack/readers/google_sheets_old/cli.py @@ -0,0 +1,30 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ +import click +from ack.readers.google_sheets_old.reader import GoogleSheetsReaderOld +from ack.utils.args import extract_args +from ack.utils.processor import processor + + +@click.command(name="read_gsheets") +@click.option("--gsheets-url", required=True) +@click.option("--gsheets-worksheet-name", required=True, multiple=True) +@processor() +def google_sheets_old(**kwargs): + return GoogleSheetsReaderOld(**extract_args("gsheets_", kwargs)) diff --git a/ack/readers/google_sheets_old/config.py b/ack/readers/google_sheets_old/config.py new file mode 100644 index 00000000..bb80b589 --- /dev/null +++ b/ack/readers/google_sheets_old/config.py @@ -0,0 +1,8 @@ +from typing import List + +from pydantic import BaseModel + + +class GoogleSheetsReaderOldConfig(BaseModel): + url: str + worksheet_name: List[str] diff --git a/nck/readers/gsheets_reader.py b/ack/readers/google_sheets_old/reader.py similarity index 74% rename from nck/readers/gsheets_reader.py rename to ack/readers/google_sheets_old/reader.py index f03b6bae..12ebe3bd 100644 --- a/nck/readers/gsheets_reader.py +++ b/ack/readers/google_sheets_old/reader.py @@ -15,25 +15,14 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-import click + import gspread +from ack.readers.reader import Reader +from ack.streams.json_stream import JSONStream from oauth2client.client import GoogleCredentials -from nck.commands.command import processor -from nck.readers.reader import Reader -from nck.utils.args import extract_args -from nck.streams.normalized_json_stream import NormalizedJSONStream - - -@click.command(name="read_gsheets") -@click.option("--gsheets-url", required=True) -@click.option("--gsheets-worksheet-name", required=True, multiple=True) -@processor() -def gsheets(**kwargs): - return GSheetsReader(**extract_args("gsheets_", kwargs)) - -class GSheetsReader(Reader): +class GoogleSheetsReaderOld(Reader): _scopes = [ "https://spreadsheets.google.com/feeds", @@ -59,4 +48,4 @@ def result_generator(): for record in worksheet.get_all_records(): yield record - yield NormalizedJSONStream(worksheet.title, result_generator()) + yield JSONStream(worksheet.title, result_generator()) diff --git a/ack/readers/mysql/__init__.py b/ack/readers/mysql/__init__.py new file mode 100644 index 00000000..d46139b7 --- /dev/null +++ b/ack/readers/mysql/__init__.py @@ -0,0 +1,17 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. diff --git a/ack/readers/mysql/cli.py b/ack/readers/mysql/cli.py new file mode 100644 index 00000000..196b2422 --- /dev/null +++ b/ack/readers/mysql/cli.py @@ -0,0 +1,72 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ +import click +from ack.readers.mysql.reader import MySQLReader +from ack.utils.args import extract_args, has_arg, hasnt_arg +from ack.utils.processor import processor + + +@click.command(name="read_mysql") +@click.option("--mysql-user", required=True) +@click.option("--mysql-password", required=True) +@click.option("--mysql-host", required=True) +@click.option("--mysql-port", default=3306) +@click.option("--mysql-database", required=True) +@click.option("--mysql-watermark-column") +@click.option("--mysql-watermark-init") +@click.option("--mysql-query") +@click.option("--mysql-query-name") +@click.option("--mysql-table") +@click.option("--mysql-redis-state-service-name") +@click.option("--mysql-redis-state-service-host") +@click.option("--mysql-redis-state-service-port", default=6379) +@processor("mysql_password") +def mysql(**kwargs): + query_key = "mysql_query" + query_name_key = "mysql_query_name" + table_key = "mysql_table" + watermark_column_key = "mysql_watermark_column" + watermark_init_key = "mysql_watermark_init" + redis_state_service_keys = [ + "mysql_redis_state_service_name", + "mysql_redis_state_service_host", + "mysql_redis_state_service_port", + ] + + if hasnt_arg(query_key, kwargs) and hasnt_arg(table_key, kwargs): + raise click.BadParameter("Must specify either a table or a query for MySQL reader") + + if has_arg(query_key, kwargs) and has_arg(table_key, kwargs): + raise click.BadParameter("Cannot specify both a query and a table") + + if has_arg(query_key, kwargs) and hasnt_arg(query_name_key, kwargs): + raise click.BadParameter("Must specify a query name when running a MySQL query") + + redis_state_service_enabled = all([has_arg(key, kwargs) for key in redis_state_service_keys]) + + if has_arg(watermark_column_key, kwargs) and not redis_state_service_enabled: + raise click.BadParameter("You must configure state management to use MySQL watermarks") + + if hasnt_arg(watermark_column_key, kwargs) and redis_state_service_enabled: + raise 
click.BadParameter("You must specify a MySQL watermark when using state management") + + if hasnt_arg(watermark_init_key, kwargs) and redis_state_service_enabled: + raise click.BadParameter("You must specify an initial MySQL watermark value when using state management") + + return MySQLReader(**extract_args("mysql_", kwargs)) diff --git a/ack/readers/mysql/config.py b/ack/readers/mysql/config.py new file mode 100644 index 00000000..238bdc79 --- /dev/null +++ b/ack/readers/mysql/config.py @@ -0,0 +1,17 @@ +from pydantic import BaseModel + + +class MySQLReaderConfig(BaseModel): + user: str + password: str + host: str + port: int = 3306 + database: str + watermark_column: str + watermark_init: str + query: str + query_name: str + table: str + redis_state_service_name: str + redis_state_service_host: str + redis_state_service_port: int = 6379 diff --git a/nck/utils/sql.py b/ack/readers/mysql/helper.py similarity index 74% rename from nck/utils/sql.py rename to ack/readers/mysql/helper.py index 4730a48b..388a3b33 100644 --- a/nck/utils/sql.py +++ b/ack/readers/mysql/helper.py @@ -15,52 +15,49 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
import sqlalchemy

# Cache of sqlalchemy.MetaData objects, keyed by (engine, schema), so that
# reflection metadata is built only once per engine/schema pair.
_engine_meta = {}


def build_table_query(engine, schema, table, watermark_column, watermark_value):
    """Build a SELECT query on `table`, filtered on the watermark when set.

    Falls back to a full-table SELECT when either the watermark column or its
    current value is missing.
    """
    if watermark_column and watermark_value:
        return _build_table_query_with_watermark(engine, schema, table, watermark_column, watermark_value)
    return _build_table_query_without_watermark(engine, schema, table)


def build_custom_query(engine, query, watermark_column, watermark_value):
    """Wrap a raw SQL string into a textual statement.

    When a watermark column is configured, the watermark value is bound to the
    named parameter of the same name (the raw query is expected to contain a
    bind parameter such as ":updated_at").
    """
    statement = sqlalchemy.text(query)
    if watermark_column:
        statement = statement.bindparams(**{watermark_column: watermark_value})
    return statement


def _build_table_query_without_watermark(engine, schema, table):
    # Plain SELECT on the reflected table.
    return _get_table(engine, schema, table).select()


def _build_table_query_with_watermark(engine, schema, table, watermark_column, watermark_value):
    # Incremental SELECT: only rows strictly newer than the watermark value.
    t = _get_table(engine, schema, table)
    return t.select().where(t.columns[watermark_column] > watermark_value)


def _get_table(engine, schema, table):
    # Reflect the table definition from the live database.
    meta = _get_meta(engine, schema)
    return sqlalchemy.Table(table, meta, autoload=True, autoload_with=engine)


def _get_meta(engine, schema):
    # BUG FIX: the cache used to be keyed by engine only, so two different
    # schemas on the same engine would silently share (and corrupt) a single
    # MetaData instance. Key on the (engine, schema) pair instead.
    key = (engine, schema)
    if key not in _engine_meta:
        _engine_meta[key] = sqlalchemy.MetaData(engine, schema=schema)
    return _engine_meta[key]
import sqlalchemy
from ack.config import logger
from ack.readers.reader import Reader
from ack.streams.json_stream import JSONStream
from ack.utils.redis import RedisStateService
from ack.utils.retry import retry
from ack.readers.mysql.helper import build_custom_query, build_table_query


class MySQLReader(Reader):
    """Reads rows from a MySQL table or a custom query as a JSONStream.

    Supports optional incremental extraction: when `watermark_column` is set,
    the reader resumes from the last value persisted in Redis (falling back to
    `watermark_init` on the first run) and updates the state as rows are read.
    """

    def __init__(
        self,
        user,
        password,
        host,
        port,
        database,
        watermark_column,
        watermark_init,
        query,
        query_name,
        table,
        schema,
        redis_state_service_name,
        redis_state_service_host,
        redis_state_service_port,
    ):
        self._engine = self._create_engine(host, port, user, password, database)
        # The stream is named after the table, or after query_name for custom queries.
        self._name = table if table else query_name
        self._schema = schema
        self._watermark_column = watermark_column
        self._redis_state_service = RedisStateService(
            redis_state_service_name, redis_state_service_host, redis_state_service_port
        )

        # BUG FIX: _watermark_value is now always defined. It used to be set
        # only when a watermark column was configured, so the query builders
        # below raised AttributeError in the non-watermark case.
        if watermark_column:
            self._watermark_value = self._redis_state_service.get(self._name) or watermark_init
        else:
            self._watermark_value = None

        if table:
            self._query = build_table_query(self._engine, schema, table, watermark_column, self._watermark_value)
        else:
            # BUG FIX: build_custom_query takes (engine, query, watermark_column,
            # watermark_value) - `schema` was wrongly passed as a fifth argument,
            # which raised a TypeError for every custom-query run.
            self._query = build_custom_query(self._engine, query, watermark_column, self._watermark_value)

    @classmethod
    def _create_engine(cls, host, port, user, password, database):
        """Create a SQLAlchemy engine for the target MySQL database (pymysql driver)."""
        logger.info(f"Connecting to MySQL Database {database} on {host}:{port}")

        url = sqlalchemy.engine.url.URL(
            **{
                "drivername": "mysql+pymysql",
                "username": user,
                "password": password,
                "database": database,
                "port": port,
                "host": host,
            }
        )

        return sqlalchemy.create_engine(url)

    def read(self):
        """Yield a single JSONStream of query results; the engine is always disposed."""
        try:
            yield self._run_query()
        finally:
            self.close()

    @retry
    def _run_query(self):
        """Execute the configured query and wrap the result set in a JSONStream.

        When a watermark column is configured, the Redis-held watermark is
        updated after each row is yielded, so a restart resumes mid-stream.
        """
        logger.info(f"Running MySQL query {self._query}")

        rows = self._engine.execute(self._query)

        logger.info(f"MySQL result set contains {rows.rowcount} rows")

        def result_generator():
            row = rows.fetchone()
            while row:
                yield dict(row.items())

                if self._watermark_column:
                    # Persist progress so the next run resumes after this row.
                    self._redis_state_service.set(self._name, row[self._watermark_column])

                row = rows.fetchone()
            rows.close()

        return JSONStream(self._name, result_generator())

    def close(self):
        """Dispose of the SQLAlchemy engine and its connection pool."""
        logger.info("Closing MySQL connection")
        self._engine.dispose()
import click
from ack.readers.mytarget.config import REQUEST_TYPES
from ack.readers.mytarget.reader import MyTargetReader
from ack.utils.args import extract_args
from ack.utils.date_handler import DEFAULT_DATE_RANGE_FUNCTIONS
from ack.utils.processor import processor


# CLI entry point: every "--mytarget-*" option below is forwarded to
# MyTargetReader after extract_args strips the "mytarget_" prefix.
@click.command(name="read_mytarget")
@click.option("--mytarget-client-id", required=True)
@click.option("--mytarget-client-secret", required=True)
@click.option("--mytarget-refresh-token", required=True)
@click.option("--mytarget-request-type", type=click.Choice(REQUEST_TYPES), required=True)
@click.option(
    "--mytarget-date-range",
    type=click.Choice(DEFAULT_DATE_RANGE_FUNCTIONS.keys()),
    help=f"One of the available ACK default date ranges: {DEFAULT_DATE_RANGE_FUNCTIONS.keys()}",
)
@click.option("--mytarget-start-date", type=click.DateTime())
@click.option("--mytarget-end-date", type=click.DateTime())
@processor("mytarget-client-id", "mytarget-client-secret")
def mytarget(**kwargs):
    """Build a MyTargetReader from the `--mytarget-*` CLI options."""
    return MyTargetReader(**extract_args("mytarget_", kwargs))
from datetime import datetime
from typing import Literal

from pydantic import BaseModel, validator

from ack.utils.date_handler import DEFAULT_DATE_RANGE_FUNCTIONS

# Page size of the myTarget API: list endpoints return at most 20 items per
# call, so larger result sets are fetched with successive `offset` values.
LIMIT_REQUEST_MYTARGET = 20

# Report types supported by the reader.
REQUEST_TYPES = ["performance", "budget"]

# Per-endpoint request descriptors consumed by MyTargetReader:
# - url: endpoint to hit
# - headers_type: "content_type" (token refresh) or "authorization" (Bearer)
# - offset: whether the endpoint is paginated with an `offset` parameter
# - dates_required: whether date_from/date_to parameters must be sent
# - _campaign_id / ids: flags currently unused by the reader
REQUEST_CONFIG = {
    "refresh_agency_token": {
        "url": "https://target.my.com/api/v2/oauth2/token.json",
        "headers_type": "content_type",
        "offset": False,
        "_campaign_id": False,
        "dates_required": False,
        "ids": False,
    },
    "get_campaign_ids_names": {
        "url": "https://target.my.com/api/v2/campaigns.json?fields=id,name",
        "headers_type": "authorization",
        "offset": True,
        "_campaign_id": False,
        "dates_required": False,
        "ids": False,
    },
    "get_banner_ids_names": {
        "url": "https://target.my.com/api/v2/banners.json?fields=id,name,campaign_id",
        "headers_type": "authorization",
        "offset": True,
        "_campaign_id": False,
        "dates_required": False,
        "ids": False,
    },
    "get_banner_stats": {
        "url": "https://target.my.com/api/v2/statistics/banners/day.json",
        "headers_type": "authorization",
        "offset": False,
        "_campaign_id": False,
        "dates_required": True,
        "ids": False,
    },
    "get_campaign_budgets": {
        "url": "https://target.my.com/api/v2/campaigns.json?fields=id,name,budget_limit,budget_limit_day",
        "headers_type": "authorization",
        "offset": True,
        "_campaign_id": False,
        "dates_required": False,
        "ids": False,
    },
    "get_campaign_dates": {
        "url": "https://target.my.com/api/v2/campaigns.json?fields=id,name,date_start,date_end,status",
        "headers_type": "authorization",
        "offset": True,
        "_campaign_id": False,
        "dates_required": False,
        "ids": False,
    },
}


class MyTargetReaderConfig(BaseModel):
    """Validated configuration for the myTarget reader (config-file runs)."""

    client_id: str
    client_secret: str
    refresh_token: str
    # Literal[tuple(...)] expands the tuple into the Literal's members.
    request_type: Literal[tuple(REQUEST_TYPES)]
    date_range: Literal[tuple(DEFAULT_DATE_RANGE_FUNCTIONS.keys())] = None
    start_date: datetime = None
    end_date: datetime = None

    @validator("start_date", "end_date", pre=True)
    def date_format(cls, v):
        """Parse 'YYYY-MM-DD' strings into datetimes before standard validation."""
        if isinstance(v, str):
            try:
                return datetime.strptime(v, "%Y-%m-%d")
            except ValueError:
                raise ValueError("Datetime format must follow 'YYYY-MM-DD'")
        return v
import itertools
from datetime import datetime
from typing import Any, Dict, List, Tuple

import requests
from ack.readers.mytarget.config import LIMIT_REQUEST_MYTARGET, REQUEST_CONFIG
from ack.readers.reader import Reader
from ack.streams.json_stream import JSONStream
from ack.utils.exceptions import MissingItemsInResponse
from tenacity import retry, stop_after_delay, wait_exponential
from ack.utils.date_handler import build_date_range


class MyTargetReader(Reader):
    """Reads banner performance or campaign budget data from the myTarget API.

    A fresh OAuth access token is obtained at construction time from the
    provided refresh token; paginated endpoints are walked in pages of
    LIMIT_REQUEST_MYTARGET items.
    """

    def __init__(self, client_id, client_secret, refresh_token, request_type, date_range, start_date, end_date, **kwargs):
        self.client_id = client_id
        self.client_secret = client_secret
        self.agency_client_token = {"refresh_token": refresh_token}
        self.request_type = request_type
        self.start_date, self.end_date = build_date_range(start_date, end_date, date_range)
        # NOTE(review): date_format is read from kwargs but never used in this
        # class - confirm whether it can be dropped.
        self.date_format = kwargs.get("date_format")
        self.date_are_valid = self.__check_date_input_validity()
        self.__retrieve_and_set_token()

    def read(self):
        """Yield one JSONStream of either daily performance rows or budget rows."""
        if self.date_are_valid:
            if self.request_type == "performance":
                dict_stat, dict_camp, dict_banner = self.__retrieve_all_data()

                complete_daily_content = self.map_campaign_name_to_daily_stat(dict_stat, dict_camp, dict_banner)
                yield JSONStream("mytarget_performance_", self.split_content_by_date(complete_daily_content))
            if self.request_type == "budget":
                res_dates = self.__get_all_results("get_campaign_dates")
                res_budgets = self.__get_all_results("get_campaign_budgets")

                budget_with_dates = self.map_budget_to_date_range(res_dates, res_budgets)
                yield JSONStream("mytarget_budget_", self.__yield_from_list(budget_with_dates))

    def __check_date_input_validity(self) -> bool:
        """Check the validity of the date input parameters before retrieving data."""
        return self.__check_end_posterior_to_start(self.start_date, self.end_date) and self.__check_date_not_in_future(
            self.end_date
        )

    def __check_date_not_in_future(self, end_date: datetime) -> bool:
        # Raises rather than returning False: an invalid range is a hard error.
        if end_date <= datetime.today():
            return True
        else:
            raise ValueError(f"The end date {end_date} is posterior to current date {datetime.today()}")

    def __check_end_posterior_to_start(self, start_date: datetime, end_date: datetime) -> bool:
        if start_date > end_date:
            raise ValueError(f"The start date {start_date} is posterior to end date {end_date}")
        else:
            return True

    def __retrieve_and_set_token(self):
        """Exchange the refresh token for an active (day-long) access token.

        The refreshed token payload is stored on the instance for all
        subsequent authorized requests.
        """
        parameters_refresh_token = self.__generate_params_dict("refresh_agency_token")
        request_refresh_token = self.__create_request("refresh_agency_token", parameters_refresh_token)
        refreshed_token = requests.post(**request_refresh_token).json()
        self.set_agency_client_token(refreshed_token)

    def __retrieve_all_data(self) -> Tuple[Dict[str, Dict[str, str]], Dict[str, Dict[str, str]], Dict[str, Dict[str, str]]]:
        """Fetch daily stats, campaign names and banner names, each indexed by id."""
        response_camp_id_name = self.__get_all_results("get_campaign_ids_names")
        response_banner_id_name = self.__get_all_results("get_banner_ids_names")
        response_daily_stat = self.__get_response("get_banner_stats")

        dict_stat = self.__transform_list_dict_to_dict(response_daily_stat["items"])
        dict_camp = self.__transform_list_dict_to_dict(response_camp_id_name)
        dict_banner = self.__transform_list_dict_to_dict(response_banner_id_name)

        return dict_stat, dict_camp, dict_banner

    def __get_all_results(self, name_content: str, offset=0) -> List[Dict[str, str]]:
        """Walk a paginated endpoint, incrementing offsets based on the first
        response's item count.

        Args:
            name_content (str): key of the REQUEST_CONFIG descriptor to use
            offset (int, optional): initial offset of the request. Defaults to 0.

        Returns:
            List[Dict[str, Any]]: concatenated items from all pages
        """
        first_elements = self.__get_response(name_content)
        count = first_elements["count"]
        elements = [first_elements["items"]]
        if count > LIMIT_REQUEST_MYTARGET:
            elements += [
                self.__get_response(name_content, offset=offset)["items"]
                for offset in range(
                    LIMIT_REQUEST_MYTARGET, self.round_up_to_base(count, LIMIT_REQUEST_MYTARGET), LIMIT_REQUEST_MYTARGET
                )
            ]
        return list(itertools.chain.from_iterable(elements))

    def map_campaign_name_to_daily_stat(
        self, dict_stat: Dict[str, Dict[str, Any]], dict_camp: Dict[str, Dict[str, Any]], dict_banner: Dict[str, Dict[str, Any]]
    ) -> Dict[str, Dict[str, Any]]:
        """Attach campaign names and daily stat rows to each banner entry.

        Banners whose campaign id is unknown are dropped from the result.
        NOTE(review): assumes every remaining banner id has an entry in
        dict_stat - confirm against the API contract.
        """
        unused_banners = []
        for ban_id in dict_banner.keys():
            if dict_banner[ban_id]["campaign_id"] in dict_camp.keys():
                dict_banner[ban_id]["campaign_name"] = dict_camp[dict_banner[ban_id]["campaign_id"]]["name"]
                dict_banner[ban_id]["rows"] = dict_stat[dict_banner[ban_id]["id"]]["rows"]
            else:
                unused_banners.append(ban_id)
        for unused_ban_id in unused_banners:
            dict_banner.pop(unused_ban_id)
        return dict_banner

    def map_budget_to_date_range(self, dates: List[Dict[str, str]], budgets: List[Dict[str, str]]) -> List[Dict[str, str]]:
        """Enrich each budget record with its campaign's start/end dates and status.

        FIX: `dates` is the list returned by __get_all_results (it is indexed
        by id below), so it is annotated as a list, not a dict.
        """
        result = []
        dates_dict = self.__transform_list_dict_to_dict(dates)
        for budget in budgets:
            budget["date_start"] = dates_dict[budget["id"]]["date_start"]
            budget["date_end"] = dates_dict[budget["id"]]["date_end"]
            budget["status"] = dates_dict[budget["id"]]["status"]
            result.append(budget)
        return result

    def split_content_by_date(self, content: Dict[str, Dict[str, Any]]):
        """Flatten banner-level content into one record per (banner, day).

        FIX: the parameter is the dict produced by map_campaign_name_to_daily_stat
        (it is iterated with .values() below), not a list as previously annotated.
        Also removed a `dates` accumulator that was filled but never read.

        Args:
            content (Dict[str, Dict[str, Any]]): banners indexed by id, each
                carrying campaign info and a "rows" list of daily stats.

        Yields:
            Dict[str, Any]: one record per banner and per day, consumed line
            by line by the JSON stream.
        """
        content_by_date = []
        for value in content.values():
            new_line_base = {
                "campaign_id": value["campaign_id"],
                "campaign_name": value["campaign_name"],
                "banner_id": value["id"],
                "banner_name": value.get("name"),
            }
            for dict_daily_stats in value["rows"]:
                content_by_date.append({**new_line_base, **dict_daily_stats})
        yield from content_by_date

    @retry(wait=wait_exponential(multiplier=1, min=1, max=600), stop=stop_after_delay(600))
    def __get_response(self, name_content: str, offset=0) -> Dict[str, Any]:
        """Make one API request for the need described by `name_content`.

        Args:
            name_content (str): key of the REQUEST_CONFIG descriptor to use
            offset (int, optional): offset of the request. Defaults to 0.

        Returns:
            Dict[str, Any]: parsed JSON response

        Raises:
            MissingItemsInResponse: when the response carries no "items" key.
        """
        parameters = self.__generate_params_dict(name_content, offset=offset)
        request = self.__create_request(name_content, parameters)
        resp = requests.get(**request).json()
        if "items" not in resp.keys():
            raise MissingItemsInResponse("Can't retrieve any item from this response")
        return resp

    def __create_request(self, name_content: str, parameters: Dict[str, Any]) -> Dict[str, Any]:
        """Assemble the kwargs dict passed to requests.get/post.

        Args:
            name_content (str): key of the REQUEST_CONFIG descriptor to use
            parameters (Dict[str, Any]): dict built by __generate_params_dict

        Returns:
            Dict[str, Any]: kwargs (url, headers, plus data/params) for requests
        """
        req_base = {
            "url": self.__get_url(name_content),
            "headers": self.__get_header(REQUEST_CONFIG[name_content]["headers_type"]),
        }
        return {**req_base, **parameters}

    def __get_url(self, name_content: str) -> str:
        """Return the endpoint URL configured for `name_content`."""
        return REQUEST_CONFIG[name_content]["url"]

    def __get_header(self, header_type: str) -> Dict[str, str]:
        """Build the HTTP headers: form-encoded for token refresh, Bearer otherwise."""
        if header_type == "content_type":
            return {"Content-Type": "application/x-www-form-urlencoded", "Host": "target.my.com"}
        elif header_type == "authorization":
            return {"Authorization": "Bearer " + self.agency_client_token["access_token"], "Host": "target.my.com"}

    def __generate_params_dict(self, name_content: str, offset=0) -> Dict[str, Any]:
        """Build the request parameters for `name_content`.

        Args:
            name_content (str): key of the REQUEST_CONFIG descriptor to use
            offset (int, optional): offset of the request. Defaults to 0.

        Returns:
            Dict[str, Any]: either a "data" form payload (token refresh) or a
            "params" query-string dict.
        """
        dict_config = REQUEST_CONFIG[name_content]
        params = {}
        if name_content == "refresh_agency_token":
            params["data"] = {
                "grant_type": "refresh_token",
                "refresh_token": self.agency_client_token["refresh_token"],
                "client_id": self.client_id,
                "client_secret": self.client_secret,
            }
        else:
            params["params"] = {}
            if dict_config["offset"]:
                params["params"]["offset"] = offset
            if dict_config["dates_required"]:
                # NOTE: this overwrites any offset set above; date-driven
                # endpoints are not paginated in REQUEST_CONFIG, so the two
                # flags are currently mutually exclusive.
                params["params"] = {
                    "date_from": self.start_date.strftime("%Y-%m-%d"),
                    "date_to": self.end_date.strftime("%Y-%m-%d"),
                    "metrics": "all",
                }
        return params

    def set_agency_client_token(self, agency_token: Dict[str, Any]):
        """Store the refreshed OAuth token payload returned by the API.

        FIX: the argument is the parsed JSON dict, not a string as previously
        annotated.
        """
        self.agency_client_token = agency_token

    def round_up_to_base(self, x: int, base: int) -> int:
        """Return an upper bound of x rounded towards a multiple of `base`, plus one.

        The +1 ensures the value is strictly greater than the last offset that
        must be requested, so range(base, round_up_to_base(x, base), base)
        always covers every page even when round() rounds x/base down.
        """
        return base * round(x / base) + 1

    def __yield_from_list(self, content: List[Dict[str, str]]):
        """Adapt a list into the generator shape expected by JSONStream."""
        yield from content

    def __transform_list_dict_to_dict(self, data: List[Dict[str, str]]) -> Dict[str, Dict[str, str]]:
        """Index a list of records by their "id" field."""
        return {item["id"]: item for item in data}
import tempfile

from ack import config
from ack.config import logger
from ack.readers.reader import Reader
from ack.streams.json_stream import JSONStream
from ack.utils.file_reader import create_file_reader


class ObjectStorageReader(Reader):
    """Template base class for object-storage readers (S3, GCS, ...).

    Subclasses supply the storage-specific primitives (client/bucket creation,
    listing, key/timestamp accessors, download); this class walks every
    configured prefix and yields one JSONStream per compatible object,
    oldest first.
    """

    def __init__(self, bucket, prefix, file_format, dest_key_split, platform=None, **kwargs):
        self._client = self.create_client(config)
        self._bucket = self.create_bucket(self._client, bucket)
        self._prefix_list = prefix
        self._platform = platform

        self._format = file_format
        self._reader = create_file_reader(self._format, **kwargs).get_reader()
        self._dest_key_split = dest_key_split

        # State keys used to track ingestion progress per platform.
        self.MAX_TIMESTAMP_STATE_KEY = f"{self._platform}_max_timestamp".lower()
        self.MAX_FILES_STATE_KEY = f"{self._platform}_max_files".lower()

    def read(self):
        """Yield a JSONStream per compatible object under each prefix, oldest first."""
        for prefix in self._prefix_list:
            listed = self.list_objects(bucket=self._bucket, prefix=prefix)
            for raw_object in sorted(listed, key=self.get_timestamp):
                current = self.to_object(raw_object)
                logger.info(f"Found {self._platform} file {self.get_key(current)}")

                if not self.is_compatible_object(current):
                    # Skip anything whose key lacks the configured extension.
                    logger.info(f"Wrong extension: Skipping file {self.get_key(current)}")
                    continue

                stream_name = self.get_key(current).split("/", self._dest_key_split)[-1]
                yield JSONStream(stream_name, self._result_generator(current))

    def _result_generator(self, _object):
        # Download into a throwaway temp file, then stream records out of it.
        with tempfile.TemporaryFile() as temp:
            self.download_object_to_file(_object, temp)
            yield from self._reader(temp)

    def is_compatible_object(self, _object):
        """True when the object's key ends with the configured file extension."""
        return self.get_key(_object).endswith(f".{self._format}")

    # --- storage-specific hooks, implemented by subclasses ---

    def create_client(self, config):
        raise NotImplementedError

    def create_bucket(self, client, bucket):
        raise NotImplementedError

    def list_objects(self, bucket, prefix):
        raise NotImplementedError

    @staticmethod
    def get_timestamp(_object):
        raise NotImplementedError

    @staticmethod
    def get_key(_object):
        raise NotImplementedError

    @staticmethod
    def to_object(_object):
        raise NotImplementedError

    @staticmethod
    def download_object_to_file(_object, temp):
        raise NotImplementedError
import click
from ack.readers.radarly.reader import RadarlyReader
from ack.utils.args import extract_args
from ack.utils.processor import processor


# CLI entry point: every "--radarly-*" option below is forwarded to
# RadarlyReader after extract_args strips the "radarly_" prefix.
@click.command(name="read_radarly")
@click.option("--radarly-pid", required=True, type=click.INT, help="Radarly Project ID")
@click.option("--radarly-client-id", required=True, type=click.STRING)
@click.option("--radarly-client-secret", required=True, type=click.STRING)
@click.option(
    "--radarly-focus-id", required=True, multiple=True, type=click.INT, help="Focus IDs (from Radarly queries)",
)
@click.option(
    "--radarly-start-date",
    required=True,
    type=click.DateTime(formats=["%Y-%m-%d", "%Y-%m-%dT%H:%M:%SZ", "%Y-%m-%d %H:%M:%S"]),
)
@click.option(
    "--radarly-end-date", required=True, type=click.DateTime(formats=["%Y-%m-%d", "%Y-%m-%dT%H:%M:%SZ", "%Y-%m-%d %H:%M:%S"]),
)
@click.option(
    "--radarly-api-request-limit", default=250, type=click.INT, help="Max number of posts per API request",
)
@click.option(
    "--radarly-api-date-period-limit",
    default=int(1e4),
    type=click.INT,
    help="Max number of posts in a single API search query",
)
@click.option(
    "--radarly-api-quarterly-posts-limit",
    default=int(45e3),
    type=click.INT,
    help="Max number of posts requested in the window (usually 15 min) (see Radarly documentation)",
)
@click.option(
    "--radarly-api-window", default=300, type=click.INT, help="Duration of the window (usually 300 seconds)",
)
@click.option(
    "--radarly-throttle",
    default=True,
    type=click.BOOL,
    help="""If set to True, forces the connector to abide by official Radarly API limitations
    (using the api-quarterly-posts-limit parameter)""",
)
@click.option("--radarly-throttling-threshold-coefficient", default=0.95, type=click.FLOAT)
@processor("radarly_client_id", "radarly_client_secret")
def radarly(**kwargs):
    """Build a RadarlyReader from the `--radarly-*` CLI options."""
    return RadarlyReader(**extract_args("radarly_", kwargs))
from datetime import datetime
from typing import List

from pydantic import BaseModel, validator


class RadarlyReaderConfig(BaseModel):
    """Validated configuration for the Radarly reader (config-file runs).

    Mirrors the `--radarly-*` CLI options; the api_* fields encode official
    Radarly API rate limits used by the reader's throttling logic.
    """

    pid: int  # Radarly project id
    client_id: str
    client_secret: str
    focus_id: List[int]  # focus (query) ids to read posts from
    start_date: datetime = None
    end_date: datetime = None
    api_request_limit: int = 250  # max posts per API request
    api_date_period_limit: int = int(1e4)  # max posts per search query
    api_quarterly_posts_limit: int = int(45e3)  # max posts per rate-limit window
    api_window: int = 300  # rate-limit window duration, in seconds
    throttle: bool = True  # abide by official API limitations when True
    throttling_threshold_coefficient: float = 0.95

    @validator("start_date", "end_date", pre=True)
    def date_format(cls, v):
        """Parse 'YYYY-MM-DD' strings into datetimes before standard validation."""
        if isinstance(v, str):
            try:
                return datetime.strptime(v, "%Y-%m-%d")
            except ValueError:
                raise ValueError("Datetime format must follow 'YYYY-MM-DD'")
        return v
-import click -import logging import sys -import traceback import time -import numpy as np -from datetime import datetime, timedelta - -from typing import List, Dict, Tuple -from typing import NamedTuple +import traceback from collections import OrderedDict +from datetime import datetime, timedelta +from typing import Dict, List, NamedTuple, Tuple -from nck.readers import Reader -from nck.commands.command import processor -from nck.streams.normalized_json_stream import NormalizedJSONStream -from nck.utils.retry import retry -from nck.utils.args import extract_args +import numpy as np +from ack.config import logger +from ack.readers.reader import Reader +from ack.streams.json_stream import JSONStream +from ack.utils.retry import retry from radarly import RadarlyApi -from radarly.project import Project from radarly.parameters import SearchPublicationParameter as Payload +from radarly.project import Project class DateRangeSplit(NamedTuple): @@ -44,70 +39,6 @@ class DateRangeSplit(NamedTuple): is_compliant: bool -@click.command(name="read_radarly") -@click.option("--radarly-pid", required=True, type=click.INT, help="Radarly Project ID") -@click.option("--radarly-client-id", required=True, type=click.STRING) -@click.option("--radarly-client-secret", required=True, type=click.STRING) -@click.option( - "--radarly-focus-id", - required=True, - multiple=True, - type=click.INT, - help="Focus IDs (from Radarly queries)", -) -@click.option( - "--radarly-start-date", - required=True, - type=click.DateTime( - formats=["%Y-%m-%d", "%Y-%m-%dT%H:%M:%SZ", "%Y-%m-%d %H:%M:%S"] - ), -) -@click.option( - "--radarly-end-date", - required=True, - type=click.DateTime( - formats=["%Y-%m-%d", "%Y-%m-%dT%H:%M:%SZ", "%Y-%m-%d %H:%M:%S"] - ), -) -@click.option( - "--radarly-api-request-limit", - default=250, - type=click.INT, - help="Max number of posts per API request", -) -@click.option( - "--radarly-api-date-period-limit", - default=int(1e4), - type=click.INT, - help="Max number of posts in 
a single API search query", -) -@click.option( - "--radarly-api-quarterly-posts-limit", - default=int(45e3), - type=click.INT, - help="Max number of posts requested in the window (usually 15 min) (see Radarly documentation)", -) -@click.option( - "--radarly-api-window", - default=300, - type=click.INT, - help="Duration of the window (usually 300 seconds)", -) -@click.option( - "--radarly-throttle", - default=True, - type=click.BOOL, - help="""If set to True, forces the connector to abide by official Radarly API limitations - (using the api-quarterly-posts-limit parameter)""", -) -@click.option( - "--radarly-throttling-threshold-coefficient", default=0.95, type=click.FLOAT -) -@processor("radarly_client_id", "radarly_client_secret") -def radarly(**kwargs): - return RadarlyReader(**extract_args("radarly_", kwargs)) - - class RadarlyReader(Reader): def __init__( self, @@ -147,9 +78,7 @@ def read(self): :return: stream that returns Radarly posts one by one """ date_ranges_and_posts_volumes: Dict = self.split_date_range() - logging.info( - f"API Compliant Date Ranges and Posts Volumes: {date_ranges_and_posts_volumes}" - ) + logger.info(f"API Compliant Date Ranges and Posts Volumes: {date_ranges_and_posts_volumes}") api_compliant_date_ranges = list(date_ranges_and_posts_volumes.keys()) t0 = time.time() @@ -161,20 +90,11 @@ def read(self): current_time = time.time() - t0 ingestion_tracker.append(current_time) posts_ingested_over_window = ( - sum(np.array(ingestion_tracker) > current_time - self.api_window) - * self.api_date_period_limit + sum(np.array(ingestion_tracker) > current_time - self.api_window) * self.api_date_period_limit ) - if ( - posts_ingested_over_window - > self.throttling_threshold_coefficient - * self.api_quarterly_posts_limit - ): - sleep_duration = self.api_window * ( - self.api_date_period_limit / self.api_quarterly_posts_limit - ) - logging.info( - f"Throttling activated: waiting for {sleep_duration} seconds..." 
- ) + if posts_ingested_over_window > self.throttling_threshold_coefficient * self.api_quarterly_posts_limit: + sleep_duration = self.api_window * (self.api_date_period_limit / self.api_quarterly_posts_limit) + logger.info(f"Throttling activated: waiting for {sleep_duration} seconds...") time.sleep(sleep_duration) all_publications = self.get_publications_iterator(date_range) @@ -190,17 +110,15 @@ def result_generator(): break except Exception: ex_type, ex, tb = sys.exc_info() - logging.warning( - f"Failed to ingest post with error: {ex}. Traceback: {traceback.print_tb(tb)}" - ) + logger.warning(f"Failed to ingest post with error: {ex}. Traceback: {traceback.print_tb(tb)}") - yield NormalizedJSONStream(name, result_generator()) + yield JSONStream(name, result_generator()) @retry def get_publications_iterator(self, date_range: Tuple[datetime, datetime]): param = self.get_payload(date_range[0], date_range[1]) all_publications = self.project.get_all_publications(param) - logging.info(f"Getting posts from {date_range[0]} to {date_range[1]}") + logger.info(f"Getting posts from {date_range[0]} to {date_range[1]}") return all_publications @staticmethod @@ -224,9 +142,7 @@ def get_posts_volume(self, first_date: datetime, second_date: datetime) -> int: return posts_volume def get_total_posts_volume(self) -> int: - return self.get_posts_volume( - first_date=self.start_date, second_date=self.end_date - ) + return self.get_posts_volume(first_date=self.start_date, second_date=self.end_date) def get_posts_volumes_from_list( self, date_ranges: List[Tuple[datetime, datetime]] @@ -234,33 +150,26 @@ def get_posts_volumes_from_list( posts_volumes = OrderedDict() for date_range in date_ranges: first_date, second_date = date_range - posts_volume = self.get_posts_volume( - first_date=first_date, second_date=second_date - ) + posts_volume = self.get_posts_volume(first_date=first_date, second_date=second_date) posts_volumes[date_range] = posts_volume return posts_volumes def 
split_date_range(self) -> Dict[Tuple[datetime, datetime], int]: total_count = self.get_total_posts_volume() - logging.info(f"Posts Total Count: {total_count}") - return self._split_date_range_auxiliary( - self.start_date, self.end_date, posts_count=total_count - ) + logger.info(f"Posts Total Count: {total_count}") + return self._split_date_range_auxiliary(self.start_date, self.end_date, posts_count=total_count) def _split_date_range_auxiliary( self, first_date: datetime, second_date: datetime, posts_count: int ) -> Dict[Tuple[datetime, datetime], int]: if posts_count < self.api_date_period_limit: - logging.debug(f"Direct Return: {[first_date, second_date]}") + logger.debug(f"Direct Return: {[first_date, second_date]}") return OrderedDict({(first_date, second_date): posts_count}) else: date_range_split: DateRangeSplit = self.generate_DateRangeSplit_object( - date_range_start=first_date, - date_range_end=second_date, - posts_count=posts_count, - extra_margin=1, + date_range_start=first_date, date_range_end=second_date, posts_count=posts_count, extra_margin=1, ) date_ranges_and_posts_volumes: Dict[ Tuple[datetime, datetime], int @@ -275,37 +184,20 @@ def _split_date_range_auxiliary( if vol < self.api_date_period_limit: res.update({date_range: vol}) else: - res.update( - self._split_date_range_auxiliary( - *date_range, posts_count=vol - ) - ) + res.update(self._split_date_range_auxiliary(*date_range, posts_count=vol)) return res def generate_DateRangeSplit_object( - self, - date_range_start: datetime, - date_range_end: datetime, - posts_count: int, - extra_margin=1, + self, date_range_start: datetime, date_range_end: datetime, posts_count: int, extra_margin=1, ) -> DateRangeSplit: delta = date_range_end - date_range_start - split_count_guess = ( - posts_count // self.api_date_period_limit + delta.days + extra_margin - ) + split_count_guess = posts_count // self.api_date_period_limit + delta.days + extra_margin split_range_guess = delta.total_seconds() // 
split_count_guess - date_ranges_guess = self._generate_date_ranges( - date_range_start, date_range_end, split_range_guess, split_count_guess - ) - date_ranges_and_posts_volumes = self.get_posts_volumes_from_list( - date_ranges_guess - ) - is_compliant = all( - np.fromiter(date_ranges_and_posts_volumes.values(), dtype=int) - <= self.api_date_period_limit - ) + date_ranges_guess = self._generate_date_ranges(date_range_start, date_range_end, split_range_guess, split_count_guess) + date_ranges_and_posts_volumes = self.get_posts_volumes_from_list(date_ranges_guess) + is_compliant = all(np.fromiter(date_ranges_and_posts_volumes.values(), dtype=int) <= self.api_date_period_limit) return DateRangeSplit(date_ranges_and_posts_volumes, is_compliant) @@ -314,14 +206,9 @@ def _generate_date_ranges( start_date: datetime, end_date: datetime, split_range: float, split_count: int ) -> List[Tuple[datetime, datetime]]: res = [ - ( - start_date + i * timedelta(seconds=split_range), - start_date + (i + 1) * timedelta(seconds=split_range), - ) + (start_date + i * timedelta(seconds=split_range), start_date + (i + 1) * timedelta(seconds=split_range),) for i in range(split_count - 1) ] - res += [ - (start_date + (split_count - 1) * timedelta(seconds=split_range), end_date) - ] + res += [(start_date + (split_count - 1) * timedelta(seconds=split_range), end_date)] return res diff --git a/nck/readers/reader.py b/ack/readers/reader.py similarity index 90% rename from nck/readers/reader.py rename to ack/readers/reader.py index 090c3675..9292e4de 100644 --- a/nck/readers/reader.py +++ b/ack/readers/reader.py @@ -17,14 +17,7 @@ # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -import nck.state_service as state - - -class Reader(object): - @property - def state(self): - return state.state() - +class Reader: def read(self): """ The read method takes no arguments, and should return a generator of stream objects. 
diff --git a/ack/readers/salesforce/__init__.py b/ack/readers/salesforce/__init__.py new file mode 100644 index 00000000..d46139b7 --- /dev/null +++ b/ack/readers/salesforce/__init__.py @@ -0,0 +1,17 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. diff --git a/ack/readers/salesforce/cli.py b/ack/readers/salesforce/cli.py new file mode 100644 index 00000000..2d21a9bc --- /dev/null +++ b/ack/readers/salesforce/cli.py @@ -0,0 +1,71 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +import click +from ack.readers.salesforce.reader import SalesforceReader +from ack.utils.args import extract_args, has_arg, hasnt_arg +from ack.utils.processor import processor + + +@click.command(name="read_salesforce") +@click.option("--salesforce-consumer-key", required=True) +@click.option("--salesforce-consumer-secret", required=True) +@click.option("--salesforce-user", required=True) +@click.option("--salesforce-password", required=True) +@click.option("--salesforce-object-type") +@click.option("--salesforce-query") +@click.option("--salesforce-query-name") +@click.option("--salesforce-watermark-column") +@click.option("--salesforce-watermark-init") +@click.option("--salesforce-redis-state-service-name") +@click.option("--salesforce-redis-state-service-host") +@click.option("--salesforce-redis-state-service-port", default=6379) +@processor("salesforce_consumer_key", "salesforce_consumer_secret", "salesforce_password") +def salesforce(**kwargs): + query_key = "salesforce_query" + query_name_key = "salesforce_query_name" + object_type_key = "salesforce_object_type" + watermark_column_key = "salesforce_watermark_column" + watermark_init_key = "salesforce_watermark_init" + redis_state_service_keys = [ + "salesforce_redis_state_service_name", + "salesforce_redis_state_service_host", + "salesforce_redis_state_service_port", + ] + + if hasnt_arg(query_key, kwargs) and hasnt_arg(object_type_key, kwargs): + raise click.BadParameter("Must specify either an object type or a query for Salesforce") + + if has_arg(query_key, kwargs) and has_arg(object_type_key, kwargs): + raise click.BadParameter("Cannot specify both a query and an object type for Salesforce") + + if has_arg(query_key, kwargs) and hasnt_arg(query_name_key, kwargs): + raise 
click.BadParameter("Must specify a query name when running a Salesforce query") + + redis_state_service_enabled = all([has_arg(key, kwargs) for key in redis_state_service_keys]) + + if has_arg(watermark_column_key, kwargs) and not redis_state_service_enabled: + raise click.BadParameter("You must configure state management to use Salesforce watermarks") + + if hasnt_arg(watermark_column_key, kwargs) and redis_state_service_enabled: + raise click.BadParameter("You must specify a Salesforce watermark when using state management") + + if hasnt_arg(watermark_init_key, kwargs) and redis_state_service_enabled: + raise click.BadParameter("You must specify an initial Salesforce watermark value when using state management") + + return SalesforceReader(**extract_args("salesforce_", kwargs)) diff --git a/ack/readers/salesforce/config.py b/ack/readers/salesforce/config.py new file mode 100644 index 00000000..4e045a86 --- /dev/null +++ b/ack/readers/salesforce/config.py @@ -0,0 +1,42 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+from pydantic import BaseModel
+
+SALESFORCE_LOGIN_ENDPOINT = "https://login.salesforce.com/services/oauth2/token"
+SALESFORCE_LOGIN_REDIRECT = "https://login.salesforce.com/services/oauth2/success"
+SALESFORCE_SERVICE_ENDPOINT = "https://eu16.force.com"
+SALESFORCE_QUERY_ENDPOINT = "/services/data/v42.0/query/"
+SALESFORCE_DESCRIBE_ENDPOINT = "/services/data/v42.0/sobjects/{obj}/describe"
+
+
+class SalesforceReaderConfig(BaseModel):
+    consumer_key: str
+    consumer_secret: str
+    user: str
+    password: str
+    object_type: str = None
+    query: str = None
+    query_name: str = None
+    watermark_column: str = None
+    watermark_init: str = None
+    # NOTE(review): removed duplicate `query`/`query_name` re-declarations that
+    # overrode the optional defaults above and silently made both fields required.
+    table: str = None
+    redis_state_service_name: str
+    redis_state_service_host: str
+    redis_state_service_port: int = 6379
diff --git a/ack/readers/salesforce/reader.py b/ack/readers/salesforce/reader.py
new file mode 100644
index 00000000..20dbc9c3
--- /dev/null
+++ b/ack/readers/salesforce/reader.py
@@ -0,0 +1,125 @@
+# GNU Lesser General Public License v3.0 only
+# Copyright (C) 2020 Artefact
+# licence-information@artefact.com
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 3 of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ +import collections + +from ack.readers.reader import Reader +from ack.clients.salesforce.client import SalesforceClient +from ack.streams.json_stream import JSONStream +from ack.utils.redis import RedisStateService +from ack.utils.retry import retry + + +class SalesforceReader(Reader): + def __init__( + self, + consumer_key, + consumer_secret, + user, + password, + query, + query_name, + object_type, + watermark_column, + watermark_init, + redis_state_service_name, + redis_state_service_host, + redis_state_service_port, + ): + self._name = query_name or object_type + self._client = SalesforceClient(user, password, consumer_key, consumer_secret) + self._watermark_column = watermark_column + self._watermark_init = watermark_init + self._object_type = object_type + self._query = query + self._redis_state_service = RedisStateService( + redis_state_service_name, redis_state_service_host, redis_state_service_port + ) + + def build_object_type_query(self, object_type, watermark_column): + description = self._client.describe(object_type) + fields = [f["name"] for f in description["fields"]] + + field_projection = ", ".join(fields) + query = "SELECT {fields} FROM {object_type}".format(fields=field_projection, object_type=object_type) + + if watermark_column: + query = "{base} WHERE {watermark_column} > {{{watermark_column}}}".format( + base=query, watermark_column=watermark_column + ) + + return query + + @retry + def read(self): + def result_generator(): + + watermark_value = None + + if self._watermark_column: + watermark_value = self._redis_state_service.get(self._name) or self._watermark_init + + if self._object_type: + self._query = self.build_object_type_query(self._object_type, self._watermark_column) + + if self._watermark_column: + self._query = self._query.format(**{self._watermark_column: watermark_value}) + + records = self._client.query(self._query) + + for rec in records: + row = self._clean_record(rec) + yield row + + if self._watermark_column: + 
self._redis_state_service.set(self._name, row[self._watermark_column])
+
+        yield JSONStream(self._name, result_generator())
+
+    @classmethod
+    def _clean_record(cls, record):
+        """
+        Salesforce records contain metadata which we don't need during ingestion
+        """
+        return cls._flatten(cls._delete_metadata_from_record(record))
+
+    @classmethod
+    def _delete_metadata_from_record(cls, record):
+
+        if isinstance(record, dict):
+            strip_keys = ["attributes", "totalSize", "done"]
+            return {k: cls._delete_metadata_from_record(v) for k, v in record.items() if k not in strip_keys}
+        elif isinstance(record, list):
+            return [cls._delete_metadata_from_record(i) for i in record]
+        else:
+            return record
+
+    @classmethod
+    def _flatten(cls, json_dict, parent_key="", sep="_"):
+        """
+        Reduce number of dict levels
+        Note: useful for BigQuery schema auto-detection
+        """
+        items = []
+        for k, v in json_dict.items():
+            new_key = parent_key + sep + k if parent_key else k
+            if isinstance(v, collections.abc.MutableMapping):
+                items.extend(cls._flatten(v, new_key, sep=sep).items())
+            else:
+                items.append((new_key, v))
+        return dict(items)
diff --git a/ack/readers/the_trade_desk/__init__.py b/ack/readers/the_trade_desk/__init__.py
new file mode 100644
index 00000000..d46139b7
--- /dev/null
+++ b/ack/readers/the_trade_desk/__init__.py
@@ -0,0 +1,17 @@
+# GNU Lesser General Public License v3.0 only
+# Copyright (C) 2020 Artefact
+# licence-information@artefact.com
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 3 of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. diff --git a/ack/readers/the_trade_desk/cli.py b/ack/readers/the_trade_desk/cli.py new file mode 100644 index 00000000..4290585d --- /dev/null +++ b/ack/readers/the_trade_desk/cli.py @@ -0,0 +1,54 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +import click +from ack.readers.the_trade_desk.reader import TheTradeDeskReader +from ack.utils.args import extract_args +from ack.utils.date_handler import DEFAULT_DATE_RANGE_FUNCTIONS +from ack.utils.processor import processor + + +@click.command(name="read_ttd") +@click.option("--ttd-login", required=True, help="Login of your API account") +@click.option("--ttd-password", required=True, help="Password of your API account") +@click.option( + "--ttd-advertiser-id", required=True, multiple=True, help="Advertiser Ids for which report data should be fetched", +) +@click.option( + "--ttd-report-template-name", + required=True, + help="Exact name of the Report Template to request. 
Existing Report Templates " + "can be found within the MyReports section of The Trade Desk UI.", +) +@click.option( + "--ttd-report-schedule-name", required=True, help="Name of the Report Schedule to create.", +) +@click.option( + "--ttd-start-date", type=click.DateTime(), help="Start date of the period to request (format: YYYY-MM-DD)", +) +@click.option( + "--ttd-end-date", type=click.DateTime(), help="End date of the period to request (format: YYYY-MM-DD)", +) +@click.option( + "--ttd-date-range", + type=click.Choice(DEFAULT_DATE_RANGE_FUNCTIONS.keys()), + help=f"One of the available ACK default date ranges: {DEFAULT_DATE_RANGE_FUNCTIONS.keys()}", +) +@processor("ttd_login", "ttd_password") +def the_trade_desk(**kwargs): + return TheTradeDeskReader(**extract_args("ttd_", kwargs)) diff --git a/nck/helpers/ttd_helper.py b/ack/readers/the_trade_desk/config.py similarity index 54% rename from nck/helpers/ttd_helper.py rename to ack/readers/the_trade_desk/config.py index 0a0dbea2..06dec977 100644 --- a/nck/helpers/ttd_helper.py +++ b/ack/readers/the_trade_desk/config.py @@ -1,3 +1,9 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 3 of the License, or (at your option) any later version. # @@ -9,19 +15,19 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
- -import logging from datetime import datetime +from typing import List, Literal + +from pydantic import BaseModel, validator + +from ack.utils.date_handler import DEFAULT_DATE_RANGE_FUNCTIONS API_HOST = "https://api.thetradedesk.com/v3" API_ENDPOINTS = { "get_report_template_id": ("POST", "myreports/reporttemplateheader/query"), "create_report_schedule": ("POST", "myreports/reportschedule"), - "get_report_execution_details": ( - "POST", - "myreports/reportexecution/query/advertisers", - ), + "get_report_execution_details": ("POST", "myreports/reportexecution/query/advertisers",), "delete_report_schedule": ("DELETE", "/myreports/reportschedule"), } @@ -44,21 +50,21 @@ BQ_DATEFORMAT = "%Y-%m-%d" -class ReportTemplateNotFoundError(Exception): - def __init__(self, message): - super().__init__(message) - logging.error(message) - - -class ReportScheduleNotReadyError(Exception): - def __init__(self, message): - super().__init__(message) - logging.error(message) - +class TheTradeDeskReaderConfig(BaseModel): + login: str + password: str + advertiser_id: List[str] + report_template_name: str + report_schedule_name: str + start_date: datetime = None + end_date: datetime = None + date_range: Literal[tuple(DEFAULT_DATE_RANGE_FUNCTIONS.keys())] = None -def format_date(date_string): - """ - Input: "2020-01-01T00:00:00" - Output: "2020-01-01" - """ - return datetime.strptime(date_string, API_DATEFORMAT).strftime(BQ_DATEFORMAT) + @validator("start_date", "end_date", pre=True) + def date_format(cls, v): + if isinstance(v, str): + try: + return datetime.strptime(v, "%Y-%m-%d") + except ValueError: + raise ValueError("Datetime format must follow 'YYYY-MM-DD'") + return v diff --git a/ack/readers/the_trade_desk/helper.py b/ack/readers/the_trade_desk/helper.py new file mode 100644 index 00000000..e82e31d4 --- /dev/null +++ b/ack/readers/the_trade_desk/helper.py @@ -0,0 +1,23 @@ +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your 
option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +from datetime import datetime + +from ack.readers.the_trade_desk.config import API_DATEFORMAT, BQ_DATEFORMAT + + +def format_date(date_string): + """ + Input: "2020-01-01T00:00:00" + Output: "2020-01-01" + """ + return datetime.strptime(date_string, API_DATEFORMAT).strftime(BQ_DATEFORMAT) diff --git a/nck/readers/ttd_reader.py b/ack/readers/the_trade_desk/reader.py similarity index 54% rename from nck/readers/ttd_reader.py rename to ack/readers/the_trade_desk/reader.py index fad74013..bfce5cb7 100644 --- a/nck/readers/ttd_reader.py +++ b/ack/readers/the_trade_desk/reader.py @@ -1,3 +1,9 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 3 of the License, or (at your option) any later version. # @@ -10,86 +16,24 @@ # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-import logging -import click -from click import ClickException -import requests + from datetime import timedelta -from tenacity import retry, wait_exponential, stop_after_delay - -from nck.utils.args import extract_args -from nck.commands.command import processor -from nck.readers.reader import Reader -from nck.streams.json_stream import JSONStream -from nck.streams.normalized_json_stream import NormalizedJSONStream -from nck.helpers.ttd_helper import ( - API_HOST, - API_ENDPOINTS, - DEFAULT_REPORT_SCHEDULE_ARGS, - DEFAULT_PAGING_ARGS, - ReportTemplateNotFoundError, - ReportScheduleNotReadyError, - format_date, -) -from nck.utils.text import get_report_generator_from_flat_file - - -@click.command(name="read_ttd") -@click.option("--ttd-login", required=True, help="Login of your API account") -@click.option("--ttd-password", required=True, help="Password of your API account") -@click.option( - "--ttd-advertiser-id", - required=True, - multiple=True, - help="Advertiser Ids for which report data should be fetched", -) -@click.option( - "--ttd-report-template-name", - required=True, - help="Exact name of the Report Template to request. Existing Report Templates " - "can be found within the MyReports section of The Trade Desk UI.", -) -@click.option( - "--ttd-report-schedule-name", - required=True, - help="Name of the Report Schedule to create.", -) -@click.option( - "--ttd-start-date", - required=True, - type=click.DateTime(), - help="Start date of the period to request (format: YYYY-MM-DD)", -) -@click.option( - "--ttd-end-date", - required=True, - type=click.DateTime(), - help="End date of the period to request (format: YYYY-MM-DD)", -) -@click.option( - "--ttd-normalize-stream", - type=click.BOOL, - default=False, - help="If set to True, yields a NormalizedJSONStream (spaces and special " - "characters replaced by '_' in field names, which is useful for BigQuery). 
" - "Else, yields a standard JSONStream.", -) -@processor("ttd_login", "ttd_password") -def the_trade_desk(**kwargs): - return TheTradeDeskReader(**extract_args("ttd_", kwargs)) + +import requests +from ack.config import logger +from ack.readers.reader import Reader +from ack.readers.the_trade_desk.config import API_ENDPOINTS, API_HOST, DEFAULT_PAGING_ARGS, DEFAULT_REPORT_SCHEDULE_ARGS +from ack.readers.the_trade_desk.helper import format_date +from ack.streams.json_stream import JSONStream +from ack.utils.date_handler import build_date_range +from ack.utils.exceptions import ReportScheduleNotReadyError, ReportTemplateNotFoundError +from ack.utils.text import get_report_generator_from_flat_file +from tenacity import retry, stop_after_delay, wait_exponential class TheTradeDeskReader(Reader): def __init__( - self, - login, - password, - advertiser_id, - report_template_name, - report_schedule_name, - start_date, - end_date, - normalize_stream + self, login, password, advertiser_id, report_template_name, report_schedule_name, start_date, end_date, date_range, ): self.login = login self.password = password @@ -97,18 +41,9 @@ def __init__( self.advertiser_ids = list(advertiser_id) self.report_template_name = report_template_name self.report_schedule_name = report_schedule_name - self.start_date = start_date + self.start_date, self.end_date = build_date_range(start_date, end_date, date_range) # Report end date is exclusive: to become inclusive, it should be incremented by 1 day - self.end_date = end_date + timedelta(days=1) - self.normalize_stream = normalize_stream - - self._validate_dates() - - def _validate_dates(self): - if self.end_date - timedelta(days=1) < self.start_date: - raise ClickException( - "Report end date should be equal or ulterior to report start date." 
- ) + self.end_date = self.end_date + timedelta(days=1) def _get_access_token(self): url = f"{API_HOST}/authentication" @@ -129,9 +64,7 @@ def _build_headers(self): def _make_api_call(self, method, endpoint, payload={}): url = f"{API_HOST}/{endpoint}" - response = requests.request( - method=method, url=url, headers=self.headers, json=payload - ) + response = requests.request(method=method, url=url, headers=self.headers, json=payload) if response.ok: if response.content: return response.json() @@ -139,14 +72,12 @@ def _make_api_call(self, method, endpoint, payload={}): response.raise_for_status() def _get_report_template_id(self): - logging.info(f"Collecting ReportTemplateId of '{self.report_template_name}'") + logger.info(f"Collecting ReportTemplateId of '{self.report_template_name}'") method, endpoint = API_ENDPOINTS["get_report_template_id"] payload = {"NameContains": self.report_template_name, **DEFAULT_PAGING_ARGS} json_response = self._make_api_call(method, endpoint, payload) if json_response["ResultCount"] == 0: - raise ReportTemplateNotFoundError( - f"No existing ReportTemplate match '{self.report_template_name}'" - ) + raise ReportTemplateNotFoundError(f"No existing ReportTemplate match '{self.report_template_name}'") if json_response["ResultCount"] > 1: raise ReportTemplateNotFoundError( f"""'{self.report_template_name}' match more than one ReportTemplate. 
@@ -154,7 +85,7 @@ def _get_report_template_id(self): ) else: self.report_template_id = json_response["Result"][0]["ReportTemplateId"] - logging.info(f"Retrieved ReportTemplateId: {self.report_template_id}") + logger.info(f"Retrieved ReportTemplateId: {self.report_template_id}") def _create_report_schedule(self): method, endpoint = API_ENDPOINTS["create_report_schedule"] @@ -166,29 +97,22 @@ def _create_report_schedule(self): "ReportEndDateExclusive": self.end_date.isoformat(), **DEFAULT_REPORT_SCHEDULE_ARGS, } - logging.info(f"Creating ReportSchedule: {payload}") + logger.info(f"Creating ReportSchedule: {payload}") json_response = self._make_api_call(method, endpoint, payload) self.report_schedule_id = json_response["ReportScheduleId"] @retry( - wait=wait_exponential(multiplier=1, min=60, max=3600), - stop=stop_after_delay(36000), + wait=wait_exponential(multiplier=1, min=60, max=3600), stop=stop_after_delay(36000), ) def _wait_for_download_url(self): report_execution_details = self._get_report_execution_details() if report_execution_details["ReportExecutionState"] == "Pending": - raise ReportScheduleNotReadyError( - f"ReportSchedule '{self.report_schedule_id}' is still running." - ) + raise ReportScheduleNotReadyError(f"ReportSchedule '{self.report_schedule_id}' is still running.") else: # As the ReportSchedule that we just created runs only once, # the API response will include only one ReportDelivery (so we can get index "[0]") - self.download_url = report_execution_details["ReportDeliveries"][0][ - "DownloadURL" - ] - logging.info( - f"ReportScheduleId '{self.report_schedule_id}' is ready. DownloadURL: {self.download_url}" - ) + self.download_url = report_execution_details["ReportDeliveries"][0]["DownloadURL"] + logger.info(f"ReportScheduleId '{self.report_schedule_id}' is ready. 
DownloadURL: {self.download_url}") def _get_report_execution_details(self): method, endpoint = API_ENDPOINTS["get_report_execution_details"] @@ -208,7 +132,7 @@ def _download_report(self): return get_report_generator_from_flat_file(report.iter_lines()) def _delete_report_schedule(self): - logging.info(f"Deleting ReportScheduleId '{self.report_schedule_id}'") + logger.info(f"Deleting ReportScheduleId '{self.report_schedule_id}'") method, endpoint = API_ENDPOINTS["delete_report_schedule"] self._make_api_call(method, f"{endpoint}/{self.report_schedule_id}") @@ -220,17 +144,8 @@ def read(self): def result_generator(): for record in data: - yield { - k: format_date(v) if k == "Date" else v for k, v in record.items() - } + yield {k: format_date(v) if k == "Date" else v for k, v in record.items()} - if self.normalize_stream: - yield NormalizedJSONStream( - "results_" + "_".join(self.advertiser_ids), result_generator() - ) - else: - yield JSONStream( - "results_" + "_".join(self.advertiser_ids), result_generator() - ) + yield JSONStream("results_" + "_".join(self.advertiser_ids), result_generator()) self._delete_report_schedule() diff --git a/ack/readers/twitter/__init__.py b/ack/readers/twitter/__init__.py new file mode 100644 index 00000000..d46139b7 --- /dev/null +++ b/ack/readers/twitter/__init__.py @@ -0,0 +1,17 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. diff --git a/ack/readers/twitter/cli.py b/ack/readers/twitter/cli.py new file mode 100644 index 00000000..f6ff3d8e --- /dev/null +++ b/ack/readers/twitter/cli.py @@ -0,0 +1,132 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ +import click +from ack.readers.twitter.config import ( + ENTITY_ATTRIBUTES, + GRANULARITIES, + METRIC_GROUPS, + PLACEMENTS, + REPORT_TYPES, + SEGMENTATION_TYPES, +) +from ack.readers.twitter.reader import TwitterReader +from ack.utils.args import extract_args +from ack.utils.date_handler import DEFAULT_DATE_RANGE_FUNCTIONS +from ack.utils.processor import processor + + +@click.command(name="read_twitter") +@click.option( + "--twitter-consumer-key", + required=True, + help="API key, available in the 'Keys and tokens' section of your Twitter Developper App.", +) +@click.option( + "--twitter-consumer-secret", + required=True, + help="API secret key, available in the 'Keys and tokens' section of your Twitter Developper App.", +) +@click.option( + "--twitter-access-token", + required=True, + help="Access token, available in the 'Keys and tokens' section of your Twitter Developper App.", +) +@click.option( + "--twitter-access-token-secret", + required=True, + help="Access token secret, available in the 'Keys and tokens' section of your Twitter Developper App.", +) +@click.option( + "--twitter-account-id", required=True, help="Specifies the Twitter Account ID for which the data should be returned.", +) +@click.option( + "--twitter-report-type", + required=True, + type=click.Choice(REPORT_TYPES), + help="Specifies the type of report to collect: " + "ANALYTICS (performance report, any kind of metrics), " + "REACH (performance report, focus on reach and frequency metrics), " + "ENTITY (entity configuration report)", +) +@click.option( + "--twitter-entity", + required=True, + type=click.Choice(list(ENTITY_ATTRIBUTES.keys())), + help="Specifies the entity type to retrieve data for.", +) +@click.option( + "--twitter-entity-attribute", + multiple=True, + help="Specific to 'ENTITY' reports. " "Specifies the entity attribute (a.k.a. 
dimension) that should be returned.", +) +@click.option( + "--twitter-granularity", + type=click.Choice(GRANULARITIES), + default="TOTAL", + help="Specific to 'ANALYTICS' reports. Specifies how granular the retrieved data should be.", +) +@click.option( + "--twitter-metric-group", + multiple=True, + type=click.Choice(METRIC_GROUPS), + help="Specific to 'ANALYTICS' reports. Specifies the list of metrics (as a group) that should be returned: " + "https://developer.twitter.com/en/docs/ads/analytics/overview/metrics-and-segmentation", +) +@click.option( + "--twitter-placement", + type=click.Choice(PLACEMENTS), + default="ALL_ON_TWITTER", + help="Specific to 'ANALYTICS' reports. Scopes the retrieved data to a particular placement.", +) +@click.option( + "--twitter-segmentation-type", + type=click.Choice(SEGMENTATION_TYPES), + help="Specific to 'ANALYTICS' reports. Specifies how the retrieved data should be segmented: " + "https://developer.twitter.com/en/docs/ads/analytics/overview/metrics-and-segmentation", +) +@click.option( + "--twitter-platform", + help="Specific to 'ANALYTICS' reports. Required if segmentation_type is set to 'DEVICES' or 'PLATFORM_VERSION'. " + "To get possible values: GET targeting_criteria/locations", +) +@click.option( + "--twitter-country", + help="Specific to 'ANALYTICS' reports. Required if segmentation_type is set to 'CITIES', 'POSTAL_CODES', or 'REGION'. 
" + "To get possible values: GET targeting_criteria/platforms", +) +@click.option("--twitter-start-date", type=click.DateTime(), help="Specifies report start date.") +@click.option( + "--twitter-end-date", type=click.DateTime(), help="Specifies report end date (inclusive).", +) +@click.option( + "--twitter-add-request-date-to-report", + type=click.BOOL, + default=False, + help="If set to 'True', the date on which the request is made will appear on each report record.", +) +@click.option( + "--twitter-date-range", + type=click.Choice(DEFAULT_DATE_RANGE_FUNCTIONS.keys()), + help=f"One of the available ACK default date ranges: {DEFAULT_DATE_RANGE_FUNCTIONS.keys()}", +) +@processor( + "twitter_consumer_key", "twitter_consumer_secret", "twitter_access_token", "twitter_access_token_secret", +) +def twitter(**kwargs): + return TwitterReader(**extract_args("twitter_", kwargs)) diff --git a/ack/readers/twitter/config.py b/ack/readers/twitter/config.py new file mode 100644 index 00000000..771ea252 --- /dev/null +++ b/ack/readers/twitter/config.py @@ -0,0 +1,115 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+from typing import List, Literal + +from twitter_ads.campaign import Campaign, FundingInstrument, LineItem +from twitter_ads.creative import CardsFetch, MediaCreative, PromotedTweet +from pydantic import BaseModel, validator +from datetime import datetime + +from ack.utils.date_handler import DEFAULT_DATE_RANGE_FUNCTIONS + +API_DATEFORMAT = "%Y-%m-%dT%H:%M:%SZ" +REP_DATEFORMAT = "%Y-%m-%d" +MAX_WAITING_SEC = 3600 +MAX_ENTITY_IDS_PER_JOB = 20 +MAX_CONCURRENT_JOBS = 100 + +REPORT_TYPES = ["ANALYTICS", "REACH", "ENTITY"] + +ENTITY_OBJECTS = { + "FUNDING_INSTRUMENT": FundingInstrument, + "CAMPAIGN": Campaign, + "LINE_ITEM": LineItem, + "MEDIA_CREATIVE": MediaCreative, + "PROMOTED_TWEET": PromotedTweet, +} + +ENTITY_ATTRIBUTES = { + **{entity: list(ENTITY_OBJECTS[entity].__dict__["PROPERTIES"].keys()) for entity in ENTITY_OBJECTS}, + "CARD": list(CardsFetch.__dict__["PROPERTIES"].keys()), +} + +GRANULARITIES = ["DAY", "TOTAL"] + +METRIC_GROUPS = [ + "ENGAGEMENT", + "BILLING", + "VIDEO", + "MEDIA", + "MOBILE_CONVERSION", + "WEB_CONVERSION", + "LIFE_TIME_VALUE_MOBILE_CONVERSION", +] + +PLACEMENTS = [ + "ALL_ON_TWITTER", + "PUBLISHER_NETWORK", +] + +SEGMENTATION_TYPES = [ + "AGE", + "APP_STORE_CATEGORY", + "AUDIENCES", + "CONVERSATIONS", + "CONVERSION_TAGS", + "DEVICES", + "EVENTS", + "GENDER", + "INTERESTS", + "KEYWORDS", + "LANGUAGES", + "LOCATIONS", + "METROS", + "PLATFORMS", + "PLATFORM_VERSIONS", + "POSTAL_CODES", + "REGIONS", + "SIMILAR_TO_FOLLOWERS_OF_USER", + "TV_SHOWS", +] + + +class TwitterReaderConfig(BaseModel): + consumer_key: str + consumer_secret: str + access_token: str + access_token_secret: str + account_id: str + report_type: Literal[tuple(REPORT_TYPES)] + entity: Literal[tuple(ENTITY_ATTRIBUTES.keys())] + entity_attribute: List[str] = [] + granularity: Literal[tuple(GRANULARITIES)] = "TOTAL" + metric_group: List[Literal[tuple(METRIC_GROUPS)]] = [] + placement: Literal[tuple(PLACEMENTS)] = "ALL_ON_TWITTER" + segmentation_type: 
Literal[tuple(SEGMENTATION_TYPES)] = None + platform: str = None + country: str = None + start_date: datetime = None + end_date: datetime = None + add_request_date_to_report: bool = False + date_range: Literal[tuple(DEFAULT_DATE_RANGE_FUNCTIONS.keys())] = None + + @validator("start_date", "end_date", pre=True) + def date_format(cls, v): + if isinstance(v, str): + try: + return datetime.strptime(v, "%Y-%m-%d") + except ValueError: + raise ValueError("Datetime format must follow 'YYYY-MM-DD'") + return v diff --git a/nck/readers/twitter_reader.py b/ack/readers/twitter/reader.py similarity index 60% rename from nck/readers/twitter_reader.py rename to ack/readers/twitter/reader.py index d82c6014..7f39adea 100644 --- a/nck/readers/twitter_reader.py +++ b/ack/readers/twitter/reader.py @@ -1,3 +1,9 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 3 of the License, or (at your option) any later version. # @@ -9,148 +15,34 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
- -import logging -import click -from click import ClickException -from itertools import chain +import sys +import traceback from datetime import datetime, timedelta -from tenacity import retry, wait_exponential, stop_after_delay - -from nck.utils.args import extract_args -from nck.commands.command import processor -from nck.readers.reader import Reader -from nck.streams.json_stream import JSONStream -from nck.helpers.twitter_helper import ( - REPORT_TYPES, - ENTITY_OBJECTS, +from itertools import chain +from click import ClickException +from ack.config import LEVEL, logger +from ack.readers.reader import Reader +from ack.readers.twitter.config import ( + API_DATEFORMAT, + # MAX_WAITING_SEC, ENTITY_ATTRIBUTES, - GRANULARITIES, - METRIC_GROUPS, - PLACEMENTS, - SEGMENTATION_TYPES, + ENTITY_OBJECTS, + MAX_CONCURRENT_JOBS, + MAX_ENTITY_IDS_PER_JOB, + REP_DATEFORMAT, ) - -from twitter_ads.client import Client -from twitter_ads.utils import split_list +from ack.streams.json_stream import JSONStream +from ack.utils.date_handler import build_date_range +from tenacity import retry, stop_after_delay, wait_exponential, retry_if_exception_type, before_sleep_log from twitter_ads import API_VERSION -from twitter_ads.http import Request -from twitter_ads.cursor import Cursor +from twitter_ads.client import Client # from twitter_ads.creative import TweetPreview from twitter_ads.creative import CardsFetch - -API_DATEFORMAT = "%Y-%m-%dT%H:%M:%SZ" -REP_DATEFORMAT = "%Y-%m-%d" -MAX_WAITING_SEC = 3600 -MAX_ENTITY_IDS_PER_JOB = 20 -MAX_CONCURRENT_JOBS = 100 - - -@click.command(name="read_twitter") -@click.option( - "--twitter-consumer-key", - required=True, - help="API key, available in the 'Keys and tokens' section of your Twitter Developper App.", -) -@click.option( - "--twitter-consumer-secret", - required=True, - help="API secret key, available in the 'Keys and tokens' section of your Twitter Developper App.", -) -@click.option( - "--twitter-access-token", - required=True, - 
help="Access token, available in the 'Keys and tokens' section of your Twitter Developper App.", -) -@click.option( - "--twitter-access-token-secret", - required=True, - help="Access token secret, available in the 'Keys and tokens' section of your Twitter Developper App.", -) -@click.option( - "--twitter-account-id", - required=True, - help="Specifies the Twitter Account ID for which the data should be returned.", -) -@click.option( - "--twitter-report-type", - required=True, - type=click.Choice(REPORT_TYPES), - help="Specifies the type of report to collect: " - "ANALYTICS (performance report, any kind of metrics), " - "REACH (performance report, focus on reach and frequency metrics), " - "ENTITY (entity configuration report)", -) -@click.option( - "--twitter-entity", - required=True, - type=click.Choice(list(ENTITY_ATTRIBUTES.keys())), - help="Specifies the entity type to retrieve data for.", -) -@click.option( - "--twitter-entity-attribute", - multiple=True, - help="Specific to 'ENTITY' reports. " - "Specifies the entity attribute (a.k.a. dimension) that should be returned.", -) -@click.option( - "--twitter-granularity", - type=click.Choice(GRANULARITIES), - default="TOTAL", - help="Specific to 'ANALYTICS' reports. Specifies how granular the retrieved data should be.", -) -@click.option( - "--twitter-metric-group", - multiple=True, - type=click.Choice(METRIC_GROUPS), - help="Specific to 'ANALYTICS' reports. Specifies the list of metrics (as a group) that should be returned: " - "https://developer.twitter.com/en/docs/ads/analytics/overview/metrics-and-segmentation", -) -@click.option( - "--twitter-placement", - type=click.Choice(PLACEMENTS), - default="ALL_ON_TWITTER", - help="Specific to 'ANALYTICS' reports. Scopes the retrieved data to a particular placement.", -) -@click.option( - "--twitter-segmentation-type", - type=click.Choice(SEGMENTATION_TYPES), - help="Specific to 'ANALYTICS' reports. 
Specifies how the retrieved data should be segmented: " - "https://developer.twitter.com/en/docs/ads/analytics/overview/metrics-and-segmentation", -) -@click.option( - "--twitter-platform", - help="Specific to 'ANALYTICS' reports. Required if segmentation_type is set to 'DEVICES' or 'PLATFORM_VERSION'. " - "To get possible values: GET targeting_criteria/locations", -) -@click.option( - "--twitter-country", - help="Specific to 'ANALYTICS' reports. Required if segmentation_type is set to 'CITIES', 'POSTAL_CODES', or 'REGION'. " - "To get possible values: GET targeting_criteria/platforms", -) -@click.option( - "--twitter-start-date", type=click.DateTime(), help="Specifies report start date." -) -@click.option( - "--twitter-end-date", - type=click.DateTime(), - help="Specifies report end date (inclusive).", -) -@click.option( - "--twitter-add-request-date-to-report", - type=click.BOOL, - default=False, - help="If set to 'True', the date on which the request is made will appear on each report record.", -) -@processor( - "twitter_consumer_key", - "twitter_consumer_secret", - "twitter_access_token", - "twitter_access_token_secret", -) -def twitter(**kwargs): - return TwitterReader(**extract_args("twitter_", kwargs)) +from twitter_ads.cursor import Cursor +from twitter_ads.http import Request +from twitter_ads.utils import split_list +from twitter_ads.error import RateLimit class TwitterReader(Reader): @@ -173,18 +65,17 @@ def __init__( start_date, end_date, add_request_date_to_report, + date_range, ): # Authentication inputs - self.client = Client( - consumer_key, consumer_secret, access_token, access_token_secret - ) + self.client = Client(consumer_key, consumer_secret, access_token, access_token_secret) self.account = self.client.accounts(account_id) # General inputs self.report_type = report_type self.entity = entity - self.start_date = start_date - self.end_date = end_date + timedelta(days=1) + self.start_date, self.end_date = build_date_range(start_date, end_date, 
date_range) + self.end_date = self.end_date + timedelta(days=1) self.add_request_date_to_report = add_request_date_to_report # Report inputs: ENTITY @@ -206,33 +97,19 @@ def validate_inputs(self): Validate combination of input parameters (triggered in TwitterReader constructor). """ - self.validate_dates() self.validate_analytics_segmentation() self.validate_analytics_metric_groups() self.validate_analytics_entity() self.validate_reach_entity() self.validate_entity_attributes() - def validate_dates(self): - - if self.end_date - timedelta(days=1) < self.start_date: - raise ClickException( - "Report end date should be equal or ulterior to report start date." - ) - def validate_analytics_segmentation(self): if self.report_type == "ANALYTICS": - if ( - self.segmentation_type in ["DEVICES", "PLATFORM VERSION"] - and not self.platform - ): + if self.segmentation_type in ["DEVICES", "PLATFORM VERSION"] and not self.platform: raise ClickException("Please provide a value for 'platform'.") - elif ( - self.segmentation_type in ["CITIES", "POSTAL_CODES", "REGION"] - and not self.country - ): + elif self.segmentation_type in ["CITIES", "POSTAL_CODES", "REGION"] and not self.country: raise ClickException("Please provide a value for 'country'.") def validate_analytics_metric_groups(self): @@ -240,54 +117,33 @@ def validate_analytics_metric_groups(self): if self.report_type == "ANALYTICS": if self.entity == "FUNDING_INSTRUMENT" and any( - [ - metric_group not in ["ENGAGEMENT", "BILLING"] - for metric_group in self.metric_groups - ] + [metric_group not in ["ENGAGEMENT", "BILLING"] for metric_group in self.metric_groups] ): - raise ClickException( - "'FUNDING_INSTRUMENT' only accept the 'ENGAGEMENT' and 'BILLING' metric groups." 
- ) + raise ClickException("'FUNDING_INSTRUMENT' only accept the 'ENGAGEMENT' and 'BILLING' metric groups.") - if ( - "MOBILE_CONVERSION" in self.metric_groups - and len(self.metric_groups) > 1 - ): - raise ClickException( - "'MOBILE_CONVERSION' data should be requested separately." - ) + if "MOBILE_CONVERSION" in self.metric_groups and len(self.metric_groups) > 1: + raise ClickException("'MOBILE_CONVERSION' data should be requested separately.") def validate_analytics_entity(self): if self.report_type == "ANALYTICS": if self.entity == "CARD": - raise ClickException( - f"'ANALYTICS' reports only accept following entities: {list(ENTITY_OBJECTS.keys())}." - ) + raise ClickException(f"'ANALYTICS' reports only accept following entities: {list(ENTITY_OBJECTS.keys())}.") def validate_reach_entity(self): if self.report_type == "REACH": if self.entity not in ["CAMPAIGN", "FUNDING_INSTRUMENT"]: - raise ClickException( - "'REACH' reports only accept the following entities: CAMPAIGN, FUNDING_INSTRUMENT." 
- ) + raise ClickException("'REACH' reports only accept the following entities: CAMPAIGN, FUNDING_INSTRUMENT.") def validate_entity_attributes(self): if self.report_type == "ENTITY": - if not all( - [ - attr in ENTITY_ATTRIBUTES[self.entity] - for attr in self.entity_attributes - ] - ): - raise ClickException( - f"Available attributes for '{self.entity}' are: {ENTITY_ATTRIBUTES[self.entity]}" - ) + if not all([attr in ENTITY_ATTRIBUTES[self.entity] for attr in self.entity_attributes]): + raise ClickException(f"Available attributes for '{self.entity}' are: {ENTITY_ATTRIBUTES[self.entity]}") def get_analytics_report(self, job_ids): """ @@ -299,13 +155,13 @@ def get_analytics_report(self, job_ids): for job_id in job_ids: - logging.info(f"Processing job_id: {job_id}") + logger.info(f"Processing job_id: {job_id}") # job_result = self.get_job_result(job_id) # waiting_sec = 2 # while job_result.status == "PROCESSING": - # logging.info(f"Waiting {waiting_sec} seconds for job to be completed") + # logger.info(f"Waiting {waiting_sec} seconds for job to be completed") # sleep(waiting_sec) # if waiting_sec > MAX_WAITING_SEC: # raise JobTimeOutError("Waited too long for job to be completed") @@ -325,9 +181,7 @@ def get_active_entity_ids(self): Documentation: https://developer.twitter.com/en/docs/ads/analytics/api-reference/active-entities """ - active_entities = ENTITY_OBJECTS[self.entity].active_entities( - self.account, self.start_date, self.end_date - ) + active_entities = ENTITY_OBJECTS[self.entity].active_entities(self.account, self.start_date, self.end_date) return [obj["entity_id"] for obj in active_entities] def get_job_ids(self, entity_ids): @@ -355,10 +209,7 @@ def get_job_ids(self, entity_ids): for chunk_entity_ids in split_list(entity_ids, MAX_ENTITY_IDS_PER_JOB) ] - @retry( - wait=wait_exponential(multiplier=1, min=60, max=3600), - stop=stop_after_delay(36000), - ) + @retry(wait=wait_exponential(multiplier=1, min=60, max=3600), stop=stop_after_delay(36000)) def 
_waiting_for_job_to_complete(self, job_id): """ Retrying to get job_result until job status is 'COMPLETED'. @@ -376,11 +227,7 @@ def get_job_result(self, job_id): Documentation: https://developer.twitter.com/en/docs/ads/analytics/api-reference/asynchronous """ - return ( - ENTITY_OBJECTS[self.entity] - .async_stats_job_result(self.account, job_ids=[job_id]) - .first - ) + return ENTITY_OBJECTS[self.entity].async_stats_job_result(self.account, job_ids=[job_id]).first def get_raw_analytics_response(self, job_result): """ @@ -389,9 +236,7 @@ def get_raw_analytics_response(self, job_result): Documentation: https://developer.twitter.com/en/docs/ads/analytics/api-reference/asynchronous """ - return ENTITY_OBJECTS[self.entity].async_stats_job_data( - self.account, url=job_result.url - ) + return ENTITY_OBJECTS[self.entity].async_stats_job_data(self.account, url=job_result.url) def parse(self, raw_analytics_response): """ @@ -404,9 +249,7 @@ def parse(self, raw_analytics_response): { "id": entity_resp["id"], **{ - mt: 0 - if entity_data["metrics"][mt] is None - else entity_data["metrics"][mt][i] + mt: 0 if entity_data["metrics"][mt] is None else entity_data["metrics"][mt][i] for mt in entity_data["metrics"] }, } @@ -424,8 +267,7 @@ def add_daily_timestamps(self, entity_records): if self.granularity == "DAY": period_items = self.get_daily_period_items() return [ - {**entity_records[i], "date": period_items[i].strftime(REP_DATEFORMAT)} - for i in range(len(entity_records)) + {**entity_records[i], "date": period_items[i].strftime(REP_DATEFORMAT)} for i in range(len(entity_records)) ] return entity_records @@ -445,10 +287,7 @@ def add_segment(self, entity_records, entity_data): if self.segmentation_type: entity_segment = entity_data["segment"]["segment_name"] - return [ - {**rec, self.segmentation_type.lower(): entity_segment} - for rec in entity_records - ] + return [{**rec, self.segmentation_type.lower(): entity_segment} for rec in entity_records] return entity_records def 
get_campaign_management_report(self): @@ -477,14 +316,10 @@ def get_cards_report(self): Supported entities: CARD Documentation: https://developer.twitter.com/en/docs/ads/creatives/api-reference/ """ - - for tweet in self.get_published_tweets(): + for tweet in self.get_published_tweets_generator(): if "card_uri" in tweet: card_fetch = self.get_card_fetch(card_uri=tweet["card_uri"]) - card_attributes = { - attr: getattr(card_fetch, attr, None) - for attr in self.entity_attributes - } + card_attributes = {attr: getattr(card_fetch, attr, None) for attr in self.entity_attributes} record = { "tweet_id": tweet["tweet_id"], "card_uri": tweet["card_uri"], @@ -492,10 +327,19 @@ def get_cards_report(self): } yield record + @retry( + wait=wait_exponential(multiplier=60, max=300), + stop=stop_after_delay(1200), + retry=retry_if_exception_type(RateLimit), + before_sleep=before_sleep_log(logger, LEVEL), + ) + def get_published_tweets_generator(self): + return self.get_published_tweets() + def get_published_tweets(self): """ Step 1 of 'ENTITY - CARD' report generation process: - Returns details on 'PUBLISHED' tweets, as a generator of dictionnaries + Returns details on 'PUBLISHED' tweets, as a generator of dictionaries Documentation: https://developer.twitter.com/en/docs/ads/creatives/api-reference/tweets """ @@ -505,15 +349,25 @@ def get_published_tweets(self): yield from Cursor(None, request) + @retry( + wait=wait_exponential(multiplier=60, max=600), + stop=stop_after_delay(1200), + retry=retry_if_exception_type(RateLimit), + before_sleep=before_sleep_log(logger, LEVEL), + ) def get_card_fetch(self, card_uri): """ Step 2 of 'ENTITY - CARD' report generation process: Returns the CartFetch object associated with a specific card_uri Documentation: https://developer.twitter.com/en/docs/ads/creatives/api-reference/cards-fetch """ - return CardsFetch.load(self.account, card_uris=[card_uri]).first + @retry( + wait=wait_exponential(multiplier=60, max=600), + stop=stop_after_delay(3600), 
+ before_sleep=before_sleep_log(logger, LEVEL), + ) def get_reach_report(self): """ Get 'REACH' report through the 'Reach and Average Frequency' endpoint of Twitter Ads API. @@ -524,14 +378,18 @@ def get_reach_report(self): entity_ids = self.get_active_entity_ids() for chunk_entity_ids in split_list(entity_ids, MAX_ENTITY_IDS_PER_JOB): - params = { - "account_id": self.account.id, - f"{self.entity.lower()}_ids": ",".join(entity_ids), - "start_time": self.start_date.strftime(API_DATEFORMAT), - "end_time": self.end_date.strftime(API_DATEFORMAT), - } - request = Request(self.client, "get", resource, params=params) - yield from Cursor(None, request) + try: + params = { + "account_id": self.account.id, + f"{self.entity.lower()}_ids": ",".join(entity_ids), + "start_time": self.start_date.strftime(API_DATEFORMAT), + "end_time": self.end_date.strftime(API_DATEFORMAT), + } + request = Request(self.client, "get", resource, params=params) + yield from Cursor(None, request) + except Exception: + ex_type, ex, tb = sys.exc_info() + logger.warning(f"Failed to ingest post with error: {ex}. 
Traceback: {traceback.print_tb(tb)}") def add_request_or_period_dates(self, record): """ @@ -539,33 +397,26 @@ def add_request_or_period_dates(self, record): """ def check_add_period_date_to_report(): - return ( - self.report_type == "ANALYTICS" and self.granularity == "TOTAL" - ) or self.report_type == "REACH" + return (self.report_type == "ANALYTICS" and self.granularity == "TOTAL") or self.report_type == "REACH" if self.add_request_date_to_report: record["request_date"] = datetime.today().strftime(REP_DATEFORMAT) if check_add_period_date_to_report(): record["period_start_date"] = self.start_date.strftime(REP_DATEFORMAT) - record["period_end_date"] = (self.end_date - timedelta(days=1)).strftime( - REP_DATEFORMAT - ) + record["period_end_date"] = (self.end_date - timedelta(days=1)).strftime(REP_DATEFORMAT) return record def read(self): - if self.report_type == "ANALYTICS": entity_ids = self.get_active_entity_ids() total_jobs = (len(entity_ids) // MAX_ENTITY_IDS_PER_JOB) + 1 - logging.info(f"Processing a total of {total_jobs} jobs") + logger.info(f"Processing a total of {total_jobs} jobs") data = [] - for chunk_entity_ids in split_list( - entity_ids, MAX_ENTITY_IDS_PER_JOB * MAX_CONCURRENT_JOBS - ): + for chunk_entity_ids in split_list(entity_ids, MAX_ENTITY_IDS_PER_JOB * MAX_CONCURRENT_JOBS): job_ids = self.get_job_ids(chunk_entity_ids) data += self.get_analytics_report(job_ids) diff --git a/ack/readers/yandex_campaign/__init__.py b/ack/readers/yandex_campaign/__init__.py new file mode 100644 index 00000000..d46139b7 --- /dev/null +++ b/ack/readers/yandex_campaign/__init__.py @@ -0,0 +1,17 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. diff --git a/ack/readers/yandex_campaign/cli.py b/ack/readers/yandex_campaign/cli.py new file mode 100644 index 00000000..6690ea92 --- /dev/null +++ b/ack/readers/yandex_campaign/cli.py @@ -0,0 +1,53 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ +import click +from ack.readers.yandex_campaign.config import CAMPAIGN_FIELDS, CAMPAIGN_PAYMENT_STATUSES, CAMPAIGN_STATES, CAMPAIGN_STATUSES +from ack.readers.yandex_campaign.reader import YandexCampaignReader +from ack.utils.args import extract_args +from ack.utils.processor import processor + + +@click.command(name="read_yandex_campaigns") +@click.option("--yandex-campaigns-token", "yandex_token", required=True) +@click.option("--yandex-campaigns-campaign-id", "yandex_campaign_ids", multiple=True) +@click.option("--yandex-campaigns-campaign-state", "yandex_campaign_states", multiple=True, type=click.Choice(CAMPAIGN_STATES)) +@click.option( + "--yandex-campaigns-campaign-status", "yandex_campaign_statuses", multiple=True, type=click.Choice(CAMPAIGN_STATUSES) +) +@click.option( + "--yandex-campaigns-campaign-payment-status", + "yandex_campaign_payment_statuses", + multiple=True, + type=click.Choice(CAMPAIGN_PAYMENT_STATUSES), +) +@click.option( + "--yandex-campaigns-field-name", + "yandex_fields", + multiple=True, + type=click.Choice(CAMPAIGN_FIELDS), + required=True, + help=( + "Fields to output in the report (columns)." + "For the full list of fields and their meanings, " + "see https://tech.yandex.com/direct/doc/reports/fields-list-docpage/" + ), +) +@processor("yandex_token") +def yandex_campaigns(**kwargs): + return YandexCampaignReader(**extract_args("yandex_", kwargs)) diff --git a/ack/readers/yandex_campaign/config.py b/ack/readers/yandex_campaign/config.py new file mode 100644 index 00000000..b293f5d4 --- /dev/null +++ b/ack/readers/yandex_campaign/config.py @@ -0,0 +1,62 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +from typing import List, Literal + +from pydantic import BaseModel + +YANDEX_DIRECT_API_BASE_URL = "https://api.direct.yandex.com/json/v5/" + +CAMPAIGN_FIELDS = [ + "BlockedIps", + "ExcludedSites", + "Currency", + "DailyBudget", + "Notification", + "EndDate", + "Funds", + "ClientInfo", + "Id", + "Name", + "NegativeKeywords", + "RepresentedBy", + "StartDate", + "Statistics", + "State", + "Status", + "StatusPayment", + "StatusClarification", + "SourceId", + "TimeTargeting", + "TimeZone", + "Type", +] + +CAMPAIGN_STATES = ["ARCHIVED", "CONVERTED", "ENDED", "OFF", "ON", "SUSPENDED"] + +CAMPAIGN_STATUSES = ["ACCEPTED", "DRAFT", "MODERATION", "REJECTED"] + +CAMPAIGN_PAYMENT_STATUSES = ["ALLOWED", "DISALLOWED"] + + +class YandexCampaignReaderConfig(BaseModel): + token: str + campaign_ids: List[str] = [] + campaign_states: List[Literal[tuple(CAMPAIGN_STATES)]] = [] + campaign_statuses: List[Literal[tuple(CAMPAIGN_STATUSES)]] = [] + campaign_payment_statuses: List[Literal[tuple(CAMPAIGN_PAYMENT_STATUSES)]] = [] + fields: List[Literal[tuple(CAMPAIGN_FIELDS)]] = [] diff --git a/nck/readers/yandex_campaign_reader.py b/ack/readers/yandex_campaign/reader.py similarity index 53% rename from nck/readers/yandex_campaign_reader.py rename to ack/readers/yandex_campaign/reader.py index d1142907..f562bc5b 100644 --- a/nck/readers/yandex_campaign_reader.py +++ b/ack/readers/yandex_campaign/reader.py @@ -15,71 +15,16 @@ # You should have received a copy of the GNU Lesser General Public License # along with this 
program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -import click -import nck.helpers.api_client_helper as api_client_helper -from nck.clients.api_client import ApiClient -from nck.commands.command import processor -from nck.helpers.yandex_helper import (CAMPAIGN_FIELDS, CAMPAIGN_STATES, - CAMPAIGN_STATUSES, CAMPAIGN_PAYMENT_STATUSES) -from nck.readers.reader import Reader -from nck.streams.json_stream import JSONStream -from nck.utils.args import extract_args - - -@click.command(name="read_yandex_campaigns") -@click.option("--yandex-token", required=True) -@click.option( - "--yandex-campaign-id", - "yandex_campaign_ids", - multiple=True -) -@click.option( - "--yandex-campaign-state", - "yandex_campaign_states", - multiple=True, - type=click.Choice(CAMPAIGN_STATES) -) -@click.option( - "--yandex-campaign-status", - "yandex_campaign_statuses", - multiple=True, - type=click.Choice(CAMPAIGN_STATUSES) -) -@click.option( - "--yandex-campaign-payment-status", - "yandex_campaign_payment_statuses", - multiple=True, - type=click.Choice(CAMPAIGN_PAYMENT_STATUSES) -) -@click.option( - "--yandex-field-name", - "yandex_fields", - multiple=True, - type=click.Choice(CAMPAIGN_FIELDS), - required=True, - help=( - "Fields to output in the report (columns)." 
- "For the full list of fields and their meanings, " - "see https://tech.yandex.com/direct/doc/reports/fields-list-docpage/" - ) -) -@processor("yandex_token") -def yandex_campaigns(**kwargs): - return YandexCampaignReader(**extract_args("yandex_", kwargs)) - - -YANDEX_DIRECT_API_BASE_URL = "https://api.direct.yandex.com/json/v5/" +import ack.clients.api.helper as api_client_helper +from ack.clients.api.client import ApiClient +from ack.readers.reader import Reader +from ack.readers.yandex_campaign.config import YANDEX_DIRECT_API_BASE_URL +from ack.streams.json_stream import JSONStream class YandexCampaignReader(Reader): - - def __init__( - self, - token, - fields, - **kwargs - ): + def __init__(self, token, fields, **kwargs): self.token = token self.fields = list(fields) self.campaign_ids = list(kwargs["campaign_ids"]) @@ -106,13 +51,9 @@ def _build_request_body(self): if len(self.campaign_payment_statuses) != 0: selection_criteria["StatusesPayment"] = self.campaign_payment_statuses body["params"] = api_client_helper.get_dict_with_keys_converted_to_new_string_format( - field_names=self.fields, - selection_criteria=selection_criteria + field_names=self.fields, selection_criteria=selection_criteria ) return body def read(self): - yield JSONStream( - "results_CAMPAIGN_OBJECT_REPORT_", - self.result_generator() - ) + yield JSONStream("results_CAMPAIGN_OBJECT_REPORT_", self.result_generator()) diff --git a/ack/readers/yandex_statistics/__init__.py b/ack/readers/yandex_statistics/__init__.py new file mode 100644 index 00000000..d46139b7 --- /dev/null +++ b/ack/readers/yandex_statistics/__init__.py @@ -0,0 +1,17 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any 
later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. diff --git a/ack/readers/yandex_statistics/cli.py b/ack/readers/yandex_statistics/cli.py new file mode 100644 index 00000000..10638cd4 --- /dev/null +++ b/ack/readers/yandex_statistics/cli.py @@ -0,0 +1,77 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ +import datetime +import random + +import click +from ack.readers.yandex_statistics.config import DATE_RANGE_TYPES, LANGUAGES, OPERATORS, REPORT_TYPES, STATS_FIELDS +from ack.readers.yandex_statistics.reader import YandexStatisticsReader +from ack.utils.args import extract_args +from ack.utils.processor import processor + + +class StrList(click.ParamType): + def convert(self, value, param, ctx): + return value.split(",") + + +STR_LIST_TYPE = StrList() + + +@click.command(name="read_yandex_statistics") +@click.option("--yandex-statistics-token", "yandex_token", required=True) +@click.option("--yandex-statistics-report-language", "yandex_report_language", type=click.Choice(LANGUAGES), default="en") +@click.option( + "--yandex-statistics-filter", + "yandex_filters", + multiple=True, + type=click.Tuple([click.Choice(STATS_FIELDS), click.Choice(OPERATORS), STR_LIST_TYPE]), +) +@click.option("--yandex-statistics-max-rows", "yandex_max_rows", type=int) +@click.option( + "--yandex-statistics-field-name", + "yandex_fields", + multiple=True, + type=click.Choice(STATS_FIELDS), + required=True, + help=( + "Fields to output in the report (columns)." 
+ "For the full list of fields and their meanings, " + "see https://tech.yandex.com/direct/doc/reports/fields-list-docpage/" + ), +) +@click.option( + "--yandex-statistics-report-name", + "yandex_report_name", + default=f"stats_report_{datetime.date.today()}_{random.randrange(10000)}", +) +@click.option("--yandex-statistics-report-type", "yandex_report_type", type=click.Choice(REPORT_TYPES), required=True) +@click.option("--yandex-statistics-date-range", "yandex_date_range", type=click.Choice(DATE_RANGE_TYPES), required=True) +@click.option( + "--yandex-statistics-include-vat", + "yandex_include_vat", + type=click.BOOL, + required=True, + help="Whether to include VAT in the monetary amounts in the report.", +) +@click.option("--yandex-statistics-date-start", "yandex_date_start", type=click.DateTime()) +@click.option("--yandex-statistics-date-stop", "yandex_date_stop", type=click.DateTime()) +@processor("yandex_token") +def yandex_statistics(**kwargs): + return YandexStatisticsReader(**extract_args("yandex_", kwargs)) diff --git a/ack/readers/yandex_statistics/config.py b/ack/readers/yandex_statistics/config.py new file mode 100644 index 00000000..d670c4c9 --- /dev/null +++ b/ack/readers/yandex_statistics/config.py @@ -0,0 +1,169 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +import random +import datetime +from typing import Literal, List, Tuple + +from pydantic import BaseModel, validator + +YANDEX_DIRECT_API_BASE_URL = "https://api.direct.yandex.com/json/v5/" + +LANGUAGES = ["en", "ru", "uk"] + +REPORT_TYPES = [ + "ACCOUNT_PERFORMANCE_REPORT", + "CAMPAIGN_PERFORMANCE_REPORT", + "ADGROUP_PERFORMANCE_REPORT", + "AD_PERFORMANCE_REPORT", + "CRITERIA_PERFORMANCE_REPORT", + "CUSTOM_REPORT", + "REACH_AND_FREQUENCY_PERFORMANCE_REPORT", + "SEARCH_QUERY_PERFORMANCE_REPORT", +] + +STATS_FIELDS = [ + "AdFormat", + "AdGroupId", + "AdGroupName", + "AdId", + "AdNetworkType", + "Age", + "AudienceTargetId", + "AvgClickPosition", + "AvgCpc", + "AvgCpm", + "AvgImpressionFrequency", + "AvgImpressionPosition", + "AvgPageviews", + "AvgTrafficVolume", + "BounceRate", + "Bounces", + "CampaignId", + "CampaignName", + "CampaignType", + "CarrierType", + "Clicks", + "ClickType", + "ConversionRate", + "Conversions", + "Cost", + "CostPerConversion", + "Criteria", + "CriteriaId", + "CriteriaType", + "Criterion", + "CriterionId", + "CriterionType", + "Ctr", + "Date", + "Device", + "DynamicTextAdTargetId", + "ExternalNetworkName", + "Gender", + "GoalsRoi", + "ImpressionReach", + "Impressions", + "ImpressionShare", + "Keyword", + "LocationOfPresenceId", + "LocationOfPresenceName", + "MatchedKeyword", + "MatchType", + "MobilePlatform", + "Month", + "Placement", + "Profit", + "Quarter", + "Query", + "Revenue", + "RlAdjustmentId", + "Sessions", + "Slot", + "SmartBannerFilterId", + "TargetingLocationId", + "TargetingLocationName", + "Week", + "WeightedCtr", + "WeightedImpressions", + "Year", +] + +DATE_RANGE_TYPES = [ + "TODAY", + "YESTERDAY", + "THIS_WEEK_MON_TODAY", + "THIS_WEEK_SUN_TODAY", + "LAST_WEEK", + "LAST_BUSINESS_WEEK", + 
"LAST_WEEK_SUN_SAT", + "THIS_MONTH", + "LAST_MONTH", + "ALL_TIME", + "CUSTOM_DATE", + "AUTO", + "LAST_3_DAYS", + "LAST_5_DAYS", + "LAST_7_DAYS", + "LAST_14_DAYS", + "LAST_30_DAYS", + "LAST_90_DAYS", + "LAST_365_DAYS", +] + +OPERATORS = [ + "EQUALS", + "NOT_EQUALS", + "IN", + "NOT_IN", + "LESS_THAN", + "GREATER_THAN", + "STARTS_WITH_IGNORE_CASE", + "DOES_NOT_START_WITH_IGNORE_CASE", + "STARTS_WITH_ANY_IGNORE_CASE", + "DOES_NOT_START_WITH_ALL_IGNORE_CASE", +] + + +class YandexStatisticsReaderConfig(BaseModel): + token: str + report_language: Literal[tuple(LANGUAGES)] = "en" + filters: List[Tuple[Literal[tuple(STATS_FIELDS)], Literal[tuple(OPERATORS)], str]] = [] + max_rows: int = None + fields: List[Literal[tuple(STATS_FIELDS)]] + report_name: str = f"stats_report_{datetime.date.today()}_{random.randrange(10000)}" + report_type: Literal[tuple(REPORT_TYPES)] + date_range: Literal[tuple(DATE_RANGE_TYPES)] + include_vat: bool + date_start: datetime.datetime = None + date_stop: datetime.datetime = None + + @validator("date_start", "date_stop", pre=True) + def date_format(cls, v): + if isinstance(v, str): + try: + return datetime.datetime.strptime(v, "%Y-%m-%d") + except ValueError: + raise ValueError("Datetime format must follow 'YYYY-MM-DD'") + return v + + @validator("filters") + def filters_str_to_list(cls, v): + for i in range(len(v)): + v[i] = list(v[i]) + v[i][2] = v[i][2].split(",") + return v diff --git a/ack/readers/yandex_statistics/reader.py b/ack/readers/yandex_statistics/reader.py new file mode 100644 index 00000000..db03c61c --- /dev/null +++ b/ack/readers/yandex_statistics/reader.py @@ -0,0 +1,131 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any
later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +import time +from http import HTTPStatus +from typing import Dict, Tuple + +import ack.clients.api.helper as api_client_helper +from click import ClickException +from ack.clients.api.client import ApiClient +from ack.config import logger +from ack.readers.reader import Reader +from ack.readers.yandex_statistics.config import YANDEX_DIRECT_API_BASE_URL +from ack.streams.json_stream import JSONStream +from ack.utils.text import get_report_generator_from_flat_file + + +class YandexStatisticsReader(Reader): + def __init__( + self, token, fields: Tuple[str], report_type: str, report_name: str, date_range: str, include_vat: bool, **kwargs, + ): + self.token = token + self.fields = list(fields) + self.report_type = report_type + self.report_name = report_name + self.date_range = date_range + self.include_vat = include_vat + self.kwargs = kwargs + + def result_generator(self): + api_client = ApiClient(self.token, YANDEX_DIRECT_API_BASE_URL) + body = self._build_request_body() + headers = self._build_request_headers() + while True: + response = api_client.execute_request(url="reports", body=body, headers=headers, stream=True) + if response.status_code == HTTPStatus.CREATED: + waiting_time = int(response.headers["retryIn"]) + logger.info(f"Report added to queue. 
Should be ready in {waiting_time} min.") + time.sleep(waiting_time * 60) + elif response.status_code == HTTPStatus.ACCEPTED: + logger.info("Report in queue.") + elif response.status_code == HTTPStatus.OK: + logger.info("Report successfully retrieved.") + + return get_report_generator_from_flat_file(response.iter_lines(), delimiter="\t", skip_n_first=1,) + + return get_report_generator_from_flat_file(response.iter_lines(), delimiter="\t", skip_n_first=1,) + + elif response.status_code == HTTPStatus.BAD_REQUEST: + logger.error("Invalid request.") + logger.error(response.json()) + break + elif response.status_code == HTTPStatus.INTERNAL_SERVER_ERROR: + logger.error("Internal server error.") + logger.error(response.json()) + break + else: + logger.error(response.json()) + break + return None + + def _build_request_body(self) -> Dict: + body = {} + selection_criteria = self._add_custom_dates_if_set() + if len(self.kwargs["filters"]) > 0: + selection_criteria["Filter"] = [ + api_client_helper.get_dict_with_keys_converted_to_new_string_format( + field=filter_element[0], operator=filter_element[1], values=filter_element[2], + ) + for filter_element in self.kwargs["filters"] + ] + body["params"] = api_client_helper.get_dict_with_keys_converted_to_new_string_format( + selection_criteria=selection_criteria, + field_names=self.fields, + report_name=self.report_name, + report_type=self.report_type, + date_range_type=self.date_range, + format="TSV", + include_v_a_t="YES" if self.include_vat else "NO", + ) + if self.kwargs["max_rows"] is not None: + body["params"]["Page"] = api_client_helper.get_dict_with_keys_converted_to_new_string_format( + limit=self.kwargs["max_rows"] + ) + return body + + def _build_request_headers(self) -> Dict: + return { + "skipReportSummary": "true", + "Accept-Language": self.kwargs["report_language"], + } + + def _add_custom_dates_if_set(self) -> Dict: + selection_criteria = {} + if self.kwargs["date_start"] is not None and self.kwargs["date_stop"] is 
not None and self.date_range == "CUSTOM_DATE": + selection_criteria["DateFrom"] = self.kwargs["date_start"].strftime("%Y-%m-%d") + selection_criteria["DateTo"] = self.kwargs["date_stop"].strftime("%Y-%m-%d") + elif ( + self.kwargs["date_start"] is not None and self.kwargs["date_stop"] is not None and self.date_range != "CUSTOM_DATE" + ): + raise ClickException("Wrong date range. If start and stop dates are set, should be CUSTOM_DATE.") + elif ( + self.kwargs["date_start"] is not None or self.kwargs["date_stop"] is not None + ) and self.date_range != "CUSTOM_DATE": + raise ClickException( + ( + "Wrong combination of date parameters. " + "Only use date start and date stop with date range set to CUSTOM_DATE." + ) + ) + elif (self.kwargs["date_start"] is None or self.kwargs["date_stop"] is None) and self.date_range == "CUSTOM_DATE": + raise ClickException("Missing at least one date. Have you set start and stop dates?") + return selection_criteria + + def read(self): + yield JSONStream(f"results_{self.report_type}", self.result_generator()) diff --git a/ack/streams/__init__.py b/ack/streams/__init__.py new file mode 100644 index 00000000..d46139b7 --- /dev/null +++ b/ack/streams/__init__.py @@ -0,0 +1,17 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. diff --git a/nck/streams/format_date_stream.py b/ack/streams/format_date_stream.py similarity index 89% rename from nck/streams/format_date_stream.py rename to ack/streams/format_date_stream.py index 9bc59a1d..38ce3b37 100644 --- a/nck/streams/format_date_stream.py +++ b/ack/streams/format_date_stream.py @@ -15,16 +15,16 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -from nck.streams.json_stream import JSONStream +from ack.streams.json_stream import JSONStream import dateutil.parser from datetime import datetime class FormatDateStream(JSONStream): keys = [] - date_format = '%Y-%m-%d' + date_format = "%Y-%m-%d" - def __init__(self, name, source_generator, keys: [] = None, date_format: str = '%Y-%m-%d'): + def __init__(self, name, source_generator, keys: [] = None, date_format: str = "%Y-%m-%d"): super().__init__(name, source_generator) FormatDateStream.keys = keys FormatDateStream.date_format = date_format @@ -45,5 +45,5 @@ def _parse_record(cls, o): @classmethod def _format_date(cls, v): parsed_date = dateutil.parser.parse(v) - datetimeobject = datetime.strptime(str(parsed_date), '%Y-%m-%d %H:%M:%S') + datetimeobject = datetime.strptime(str(parsed_date), "%Y-%m-%d %H:%M:%S") return datetimeobject.strftime(cls.date_format) diff --git a/nck/streams/json_stream.py b/ack/streams/json_stream.py similarity index 96% rename from nck/streams/json_stream.py rename to ack/streams/json_stream.py index e8bd603d..6a5d7d60 100644 --- a/nck/streams/json_stream.py +++ b/ack/streams/json_stream.py @@ -17,7 +17,7 @@ # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
import json -from nck.streams.stream import Stream +from ack.streams.stream import Stream class JSONStream(Stream): diff --git a/nck/streams/normalized_json_stream.py b/ack/streams/normalized_json_stream.py similarity index 91% rename from nck/streams/normalized_json_stream.py rename to ack/streams/normalized_json_stream.py index 1bc34eb8..7a877a5a 100644 --- a/nck/streams/normalized_json_stream.py +++ b/ack/streams/normalized_json_stream.py @@ -15,15 +15,13 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -from nck.streams.json_stream import JSONStream +from ack.streams.json_stream import JSONStream class NormalizedJSONStream(JSONStream): @classmethod def encode_record(cls, record): - return super(NormalizedJSONStream, cls).encode_record( - cls._normalize_keys(record) - ) + return super(NormalizedJSONStream, cls).encode_record(cls._normalize_keys(record)) @classmethod def _normalize_keys(cls, o): diff --git a/nck/streams/pickle_stream.py b/ack/streams/pickle_stream.py similarity index 96% rename from nck/streams/pickle_stream.py rename to ack/streams/pickle_stream.py index 08cfa15f..29c6b30a 100644 --- a/nck/streams/pickle_stream.py +++ b/ack/streams/pickle_stream.py @@ -17,7 +17,7 @@ # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
import pickle -from nck.streams.stream import Stream +from ack.streams.stream import Stream class PickleStream(Stream): diff --git a/nck/streams/stream.py b/ack/streams/stream.py similarity index 94% rename from nck/streams/stream.py rename to ack/streams/stream.py index b5101629..77cdbc7a 100644 --- a/nck/streams/stream.py +++ b/ack/streams/stream.py @@ -79,7 +79,7 @@ def decode_record(cls, record): def create_stream_name(name): ts = time.time() ts_as_string = datetime.fromtimestamp(ts).strftime("%Y-%m-%d-%H-%M-%S") - return "{}_{}".format(name, ts_as_string) + return f"{name}_{ts_as_string}" @property def name(self): @@ -110,9 +110,9 @@ def readable(self): def readinto(self, b): try: - chunck_length = len(b) # We're supposed to return at most this much + chunk_length = len(b) # We're supposed to return at most this much chunk = self.leftover or encode(next(iterable)) - output, self.leftover = chunk[:chunck_length], chunk[chunck_length:] + output, self.leftover = chunk[:chunk_length], chunk[chunk_length:] b[: len(output)] = output self.count += len(output) return len(output) diff --git a/ack/utils/__init__.py b/ack/utils/__init__.py new file mode 100644 index 00000000..d46139b7 --- /dev/null +++ b/ack/utils/__init__.py @@ -0,0 +1,17 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. diff --git a/nck/utils/args.py b/ack/utils/args.py similarity index 100% rename from nck/utils/args.py rename to ack/utils/args.py diff --git a/ack/utils/date_handler.py b/ack/utils/date_handler.py new file mode 100644 index 00000000..368c25ab --- /dev/null +++ b/ack/utils/date_handler.py @@ -0,0 +1,94 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ +import calendar +from datetime import date, timedelta, datetime +from typing import Tuple + +from ack.utils.exceptions import DateDefinitionException + + +def __get_yesterday_date(current_date: date) -> Tuple[date, date]: + yesterday = current_date - timedelta(days=1) + return yesterday, yesterday + + +def __get_last_7d_dates(current_date: date) -> Tuple[date, date]: + return current_date - timedelta(days=8), current_date - timedelta(days=1) + + +def __get_last_90d_dates(current_date: date) -> Tuple[date, date]: + return current_date - timedelta(days=91), current_date - timedelta(days=1) + + +def __get_previous_week_dates(current_date: date) -> Tuple[date, date]: + first_day_of_last_week = current_date - timedelta(days=current_date.weekday(), weeks=1) + return first_day_of_last_week, first_day_of_last_week + timedelta(days=6) + + +def __get_previous_month_dates(current_date: date) -> Tuple[date, date]: + last_day_of_previous_month = current_date.replace(day=1) - timedelta(days=1) + year = last_day_of_previous_month.year + month = last_day_of_previous_month.month + return date(year, month, 1), date(year, month, calendar.monthrange(year, month)[1]) + + +DEFAULT_DATE_RANGE_FUNCTIONS = { + "YESTERDAY": __get_yesterday_date, + "LAST_7_DAYS": __get_last_7d_dates, + "PREVIOUS_WEEK": __get_previous_week_dates, + "PREVIOUS_MONTH": __get_previous_month_dates, + "LAST_90_DAYS": __get_last_90d_dates, +} + + +def check_date_range_definition_conformity(start_date: date, end_date: date, date_range: str): + + if date_range: + if any([start_date, end_date]): + raise DateDefinitionException("You must define either (start_date, end_date) or date_range") + else: + if not all([start_date, end_date]): + raise DateDefinitionException("You must at least define a couple (start-date, end-date) or a date-range") + elif end_date < start_date: + raise DateDefinitionException("Report end date should be equal or ulterior to report start date.") + + +def 
get_date_start_and_date_stop_from_date_range(date_range: str) -> Tuple[date, date]: + """Returns date start and date stop based on the date range provided + and the current date. + + Args: + date_range (str): One of the default date ranges that exist + + Returns: + Tuple[date, date]: date start and date stop that match the date range + """ + current_date = date.today() + return DEFAULT_DATE_RANGE_FUNCTIONS[date_range](current_date) + + +def build_date_range(start_date: date, end_date: date, date_range: str) -> Tuple[datetime, datetime]: + check_date_range_definition_conformity(start_date, end_date, date_range) + + if date_range is not None: + start_date, end_date = get_date_start_and_date_stop_from_date_range(date_range) + start_date = datetime(start_date.year, start_date.month, start_date.day) + end_date = datetime(end_date.year, end_date.month, end_date.day) + + return start_date, end_date diff --git a/ack/utils/exceptions.py b/ack/utils/exceptions.py new file mode 100644 index 00000000..55752c40 --- /dev/null +++ b/ack/utils/exceptions.py @@ -0,0 +1,89 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ + +class RetryTimeoutError(Exception): + """Raised when a query exceeds it's time limit threshold.""" + + pass + + +class SdfOperationError(Exception): + """Raised when a sdf operation has failed.""" + + pass + + +class DateDefinitionException(Exception): + """Raised when the date parameters are not valid""" + + pass + + +class NoDateDefinitionException(Exception): + """Raised when no date range or start date/end date is defined""" + + pass + + +class MissingDateDefinitionException(Exception): + """Raised when either the start date or end date is missing""" + + pass + + +class InconsistentDateDefinitionException(Exception): + """Raised when both start date/end date and date range are defined""" + + pass + + +class MissingItemsInResponse(Exception): + """Raised when the body of the response is missing items""" + + pass + + +class APIRateLimitError(Exception): + """Raised when the API rate limit is reached""" + + pass + + +class ReportDescriptionError(Exception): + """Raised when report description is not valid""" + + pass + + +class ReportNotReadyError(Exception): + """Raised when report is not ready yet""" + + pass + + +class ReportTemplateNotFoundError(Exception): + """Raised when The Trade Desk report template was not found""" + + pass + + +class ReportScheduleNotReadyError(Exception): + """Raised when The Trade Desk report schedule is not ready yet""" + + pass diff --git a/nck/utils/file_reader.py b/ack/utils/file_reader.py similarity index 65% rename from nck/utils/file_reader.py rename to ack/utils/file_reader.py index 149e9208..7e004007 100644 --- a/nck/utils/file_reader.py +++ b/ack/utils/file_reader.py @@ -15,27 +15,35 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-from enum import Enum -import csv + import codecs +import csv import gzip -import zipfile import json +import zipfile + +csv.field_size_limit(1000000) def unzip(input_file, output_path): - with zipfile.ZipFile(input_file, 'r') as zip_ref: + with zipfile.ZipFile(input_file, "r") as zip_ref: zip_ref.extractall(output_path) def sdf_to_njson_generator(path_to_file): csv_reader = CSVReader(csv_delimiter=",", csv_fieldnames=None) with open(path_to_file, "rb") as fd: - dict_reader = csv_reader.read_csv(fd) + dict_reader = csv_reader.read(fd) for line in dict_reader: yield line +def read_json(path_to_file): + with open(path_to_file) as f: + data = json.load(f) + return data + + def format_csv_delimiter(csv_delimiter): _csv_delimiter = csv_delimiter.encode().decode("unicode_escape") if csv_delimiter == "newline": @@ -51,20 +59,41 @@ def format_csv_fieldnames(csv_fieldnames): elif isinstance(csv_fieldnames, (str, bytes)): _csv_fieldnames = json.loads(csv_fieldnames) else: - raise TypeError( - f"The CSV fieldnames is of the following type: {type(csv_fieldnames)}." 
- ) + raise TypeError(f"The CSV fieldnames is of the following type: {type(csv_fieldnames)}.") assert isinstance(_csv_fieldnames, list) return _csv_fieldnames -class CSVReader(object): +def create_file_reader(_format, **kwargs): + if _format == "csv": + return CSVReader(**kwargs) + if _format == "gz": + return GZReader(**kwargs) + if _format == "njson": + return NJSONReader(**kwargs) + else: + raise NotImplementedError(f"The file format {str(_format)} has not been implemented for reading yet.") + + +class FileReader: + def __init__(self, **kwargs): + self.reader = lambda fd: self.read(fd, **kwargs) + + def read(self, fd, **kwargs): + fd.seek(0) + return codecs.iterdecode(fd, encoding="utf8") + + def get_reader(self): + return self.reader + + +class CSVReader(FileReader): def __init__(self, csv_delimiter, csv_fieldnames, **kwargs): self.csv_delimiter = format_csv_delimiter(csv_delimiter) self.csv_fieldnames = format_csv_fieldnames(csv_fieldnames) if csv_fieldnames is not None else None - self.csv_reader = lambda fd: self.read_csv(fd, **kwargs) + super().__init__(**kwargs) - def read_csv(self, fd, **kwargs): + def read(self, fd, **kwargs): fd.seek(0) fd = self.decompress(fd) return csv.DictReader( @@ -77,9 +106,6 @@ def read_csv(self, fd, **kwargs): def decompress(self, fd): return fd - def get_csv_reader(self): - return self.csv_reader - class GZReader(CSVReader): def decompress(self, fd): @@ -87,6 +113,11 @@ def decompress(self, fd): return gzf -class FileEnum(Enum): - CSV = CSVReader - GZ = GZReader +class NJSONReader(FileReader): + def read(self, fd, **kwargs): + fd.seek(0) + return self.jsongene(fd, **kwargs) + + def jsongene(self, fd, **kwargs): + for line in codecs.iterdecode(fd, encoding="utf8"): + yield json.loads(line) diff --git a/ack/utils/formatter.py b/ack/utils/formatter.py new file mode 100644 index 00000000..19a9f4a2 --- /dev/null +++ b/ack/utils/formatter.py @@ -0,0 +1,43 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 
Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +from typing import Dict, List + +from ack.readers.reader import Reader +from ack.entrypoints.json.readers import readers_classes +from ack.writers.writer import Writer +from ack.entrypoints.json.writers import writers_classes + + +def format_reader(reader: Dict) -> Reader: + reader_name = reader.pop("name") + config = readers_classes[reader_name][1](**reader) + return readers_classes[reader_name][0](**config.dict()) + + +def format_writers(writers: List[Dict]) -> [Writer]: + writers_list = [] + for writer in writers: + writer_name = writer.pop("name") + # if there is a config class/file => there is arguments + if len(writers_classes[writer_name]) > 1: + config = writers_classes[writer_name][1](**writer) + writers_list.append(writers_classes[writer_name][0](**config.dict())) + else: + writers_list.append(writers_classes[writer_name][0]()) + + return writers_list diff --git a/nck/commands/command.py b/ack/utils/processor.py similarity index 93% rename from nck/commands/command.py rename to ack/utils/processor.py index 780799fe..2ba53c02 100644 --- a/nck/commands/command.py +++ b/ack/utils/processor.py @@ -15,8 +15,10 @@ # You should have received a copy of the GNU Lesser General Public License # 
along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + from functools import update_wrapper -import logging + +from ack.config import logger def processor(*sensitive_fields): @@ -35,7 +37,7 @@ def new_func(*args, **kwargs): else: _kwargs[key] = value - logging.info("Calling %s with (%s)", f.__name__, _kwargs) + logger.info(f"Calling {f.__name__} with ({_kwargs})") def processor(): return f(*args, **kwargs) diff --git a/nck/state_service.py b/ack/utils/redis.py similarity index 55% rename from nck/state_service.py rename to ack/utils/redis.py index a2db1860..c477a353 100644 --- a/nck/state_service.py +++ b/ack/utils/redis.py @@ -15,35 +15,18 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -import redis -import logging -import pickle - -_state_service = None - - -def state(): - global _state_service - if not _state_service: - raise Exception("State Service has not been configured") - - return _state_service +import pickle -def configure(name, host, port): - global _state_service - if _state_service: - raise Exception("State Service already configured") +from ack.config import logger - _state_service = StateService(name, host, port) +import redis -class StateService(object): +class RedisStateService: def __init__(self, name, host, port=6379): - if host: - logging.info("Using checkpointing service: %s:%d (%s)", host, port, name) - + logger.info(f"Using checkpointing service: {host}:{port} ({name})") self._enabled = True self._name = name self._host = host @@ -51,23 +34,12 @@ def __init__(self, name, host, port=6379): self._client = redis.Redis(host=host, port=port) else: self._enabled = False - logging.info("No checkpointing") - - def get(self, key, default=None): - if not self.enabled: - return default + 
logger.info("No checkpointing") - if not self._client.hexists(self._name, key): - return default - - return pickle.loads(self._client.hget(self._name, key)) + def get(self, key): + if self._enabled and self._client.hexists(self._name, key): + return pickle.loads(self._client.hget(self._name, key)) def set(self, key, value): - if not self.enabled: - return - - self._client.hset(self._name, key, pickle.dumps(value)) - - @property - def enabled(self): - return self._enabled + if self._enabled: + self._client.hset(self._name, key, pickle.dumps(value)) diff --git a/nck/utils/retry.py b/ack/utils/retry.py similarity index 78% rename from nck/utils/retry.py rename to ack/utils/retry.py index ef7b7625..30cdb300 100644 --- a/nck/utils/retry.py +++ b/ack/utils/retry.py @@ -15,22 +15,20 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-from tenacity import ( - retry as _retry, - wait_exponential, - before_sleep_log, - before_log, - stop_after_attempt, -) -import config + import logging +from ack.config import logger +from tenacity import before_log, before_sleep_log +from tenacity import retry as _retry +from tenacity import stop_after_attempt, wait_exponential + def retry(fn): return _retry( wait=wait_exponential(multiplier=1, min=4, max=10), stop=stop_after_attempt(5), reraise=True, - before=before_log(config.logger, logging.INFO), - before_sleep=before_sleep_log(config.logger, logging.INFO), + before=before_log(logger, logging.INFO), + before_sleep=before_sleep_log(logger, logging.INFO), )(fn) diff --git a/ack/utils/stdout_to_log.py b/ack/utils/stdout_to_log.py new file mode 100644 index 00000000..b88a3435 --- /dev/null +++ b/ack/utils/stdout_to_log.py @@ -0,0 +1,87 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ +import logging +import sys + +import httplib2 + + +class STDoutToLog: + def __init__(self, logger_name, level): + self.content = [] + self.logg = logging.getLogger(logger_name) + self.level = level + + def write(self, string): + if not string.endswith("\n"): + self.content.append(string) + else: + debug_info = ( + "".join(self.content) + .replace("\\r", "") + .encode("latin1") + .decode("unicode-escape") + .encode("latin1") + .decode("utf-8") + .replace("'", "") + ) + + debug_info = "\n".join([ll.rstrip() for ll in debug_info.splitlines() if ll.strip()]) + self.logg.log(self.level, debug_info) + self.content = [] + + def flush(self): + pass + + +def http_log(logger_name, level=logging.DEBUG): + def decorator(func): + def wrapper(*args, **kwargs): + httplib2.debuglevel = 4 + + httpLog = STDoutToLog(logger_name, level) + sys.stdout = httpLog + + items = [] + for item in func(*args, **kwargs): + items.append(item) + + sys.stdout = sys.__stdout__ + + for item in items: + yield item + + return wrapper + + return decorator + + +def http_log_for_init(logger_name, level=logging.DEBUG): + def decorator(func): + def wrapper(*args, **kwargs): + httplib2.debuglevel = 4 + + httpLog = STDoutToLog(logger_name, level) + sys.stdout = httpLog + func(*args, **kwargs) + sys.stdout = sys.__stdout__ + + return wrapper + + return decorator diff --git a/nck/utils/text.py b/ack/utils/text.py similarity index 89% rename from nck/utils/text.py rename to ack/utils/text.py index 50621451..a0227dfb 100644 --- a/nck/utils/text.py +++ b/ack/utils/text.py @@ -15,21 +15,18 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-import logging -import re + import csv -from io import StringIO +import re from collections import deque +from io import StringIO from itertools import islice +from ack.config import logger + def get_report_generator_from_flat_file( - line_iterator, - delimiter=",", - skip_n_first=0, - skip_n_last=0, - add_column=False, - column_dict={}, + line_iterator, delimiter=",", skip_n_first=0, skip_n_last=0, add_column=False, column_dict={}, ): """ From the line iterator of a flat file: @@ -62,9 +59,7 @@ def get_report_generator_from_flat_file( else: parsed_line = parse_decoded_line(line, delimiter) if len(parsed_line) != len(headers): - logging.warning( - f"Skipping line '{line}': length of parsed line doesn't match length of headers." - ) + logger.warning(f"Skipping line '{line}': length of parsed line doesn't match length of headers.") else: record = dict(zip(headers, parsed_line)) if add_column: @@ -78,7 +73,7 @@ def decode_if_needed(line): try: line = line.decode("utf-8") except UnicodeDecodeError as e: - logging.warning( + logger.warning( "An error has occurred while parsing the file." f"The line could not be decoded in {e.encoding}." 
f"Invalid input that the codec failed on: {e.object[e.start : e.end]}" @@ -89,13 +84,7 @@ def decode_if_needed(line): def parse_decoded_line(line, delimiter=",", quotechar='"'): line_as_file = StringIO(line) - reader = csv.reader( - line_as_file, - delimiter=delimiter, - quotechar=quotechar, - quoting=csv.QUOTE_ALL, - skipinitialspace=True, - ) + reader = csv.reader(line_as_file, delimiter=delimiter, quotechar=quotechar, quoting=csv.QUOTE_ALL, skipinitialspace=True,) return next(reader) @@ -134,3 +123,7 @@ def reformat_naming_for_bq(text, char="_"): text = re.sub(r"[\s\W]+", char, text) text = re.sub(r"[" + char + "]+", char, text.strip()) return text.lower() + + +def strip_prefix(text, prefix): + return re.split(prefix, text)[-1] diff --git a/ack/writers/__init__.py b/ack/writers/__init__.py new file mode 100644 index 00000000..d46139b7 --- /dev/null +++ b/ack/writers/__init__.py @@ -0,0 +1,17 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
diff --git a/ack/writers/amazon_s3/__init__.py b/ack/writers/amazon_s3/__init__.py new file mode 100644 index 00000000..d46139b7 --- /dev/null +++ b/ack/writers/amazon_s3/__init__.py @@ -0,0 +1,17 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. diff --git a/ack/writers/amazon_s3/cli.py b/ack/writers/amazon_s3/cli.py new file mode 100644 index 00000000..04645493 --- /dev/null +++ b/ack/writers/amazon_s3/cli.py @@ -0,0 +1,34 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +import click +from ack.utils.args import extract_args +from ack.utils.processor import processor +from ack.writers.amazon_s3.writer import AmazonS3Writer + + +@click.command(name="write_s3") +@click.option("--s3-bucket-name", help="S3 Bucket name", required=True) +@click.option("--s3-bucket-region", required=True) +@click.option("--s3-access-key-id", required=True) +@click.option("--s3-access-key-secret", required=True) +@click.option("--s3-prefix", help="s3 Prefix", default=None) +@click.option("--s3-filename", help="Override the default name of the file (don't add the extension)") +@processor("s3_access_key_id", "s3_access_key_secret") +def amazon_s3(**kwargs): + return AmazonS3Writer(**extract_args("s3_", kwargs)) diff --git a/ack/writers/amazon_s3/config.py b/ack/writers/amazon_s3/config.py new file mode 100644 index 00000000..8074adb3 --- /dev/null +++ b/ack/writers/amazon_s3/config.py @@ -0,0 +1,10 @@ +from pydantic import BaseModel + + +class AmazonS3WriterConfig(BaseModel): + bucket_name: str + bucket_region: str + access_key_id: str + access_key_secret: str + prefix: str = None + filename: str diff --git a/ack/writers/amazon_s3/writer.py b/ack/writers/amazon_s3/writer.py new file mode 100644 index 00000000..77436a31 --- /dev/null +++ b/ack/writers/amazon_s3/writer.py @@ -0,0 +1,49 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +import boto3 +from ack.writers.object_storage.writer import ObjectStorageWriter +from ack.utils.retry import retry + + +class AmazonS3Writer(ObjectStorageWriter): + + _write_aux = retry(ObjectStorageWriter._write_aux) + + def __init__(self, bucket_name, bucket_region, access_key_id, access_key_secret, prefix=None, filename=None, **kwargs): + self.boto_config = { + "region_name": bucket_region, + "aws_access_key_id": access_key_id, + "aws_secret_access_key": access_key_secret, + } + super().__init__(bucket_name=bucket_name, prefix=prefix, file_name=filename, platform="S3", **kwargs) + + def _create_client(self): + return boto3.resource("s3", **self.boto_config) + + def _create_bucket(self, client): + return client.Bucket(self._bucket_name) + + def _list_buckets(self, client): + return client.buckets.all() + + def _create_blob(self, file_name, stream): + self._bucket.upload_fileobj(stream.as_file(), file_name) + + def _get_uri(self, file_name): + return f"s3{self._get_file_path(file_name)}" diff --git a/ack/writers/console/__init__.py b/ack/writers/console/__init__.py new file mode 100644 index 00000000..d46139b7 --- /dev/null +++ b/ack/writers/console/__init__.py @@ -0,0 +1,17 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 
3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. diff --git a/tests/writers/test_gcs_writer.py b/ack/writers/console/cli.py similarity index 74% rename from tests/writers/test_gcs_writer.py rename to ack/writers/console/cli.py index 5a250098..c3e7270f 100644 --- a/tests/writers/test_gcs_writer.py +++ b/ack/writers/console/cli.py @@ -15,12 +15,14 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-import unittest -from nck.writers.gcs_writer import GCSWriter +import click +from ack.utils.args import extract_args +from ack.utils.processor import processor +from ack.writers.console.writer import ConsoleWriter -class TestGCSWriter(unittest.TestCase): - def test_extract_extension(self): - filename = "test.py" - print(GCSWriter._extract_extension(filename)) - assert GCSWriter._extract_extension(filename) == ("test", ".py") + +@click.command(name="write_console") +@processor() +def console(**kwargs): + return ConsoleWriter(**extract_args("console_", kwargs)) diff --git a/nck/writers/console_writer.py b/ack/writers/console/writer.py similarity index 78% rename from nck/writers/console_writer.py rename to ack/writers/console/writer.py index cede0720..304941d0 100644 --- a/nck/writers/console_writer.py +++ b/ack/writers/console/writer.py @@ -15,18 +15,10 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-import click -import sys - -from nck.writers.writer import Writer -from nck.commands.command import processor -from nck.utils.args import extract_args +import sys -@click.command(name="write_console") -@processor() -def console(**kwargs): - return ConsoleWriter(**extract_args("console_", kwargs)) +from ack.writers.writer import Writer class ConsoleWriter(Writer): @@ -35,7 +27,7 @@ def __init__(self): def write(self, stream): """ - Write file to console, mainly used for debugging + Write file to console, mainly used for debugging """ # this is how to read from a file as stream file = stream.as_file() diff --git a/ack/writers/google_bigquery/__init__.py b/ack/writers/google_bigquery/__init__.py new file mode 100644 index 00000000..d46139b7 --- /dev/null +++ b/ack/writers/google_bigquery/__init__.py @@ -0,0 +1,17 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
diff --git a/ack/writers/google_bigquery/cli.py b/ack/writers/google_bigquery/cli.py new file mode 100644 index 00000000..5d83b353 --- /dev/null +++ b/ack/writers/google_bigquery/cli.py @@ -0,0 +1,37 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ +import click +from ack.utils.args import extract_args +from ack.utils.processor import processor +from ack.writers.google_bigquery.writer import GoogleBigQueryWriter + + +@click.command(name="write_bq") +@click.option("--bq-dataset", required=True) +@click.option("--bq-table", required=True) +@click.option("--bq-bucket", required=True) +@click.option("--bq-partition-column") +@click.option( + "--bq-write-disposition", default="truncate", type=click.Choice(["truncate", "append"]), +) +@click.option("--bq-location", default="EU", type=click.Choice(["EU", "US"])) +@click.option("--bq-keep-files", is_flag=True, default=False) +@processor() +def google_bigquery(**kwargs): + return GoogleBigQueryWriter(**extract_args("bq_", kwargs)) diff --git a/ack/writers/google_bigquery/config.py b/ack/writers/google_bigquery/config.py new file mode 100644 index 00000000..761548e8 --- /dev/null +++ b/ack/writers/google_bigquery/config.py @@ -0,0 +1,17 @@ +from typing import Literal + +from pydantic import BaseModel + + +WRITE_DISPOSITIONS = ("truncate", "append") +LOCATIONS = ("EU", "US") + + +class GoogleBigQueryWriterConfig(BaseModel): + dataset: str + table: str + bucket: str + partition_column: str = None + write_disposition: Literal[WRITE_DISPOSITIONS] = "truncate" + location: Literal[LOCATIONS] = "EU" + keep_files: bool = False diff --git a/nck/writers/bigquery_writer.py b/ack/writers/google_bigquery/writer.py similarity index 63% rename from nck/writers/bigquery_writer.py rename to ack/writers/google_bigquery/writer.py index 1469c0be..76db1226 100644 --- a/nck/writers/bigquery_writer.py +++ b/ack/writers/google_bigquery/writer.py @@ -15,57 +15,26 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-import config -import click +from google.cloud import bigquery +from ack import config +from ack.config import logger +from ack.clients.google.client import GoogleClient +from ack.streams.normalized_json_stream import NormalizedJSONStream +from ack.utils.retry import retry +from ack.writers.google_cloud_storage.writer import GoogleCloudStorageWriter +from ack.writers.writer import Writer -from config import logging -from google.cloud import bigquery -from nck.streams.normalized_json_stream import NormalizedJSONStream -from nck.writers.writer import Writer -from nck.writers.gcs_writer import GCSWriter -from nck.commands.command import processor -from nck.utils.args import extract_args -from nck.utils.retry import retry -from nck.helpers.google_base import GoogleBaseClass - - -@click.command(name="write_bq") -@click.option("--bq-dataset", required=True) -@click.option("--bq-table", required=True) -@click.option("--bq-bucket", required=True) -@click.option("--bq-partition-column") -@click.option( - "--bq-write-disposition", - default="truncate", - type=click.Choice(["truncate", "append"]), -) -@click.option("--bq-location", default="EU", type=click.Choice(["EU", "US"])) -@click.option("--bq-keep-files", is_flag=True, default=False) -@processor() -def bq(**kwargs): - return BigQueryWriter(**extract_args("bq_", kwargs)) - - -class BigQueryWriter(Writer, GoogleBaseClass): +class GoogleBigQueryWriter(Writer, GoogleClient): _client = None def __init__( - self, - dataset, - table, - bucket, - partition_column, - write_disposition, - location, - keep_files, + self, dataset, table, bucket, partition_column, write_disposition, location, keep_files, ): self._project_id = config.PROJECT_ID - self._client = bigquery.Client( - credentials=self._get_credentials(), project=self._project_id - ) + self._client = bigquery.Client(credentials=self._get_credentials(), project=self._project_id) self._dataset = dataset self._table = table self._bucket = bucket @@ -79,22 +48,20 @@ def 
write(self, stream): normalized_stream = NormalizedJSONStream.create_from_stream(stream) - gcs_writer = GCSWriter(self._bucket, self._project_id) + gcs_writer = GoogleCloudStorageWriter(self._bucket, self._project_id) gcs_uri, blob = gcs_writer.write(normalized_stream) table_ref = self._get_table_ref() - load_job = self._client.load_table_from_uri( - gcs_uri, table_ref, job_config=self.job_config() - ) + load_job = self._client.load_table_from_uri(gcs_uri, table_ref, job_config=self.job_config()) - logging.info("Loading data into BigQuery %s:%s", self._dataset, self._table) + logger.info(f"Loading data into BigQuery {self._dataset}:{self._table}") result = load_job.result() assert result.state == "DONE" if not self._keep_files: - logging.info("Deleting GCS file: %s", gcs_uri) + logger.info(f"Deleting GCS file: {gcs_uri}") blob.delete() def _get_dataset(self): diff --git a/ack/writers/google_cloud_storage/__init__.py b/ack/writers/google_cloud_storage/__init__.py new file mode 100644 index 00000000..d46139b7 --- /dev/null +++ b/ack/writers/google_cloud_storage/__init__.py @@ -0,0 +1,17 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
diff --git a/config.py b/ack/writers/google_cloud_storage/cli.py similarity index 57% rename from config.py rename to ack/writers/google_cloud_storage/cli.py index 32762f91..30362b46 100644 --- a/config.py +++ b/ack/writers/google_cloud_storage/cli.py @@ -15,35 +15,20 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -import logging -import os -import sys -FORMAT = '%(asctime)s - (%(name)s) - %(levelname)s - %(message)s' -logging.basicConfig(format=FORMAT) -logger = logging.getLogger() -logger.setLevel(logging.INFO) - -handler = logging.StreamHandler(sys.stdout) - -logger.handlers = [handler] - - -def env(): - return os.environ.get('ENV', 'dev') - - -def is_staging(): - return env() == 'staging' - - -def is_dev(): - return env() == 'dev' - - -def is_production(): - return env() == 'production' - - -for key, var in os.environ.items(): - locals()[key] = var +import click +from ack.utils.args import extract_args +from ack.utils.processor import processor +from ack.writers.google_cloud_storage.writer import GoogleCloudStorageWriter + + +@click.command(name="write_gcs") +@click.option("--gcs-bucket", help="GCS Bucket", required=True) +@click.option("--gcs-prefix", help="GCS path to write the file.") +@click.option("--gcs-project-id", help="GCS Project Id") +@click.option( + "--gcs-filename", help="Override the default name of the file (don't add the extension)", +) +@processor() +def google_cloud_storage(**kwargs): + return GoogleCloudStorageWriter(**extract_args("gcs_", kwargs)) diff --git a/ack/writers/google_cloud_storage/config.py b/ack/writers/google_cloud_storage/config.py new file mode 100644 index 00000000..833b766c --- /dev/null +++ b/ack/writers/google_cloud_storage/config.py @@ -0,0 +1,8 @@ +from pydantic import BaseModel + + +class GoogleCloudStorageWriterConfig(BaseModel): + bucket: str + 
prefix: str = None + project_id: str + filename: str = None diff --git a/ack/writers/google_cloud_storage/writer.py b/ack/writers/google_cloud_storage/writer.py new file mode 100644 index 00000000..c4c80829 --- /dev/null +++ b/ack/writers/google_cloud_storage/writer.py @@ -0,0 +1,56 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ +import click +from google.cloud import storage +from ack import config +from ack.clients.google.client import GoogleClient +from ack.writers.object_storage.writer import ObjectStorageWriter + + +class GoogleCloudStorageWriter(ObjectStorageWriter, GoogleClient): + def __init__(self, bucket, project_id, prefix=None, filename=None, **kwargs): + self._project_id = self.get_project_id(project_id) + super().__init__(bucket, prefix, filename, platform="GCS", **kwargs) + + def _create_client(self): + return storage.Client(credentials=self._get_credentials(), project=self._project_id) + + def _create_bucket(self, client): + return client.bucket(self._bucket_name) + + def _list_buckets(self, client): + return client.list_buckets() + + def _create_blob(self, file_name, stream): + blob = self._bucket.blob(file_name) + blob.upload_from_file(stream.as_file(), content_type=stream.mime_type) + + def _get_uri(self, file_name): + return f"gs{self._get_file_path(file_name)}" + + @staticmethod + def get_project_id(project_id): + if project_id is None: + try: + return config.PROJECT_ID + except Exception: + raise click.exceptions.MissingParameter( + "Please provide a project id in ENV var or params.", param_type="--gcs-project-id", + ) + return project_id diff --git a/ack/writers/local/__init__.py b/ack/writers/local/__init__.py new file mode 100644 index 00000000..d46139b7 --- /dev/null +++ b/ack/writers/local/__init__.py @@ -0,0 +1,17 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. diff --git a/nck/writers/__init__.py b/ack/writers/local/cli.py similarity index 66% rename from nck/writers/__init__.py rename to ack/writers/local/cli.py index 44386ac3..b369baba 100644 --- a/nck/writers/__init__.py +++ b/ack/writers/local/cli.py @@ -15,24 +15,16 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -from nck.writers.writer import Writer -from nck.writers.gcs_writer import gcs -from nck.writers.console_writer import console -from nck.writers.local_writer import local -from nck.writers.bigquery_writer import bq -from nck.writers.s3_writer import s3 +import click +from ack.utils.args import extract_args +from ack.utils.processor import processor +from ack.writers.local.writer import LocalWriter -writers = [ - gcs, - console, - local, - bq, - s3 - # "oracle": oracle, - # "gsheets": gsheets, - # "salesforce": salesforce -] - -__all__ = ["writers", "Writer"] +@click.command(name="write_local") +@click.option("--local-directory", "-d", required=True, help="Destination directory") +@click.option("--local-file-name", "-n", help="Destination file name") +@processor() +def local(**kwargs): + return LocalWriter(**extract_args("local_", kwargs)) diff --git a/ack/writers/local/config.py b/ack/writers/local/config.py new file mode 100644 index 00000000..cc6a530b --- /dev/null +++ b/ack/writers/local/config.py @@ -0,0 +1,6 @@ 
+from pydantic import BaseModel + + +class LocalWriterConfig(BaseModel): + directory: str + file_name: str diff --git a/nck/writers/local_writer.py b/ack/writers/local/writer.py similarity index 68% rename from nck/writers/local_writer.py rename to ack/writers/local/writer.py index 95f64dbb..d5f4264a 100644 --- a/nck/writers/local_writer.py +++ b/ack/writers/local/writer.py @@ -15,33 +15,26 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -import click -import logging -import os - -from nck.writers.writer import Writer -from nck.commands.command import processor +import os -@click.command(name="write_local") -@click.option("--local-directory", required=True) -@processor() -def local(**kwargs): - return LocalWriter(**kwargs) +from ack.config import logger +from ack.writers.writer import Writer class LocalWriter(Writer): - def __init__(self, local_directory): - self._local_directory = local_directory + def __init__(self, directory, file_name): + self._directory = directory + self._file_name = file_name def write(self, stream): """ - Write file to disk at location given as parameter. + Write file to disk at location given as parameter. 
""" + file_name = self._file_name or stream.name + path = os.path.join(self._directory, file_name) - path = os.path.join(self._local_directory, stream.name) - - logging.info("Writing stream %s to %s", stream.name, path) + logger.info(f"Writing stream {file_name} to {path}") file = stream.as_file() with open(path, "wb") as h: while True: diff --git a/ack/writers/object_storage/__init__.py b/ack/writers/object_storage/__init__.py new file mode 100644 index 00000000..d46139b7 --- /dev/null +++ b/ack/writers/object_storage/__init__.py @@ -0,0 +1,17 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. diff --git a/ack/writers/object_storage/writer.py b/ack/writers/object_storage/writer.py new file mode 100644 index 00000000..3fbcfad5 --- /dev/null +++ b/ack/writers/object_storage/writer.py @@ -0,0 +1,76 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +import os + +from ack.config import logger +from ack.writers.writer import Writer + + +class ObjectStorageWriter(Writer): + def __init__(self, bucket_name, prefix=None, file_name=None, platform=None, **kwargs): + self._bucket_name = bucket_name + self._prefix = prefix if prefix else "" + self._file_name = file_name + self._platform = platform + self._bucket = self._get_bucket_if_exist() + + def write(self, stream): + logger.info(f"Start writing file to {self._platform} ...") + self._set_valid_file_name(stream.name) + final_name = os.path.join(self._prefix, self._file_name) + self._write_aux(stream, final_name) + + def _write_aux(self, stream, final_name): + self._create_blob(final_name, stream) + logger.info(f"Wrote {final_name} file to {self._bucket_name} on {self._platform} ...") + uri = self._get_uri(final_name) + logger.info(f"file can be found at {uri}") + + def _get_bucket_if_exist(self): + client = self._create_client() + bucket = self._create_bucket(client) + list_buckets_names = [bucket.name for bucket in self._list_buckets(client)] + try: + assert self._bucket_name in list_buckets_names + except AssertionError as err: + raise Exception( + f"{self._bucket_name} bucket does not exist. 
available buckets are {list_buckets_names}" + ).with_traceback(err.__traceback__) + return bucket + + def _get_file_path(self, file_name): + return f"://{self._bucket_name}/{file_name}" + + def _set_valid_file_name(self, stream_name): + file_format = os.path.splitext(stream_name)[-1] + self._file_name = f"{self._file_name}{file_format}" if self._file_name is not None else stream_name + + def _create_client(self): + return NotImplementedError + + def _create_bucket(self, client): + return NotImplementedError + + def _list_buckets(self, client): + return NotImplementedError + + def _create_blob(self, file_name, stream): + return NotImplementedError + + def _get_uri(self, file_name): + return NotImplementedError diff --git a/nck/writers/writer.py b/ack/writers/writer.py similarity index 100% rename from nck/writers/writer.py rename to ack/writers/writer.py diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 00000000..d0c3cbf1 --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/build/.gitkeep b/docs/build/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 00000000..5a2307aa --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,51 @@ +# Configuration file for the Sphinx documentation builder. 
+# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +# import os +# import sys +# sys.path.insert(0, os.path.abspath('.')) + + +# -- Project information ----------------------------------------------------- + +project = "Artefactory Connectors Kit" +copyright = "2021, Artefact" +author = "Artefact" + + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = ["sphinx.ext.autosectionlabel", "sphinx.ext.githubpages"] +autosectionlabel_prefix_document = True + +# Add any paths that contain templates here, relative to this directory. +templates_path = ["_templates"] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = [] + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = "sphinx_rtd_theme" + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". 
+html_static_path = ["_static"] diff --git a/docs/source/getting_started.rst b/docs/source/getting_started.rst new file mode 100644 index 00000000..f7a71461 --- /dev/null +++ b/docs/source/getting_started.rst @@ -0,0 +1,376 @@ +############### +Getting started +############### + +================================== +Set-up your developing environment +================================== + +To start using ACK and/or contributing, first clone the `dev` branch of the `GitHub repository `__: + +.. code-block:: shell + + git clone git@github.com:artefactory/artefactory-connectors-kit.git -b dev + +------------------- +Virtual environment +------------------- + +Create a virtual environment at the root of your local repository: + +.. code-block:: shell + + python3 -m venv ack-env + source ack-env/bin/activate + +Install dependencies: + +.. code-block:: shell + + pip install -r requirements.txt + pip install -r requirements-dev.text + +------- +Linting +------- + +We are using `black `__ and `Flake8 `__ for code linting. + +The black and Flake8 packages have already been installed in your virtual environment with dependencies. Also, a Flake8 configuration file (``.flake8``) and a black configuration file (``pyproject.toml``) are available at the root at this repository. + +---------------- +Pre-commit hooks +---------------- + +We are using `pre-commit `__ hooks to point out linting issues in our code before submission to code review. + +The pre-commit package has already been installed in your virtual environment with dependencies. Also, a pre-commit configuration file (``.pre-commit-config.yaml``) is available at the root of this repository. + +To finalize the installation and install git hooks scripts, execute: + +.. code-block:: shell + + pre-commit install + +For now on, the ``pre-commit`` command will run automatically on every ``git commit``. 
+ +------------------------------- +TDD (*Test-Driven Development*) +------------------------------- + +We are running tests using `nose `__, an extension of the Python `unittest `__ framework. + +The nose package has already been installed in your virtual environment with dependencies. +To run existing tests, execute: + +.. code-block:: shell + + nosetests + +------------- +Documentation +------------- + +We are using `Sphinx `__ with a ReadTheDocs theme to document the application. + +The Sphinx package has already been installed in your virtual environment with dependencies. + +Sphinx documentation is available under the ``docs/source/`` directory of the `GitHub repository `__ as .rst files (each file representing a page). +It uses the reStructuredText (reST) syntax: to learn more about it, see the `official documentation `__. + +You can modify existing pages by editing the corresponding .rst files. + +If you want to create a new page, you should: + +1. Create a new ``.rst`` file under the ``docs/source/`` directory +2. Add a ``./.rst`` line in the ``docs/source/index.rst`` file as follows: + +.. code-block:: yaml + + .. toctree:: + :maxdepth: 2 + caption: Contents: + + ./overview.rst + ./getting_started.rst + ./readers.rst + ./streams.rst + ./writers.rst + ./to_go_further.rst + ./.rst + +To preview your changes, execute: + +.. code-block:: shell + + cd docs/ + make html + +It will create the .html files corresponding to your .rst source files in the ``docs/build/`` directory. +You can launch a preview of these .html files in your browser with your code editor (with VSCode: right-click on any .html file > Open with Live Server). + +Sphinx documentation is automatically deployed on GitHub Pages (by a dedicated GitHub workflow) each time code is pushed to the 'dev' branch of the repository. + +============================= +Launch your first ACK command +============================= + +Once this preliminary set-up is finalized, you can start using the application. 
+ +There are two different ways to use ACK commands. You can either build a full command by passing every argument you want or build a .json config file and pass it to ACK. Both ways are described below. + +--------------------- +ACK full command line +--------------------- + +ACK commands can be broken down into 3 parts: + +1. An entrypoint: all ACK cli commands are launched through the ``ack/entrypoints/cli/main.py`` executable. + +.. code-block:: shell + + python ack/entrypoints/cli/main.py + +2. A reader command, and its options: in the below example, we are reading Google Analytics data for the view , retrieving sessions, pageviews and bounces by date from 2020-01-01 to 2020-01-03. + +.. code-block:: shell + + read_ga --ga-client-id --ga-client-secret --ga-view-id --ga-refresh-token --ga-dimension ga:date --ga-metric ga:sessions --ga-metric ga:pageviews --ga-metric ga:bounces --ga-start-date 2020-01-01 --ga-end-date 2020-01-03 + +3. A writer command, and its options: in the below example, we are writing the output .njson stream into a Google Cloud Storage blob named ``google_analytics_report_2020-01-01.njson``, located under the Google Cloud Storage bucket ``ack_extracts``, with the path ``FR/google_analytics/``. + +.. code-block:: shell + + write_gcs --gcs-project-id --gcs-bucket ack_extracts --gcs-prefix FR/google_analytics --gcs-filename google_analytics_report_2020-01-01.njson + +To execute the ACK command as a whole, we just have to put these 3 parts together. + +To simplify your first test, instead of the writer command ``write_gcs``, we recommend you use ``write_console`` (*it will write output stream records into your terminal*) or ``write_local --local-directory `` (*it will write output stream records into a local file*). In practice, these writer commands are very convenient for debugging, as they are quite simple. + +In the end, if we use ``write_console`` as a writer command, the combined ACK command will be: + +.. 
 code-block:: shell + + python ack/entrypoints/cli/main.py read_ga --ga-client-id --ga-client-secret --ga-view-id --ga-refresh-token --ga-dimension ga:date --ga-metric ga:sessions --ga-metric ga:pageviews --ga-metric ga:bounces --ga-start-date 2020-01-01 --ga-end-date 2020-01-03 write_console + +You can now execute it in your terminal. + +---------------------------- +ACK with a .json config file +---------------------------- + +ACK can also use a .json config file to get all arguments. You can break this command into 3 parts: + +1. An entrypoint: all ACK commands are launched through the ``ack/entrypoints/json/main.py`` executable. + +.. code-block:: shell + + python ack/entrypoints/json/main.py + +2. A path argument ``--config-file`` that will tell the entrypoint where to find the .json file with all the information. + +3. A .json config file organized as follows, with one reader and at least one writer: + +.. code-block:: JSON + + { + "option_name": "value", + "reader": { + "name": "reader_name", + "option_name": "value", + "option_name": ["value1", "value2"], + }, + "writers": [ + { + "name": "writer_name", + "option_name": "value", + }, + ] + } + +Here is a good example of a .json config file: + +.. code-block:: JSON + + { + "reader": { + "name": "twitter", + "consumer_key": "****", + "consumer_secret": "****", + "access_token": "****", + "access_token_secret": "*****", + "account_id": "*****", + "report_type": "ANALYTICS", + "entity": "PROMOTED_TWEET", + "metric_group": ["ENGAGEMENT"], + "segmentation_type": "AGE", + "granularity": "DAY", + "start_date": "2021-02-25", + "end_date": "2021-03-04" + }, + "writers": [ + { + "name": "console" + } + ] + } + +**Now that you understand how ACK commands are structured, you can follow these links to find the full documentation on available** :ref:`readers:Readers` and :ref:`writers:Writers`. 
+ +===================== +Normalize field names +===================== + +Some destinations have specific requirements for field names. This is the case of BigQuery, that only accepts letters, digits and underscores. + +To normalize field names (i.e. replace any special character or white space by an underscore), you can add the option ``--normalize-keys true`` between ``python ack/entrypoint.py`` and the invocated reader command. If we keep using the previous Google Analytics example, it would give: + +.. code-block:: shell + + python ack/entrypoints/cli/main.py --normalize-keys true read_ga --ga-client-id --ga-client-secret --ga-view-id --ga-refresh-token --ga-dimension ga:date --ga-metric sessions --ga-metric ga:pageviews --ga-metric ga:bounces --ga-start-date 2020-01-01 --ga-end-date 2020-01-03 write_console + +========== +Contribute +========== + +ACK is an open-source application initially developed by Artefact team: feel free to contribute! + +You can find open issues on `this GitHub page `__. If you identify additional enhancements/fixes that could be beneficial to the application, don't hesitate to add them to the list. + +Here are a few tips/guidelines to help you efficiently contribute: + +--------------------------- +How to develop a new reader +--------------------------- + +*Readers are reading data from an API source, and transform it into a stream object.* + +To create a new reader, you should: + +1. Create a ``ack/readers//`` directory, having the following structure: + +.. code-block:: shell + + - ack/ + -- readers/ + --- / + ---- cli.py + ---- reader.py + ---- config.py + ---- helper.py # Optional + +``cli.py`` + +This module should implement a click-decorated reader function: + + - The reader function should be decorated with: a ``@click.command()`` decorator, several ``@click.option()`` decorators (*one for each input provided by end-users*) and a ``@processor()`` decorator (*preventing secrets to appear in logs*). 
For further information on how to implement these decorators, please refer to `click documentation `__. + - The reader function should return a reader class (*more details below*). The source prefix of each option will be removed when passed to the reader class, using the ``extract_args()`` function. + +``reader.py`` + +This module should implement a reader class: + + - Class attributes should be the previously defined click options. + - The class should have a ``read()`` method, yielding a stream object. This stream object can be chosen from `available stream classes `__, and has 2 attributes: a stream name and a source generator function named ``result_generator()``, yielding individual source records. + +``config.py`` + +This module gathers all configuration variables. + +In addition, it's also managing reader's data validation thanks to Pydantic. Each reader must have a configuration class complying with: + + - Class name should be ``Config()``. + - It should inherit from ``BaseModel`` from Pydantic. + - Each class attribute should be declared with its name, its type and its default value if the attribute isn't required. + - If the reader has date inputs that follow the format 'YYYY-MM-DD', the class should have a ``@validator`` function to support this format (an example can be found in some readers, such as ``AdobeAnalytics14Reader``). + - If some attributes need additional processing, other ``@validator`` functions should be created for each of them. + +``helper.py`` (Optional) + +This module gathers all helper functions used in the ``reader.py`` module. + +2. In parallel, create unit tests for your methods under the ``tests/`` directory + +3. Add your click-decorated reader function to the ``ack/entrypoints/cli/readers.py`` file + +4. Add your reader class and your config class to the ``ack/entrypoints/json/readers.py`` file as ``(ClassReader, ClassConfig)`` + +5. 
Complete the documentation: + + - Add your reader to the list of existing readers in the :ref:`overview:Available Connectors` section. + - Add your reader to the list of existing readers in the repo's ``./README.md``. + - Create dedicated documentation for your reader CLI and JSON command on the :ref:`readers:Readers` page. It should include the followings sections: *Source API - How to obtain credentials - Quickstart - Command name - Command options* + +--------------------------- +How to develop a new stream +--------------------------- + +*Streams are local objects used by writers to process individual records collected from the source.* + +Each stream class should have: + +- 2 attributes : a stream name and a source generator function. Both values will be passed by the associated reader class (*the generator function is the* ``result_generator()`` *function defined in the reader class*). +- a ``readlines()`` method, yielding individual source records. + +Currently, these components are defined in the parent ``Stream`` class (*defined in the* ``ack/streams/stream.py`` *module*), and are inherited by all stream subclasses. + +--------------------------- +How to develop a new writer +--------------------------- + +*Writers are writing the output stream object to the destination of your choice.* + +To develop a new writer, you should: + +1. Create a ``ack/writers//`` directory, having the following structure: + +.. code-block:: shell + + - ack/ + -- writers/ + --- / + ---- cli.py + ---- writer.py + ---- config.py # Optional + ---- helper.py # Optional + +``cli.py`` + +This module should implement a click-decorated writer function: + + - The writer function should be decorated with: a ``@click.command()`` decorator, several ``@click.option()`` decorators (*one for each input provided by end-users*) and a ``@processor()`` decorator (*preventing secrets to appear in logs*). 
For further information on how to implement these decorators, please refer to `click documentation `__. + - The writer function should return a writer class (*more details below*). The destination prefix of each option will be removed when passed to the writer class, using the ``extract_args()`` function. + +``writer.py`` + +This module should implement a writer class: + + - Class attributes should be the previously defined click options. + - The class should have a ``write()`` method, writing the stream object to the destination. + +``config.py`` (Optional) + +This module gathers all configuration variables. + +In addition, it's also managing writer's data validation thanks to Pydantic. Each writer needing attributes to work must have a configuration class complying with: + + - Class name should be ``Config()``. + - It should inherit from ``BaseModel`` from Pydantic. + - Each class attribute should be declared with its name, its type and its default value if the attribute isn't required. + - If some attributes need additional processing, other ``@validator`` functions should be created for each of them. + +``helper.py`` (Optional) + +This module gathers all helper functions used in the ``writer.py`` module. + +2. In parallel, create unit tests for your methods under the ``tests/`` directory + +3. Add your click-decorated writer function to the ``ack/entrypoints/cli/writers.py`` file + +4. Add your writer class and your config class to the ``ack/entrypoints/json/writers.py`` file as ``(ClassWriter, ClassConfig)``. If there is no config class, it should be ``(ClassWriter,)`` + +5. Complete the documentation: + + - Add your writer to the list of existing writers in the :ref:`overview:Available Connectors` section. + - Add your writer to the list of existing writers in the repo's ``./README.md``. + - Create dedicated documentation for your writer CLI and JSON command on the :ref:`writers:Writers` page. 
It should include the followings sections: *Quickstart - Command name - Command options* diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 00000000..aed688e3 --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,19 @@ +.. Artefactory Connectors Kit documentation master file, created by + sphinx-quickstart on Tue Jan 5 11:25:16 2021. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to Artefactory Connectors Kit's documentation! +=================================================== + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + ./overview.rst + ./getting_started.rst + ./readers.rst + ./streams.rst + ./writers.rst + ./to_go_further.rst + ./migration_to_v2.rst diff --git a/docs/source/migration_to_v2.rst b/docs/source/migration_to_v2.rst new file mode 100644 index 00000000..81cc1c0c --- /dev/null +++ b/docs/source/migration_to_v2.rst @@ -0,0 +1,54 @@ +####################### +Migration to v2 from v1 +####################### + +============== +v2.0 changelog +============== + +The main changes coming with v2.0 are: + +- name's changing from Nautilus Connectors Kit to Artefactory Connectors Kit. All references to NCK have been changed to ACK. + +- a new entrypoint using a json config file for commands to avoid very long command lines: ``ack/entrypoints/json/main.py``. It takes the config file's path as an argument with ``--config-file``: + +.. code-block:: shell + + python ack/entrypoints/json/main.py --config-file path/to/file.json + +- change of the cli entrypoint's path from ``nck/entrypoint.py`` to ``ack/entrypoints/cli/main.py``: + +.. code-block:: shell + + python ack/entrypoints/cli/main.py reader --option-name option writer --option-name option + +- support only by Python 3.8 and by more recent versions to fully use Typing library + +============== +How to migrate +============== + +1. 
Pull the `dev` branch from the `GitHub repository `__ if you have already set up your environment: + +.. code-block:: shell + + git pull origin dev + +2. If your virtual environment is running Python 3.7 or less, you need to recreate it (you can test it with ``python -V`` in your virtualenv). **If it's running Python 3.8 or more, you can skip to step 3**. + +You first need to be sure that your ``python3 -V`` gives version 3.8 or more. If not, you need to install a more recent Python version. Then, do: + +.. code-block:: shell + + rm -rf ack-env + python3 -m venv ack-env + source ack-env/bin/activate + +3. Install/update the dependencies: + +.. code-block:: shell + + pip install -r requirements.txt + pip install -r requirements-dev.txt + +4. Change the entrypoint's path of all your commands from ``nck/entrypoint.py`` to ``ack/entrypoints/cli/main.py``. If you wish, you can also convert your commands into a json config file following the documentation in the :ref:`getting_started:ACK with a .json config file` section. diff --git a/docs/source/overview.rst b/docs/source/overview.rst new file mode 100644 index 00000000..4195606b --- /dev/null +++ b/docs/source/overview.rst @@ -0,0 +1,71 @@ +######## +Overview +######## + +**ACK is an E(T)L tool specialized in API data ingestion. It is accessible through a Command-Line Interface. The application allows you to easily extract, stream and load data (with minimum transformations), from the API source to the destination of your choice.** + +As of now, the most common output format of data loaded by the application is .njson (i.e. a file of n lines, where each line is a json-like dictionary). + +========== +Philosophy +========== + +The application is composed of **3 main components** (*implemented as Python classes*). 
When combined, these components act as an E(T)L pipeline, allowing you to stream data from a source to the destination of your choice: + +- :ref:`readers:Readers` are reading data from an API source, and transform it into a stream object. +- :ref:`streams:Streams` (*transparent to the end-user*) are local objects used by writers to process individual records collected from the source. +- :ref:`writers:Writers` are writing the output stream object to the destination of your choice. + +==================== +Available connectors +==================== + +As of now, the application is offering the following Readers & Writers: + +******* +Readers +******* + +- **Analytics** + - Adobe Analytics 1.4 + - Adobe Analytics 2.0 + - Google Analytics +- **Advertising - Adserver** + - Google Campaign Manager +- **Advertising - DSP** + - Google Display & Video 360 + - The Trade Desk +- **Advertising - Search** + - Google Ads + - Google Search Ads 360 + - Google Search Console + - Yandex Campaign + - Yandex Statistics +- **Advertising - Social** + - Facebook Marketing + - MyTarget + - Radarly + - Twitter Ads +- **CRM** + - SalesForce +- **Databases** + - MySQL +- **DevTools** + - Confluence +- **Files (.csv, .njson)** + - Amazon S3 + - Google Cloud Storage + - Google Sheets + +******* +Writers +******* + +- **Data Warehouses** + - Google BigQuery +- **Debugging** + - Console +- **Files (.njson)** + - Amazon S3 + - Google Cloud Storage + - Local file diff --git a/docs/source/readers.rst b/docs/source/readers.rst new file mode 100644 index 00000000..bc49f1a5 --- /dev/null +++ b/docs/source/readers.rst @@ -0,0 +1,1485 @@ +####### +Readers +####### + +**Readers are reading data from an API source, and transform it into a stream object.** + +*About to develop a new reader?* See the :ref:`getting_started:How to develop a new reader` section. 
+ +*Just want to use an existing reader?* This page provides you with documentation on available commands: + +======================= +Adobe Analytics Readers +======================= + +As of May 2020 (last update of this section of the documentation), **two versions of Adobe Analytics Reporting API are coexisting: 1.4 and 2.0**. As some functionalities of API 1.4 have not been made available in API 2.0 yet (Data Warehouse reports in particular), our Adobe Analytics Readers are also available in these two versions. + +------------------------- +How to obtain credentials +------------------------- + +Both Adobe Analytics Readers use the **JWT authentication framework**. + +- Get developer access to Adobe Analytics (documentation can be found `here `__) +- Create a Service Account integration to Adobe Analytics on `Adobe Developer Console `__ +- Use the generated JWT credentials (Client ID, Client Secret, Technical Account ID, Organization ID and private.key file) to retrieve your Global Company ID (to be requested to `Discovery API `__). All these parameters will be passed to Adobe Analytics Readers. + +========================== +Adobe Analytics Reader 1.4 +========================== + +---------- +Source API +---------- + +`Analytics API v1.4 `__ + +---------- +Quickstart +---------- + +Call example to Adobe Analytics Reader 1.4, getting the number of visits per day and tracking code for a specified Report Suite, between 2020-01-01 and 2020-01-31: + +.. code-block:: shell + + python ack/entrypoints/cli/main.py read_adobe --adobe-client-id --adobe-client-secret --adobe-tech-account-id --adobe-org-id --adobe-private-key --adobe-global-company-id --adobe-report-suite-id --adobe-date-granularity day --adobe-report-element-id trackingcode --adobe-report-metric-id visits --adobe-start-date 2020-01-01 --adobe-end-date 2020-01-31 write_console + +*Didn't work?* See the `Troubleshooting`_ section. 
+ +------------ +Command name +------------ + +CMD: ``read_adobe`` + +JSON: ``adobe_analytics_1_4`` + +--------------- +Command options +--------------- + +============================== ============================ ================================================================================================================================================================================= +CMD Options JSON Options Definition +============================== ============================ ================================================================================================================================================================================= +``--adobe-client-id`` ``client_id`` Client ID, that you can find on Adobe Developer Console +``--adobe-client-secret`` ``client_secret`` Client Secret, that you can find on Adobe Developer Console +``--adobe-tech-account-id`` ``tech_account_id`` Technical Account ID, that you can find on Adobe Developer Console +``--adobe-org-id`` ``org_id`` Organization ID, that you can find on Adobe Developer Console +``--adobe-private-key`` ``private_key`` Content of the private.key file, that you had to provide to create the integration. Make sure to enter the parameter in quotes, include headers, and indicate newlines as ``\n``. +``--adobe-global-company-id`` ``global_company_id`` Global Company ID (to be requested to `Discovery API `__) +``--adobe-list-report-suite`` ``list_report_suite`` Should be set to True if you wish to request the list of available Adobe Report Suites (default: False). If set to True, the below parameters should be left empty. +``--adobe-report-suite-id`` ``report_suite_id`` ID of the requested Adobe Report Suite +``--adobe-report-element-id`` ``report_element_id`` (list) ID of the element (i.e. 
dimension) to include in the report +``--adobe-report-metric-id`` ``report_metric_id`` (list) ID of the metric to include in the report +``--adobe-date-granularity`` ``date_granularity`` Granularity of the report. Possible values: PREVIOUS_DAY, LAST_30_DAYS, LAST_7_DAYS, LAST_90_DAYS +``--adobe-start-date`` ``start_date`` Start date of the period to request (format: YYYY-MM-DD) +``--adobe-end-date`` ``end_date`` End date of the period to request (format: YYYY-MM-DD) +============================== ============================ ================================================================================================================================================================================= + +--------------------- +Addtional information +--------------------- + +- **The full list of available elements and metrics** can be retrieved with the `GetElements `__ and `GetMetrics `__ methods. +- **Adobe Analytics Reader 1.4 requests Data Warehouse reports** (the "source" parameter is set to "warehouse" in the report description), allowing it to efficiently process multiple-dimension requests. +- **If you need further information**, the documentation of Adobe APIs 1.4 can be found `here `__. + +========================== +Adobe Analytics Reader 2.0 +========================== + +---------- +Source API +---------- + +`Analytics API v2.0 `__ + +---------- +Quickstart +---------- + +Call example to Adobe Analytics Reader 2.0, getting the number of visits per day and tracking code for a specified Report Suite, between 2020-01-01 and 2020-01-31: + +.. 
code-block:: shell + + python ack/entrypoints/cli/main.py read_adobe_2_0 --adobe-2-0-client-id --adobe-2-0-client-secret --adobe-2-0-tech-account-id --adobe-2-0-org-id --adobe-2-0-private-key --adobe-2-0-global-company-id --adobe-2-0-report-suite-id --adobe-2-0-dimension daterangeday --adobe-2-0-dimension campaign --adobe-2-0-start-date 2020-01-01 --adobe-2-0-end-date 2020-01-31 --adobe-2-0-metric visits write_console + +*Didn't work?* See the `Troubleshooting`_ section. + +------------ +Command name +------------ + +CMD: ``read_adobe_2_0`` + +JSON: ``adobe_analytics_2_0`` + +--------------- +Command options +--------------- + +================================== ======================= ================================================================================================================================================================================= +CMD Options JSON Options Definition +================================== ======================= ================================================================================================================================================================================= +``--adobe-2-0-client-id`` ``client_id`` Client ID, that you can find on Adobe Developer Console +``--adobe-2-0-client-secret`` ``client_secret`` Client Secret, that you can find on Adobe Developer Console +``--adobe-2-0-tech-account-id`` ``tech_account_id`` Technical Account ID, that you can find on Adobe Developer Console +``--adobe-2-0-org-id`` ``org_id`` Organization ID, that you can find on Adobe Developer Console +``--adobe-2-0-private-key`` ``private_key`` Content of the private.key file, that you had to provide to create the integration. Make sure to enter the parameter in quotes, include headers, and indicate newlines as ``\n``. 
+``--adobe-2-0-global-company-id`` ``global_company_id`` Global Company ID (to be requested to `Discovery API `__) +``--adobe-2-0-report-suite-id`` ``report_suite_id`` ID of the requested Adobe Report Suite +``--adobe-2-0-dimension`` ``dimension`` (list) Dimension to include in the report +``--adobe-2-0-metric`` ``metric`` (list) Metric to include in the report +``--adobe-2-0-start-date`` ``start_date`` Start date of the period to request (format: YYYY-MM-DD) +``--adobe-2-0-end-date`` ``end_date`` Start date of the period to request (format: YYYY-MM-DD) +``--adobe-2-0-date-range`` ``date_range`` Date range. By default, not available in Adobe, so choose among ACK default values: YESTERDAY, LAST_7_DAYS, PREVIOUS_WEEK, PREVIOUS_MONTH, LAST_90_DAYS +================================== ======================= ================================================================================================================================================================================= + +---------------------- +Additional information +---------------------- + +- **In API 2.0, dimension and metric names are slightly different from API 1.4**. To get new metric and dimension names and reproduce the behavior of Adobe Analytics UI as closely as possible, `enable the Debugger feature in Adobe Analytics Workspace `__: it allow you to visualize the back-end JSON requests made by Adobe Analytics UI to Reporting API 2.0. +- **In API 2.0, the date granularity parameter was removed, and should now be handled as a dimension**: a request featuring ``--adobe-dimension daterangeday`` will produce a report with a day granularity. +- **API 2.0 does not feature Data Warehouse reports yet** (along with other features, that are indicated on the "Current limitations" section of `this page `__). For this reason, if you wish to collect multiple-dimension reports, Adobe Analytics Reader 1.4 might be a more efficient solution in terms of processing time. 
+- **If you need any further information**, the documentation of Adobe APIs 2.0 can be found `here `__. + +================ +Amazon S3 Reader +================ + +---------- +Source API +---------- + +`AWS SDK for Python (Boto3) `__ + +---------- +Quickstart +---------- + +Execute the following commands to set your credentials: + +.. code-block:: shell + + export REGION_NAME= + export AWS_ACCESS_KEY_ID= + export AWS_SECRET_ACCESS_KEY= + +Once done, launch your S3 reader command. The following command retrieves the blobs located under the Amazon S3 bucket ``daily_reports`` and the blob prefix ``FR/offline_sales/``. + +.. code-block:: shell + + python ack/entrypoints/cli/main.py read_s3 --s3-bucket daily_reports --s3-prefix FR/offline_sales --s3-format csv write_console + +*Didn't work?* See the `Troubleshooting`_ section. + +------------ +Command name +------------ + +CMD: ``read_s3`` + +JSON: ``amazon_s3`` + +--------------- +Command options +--------------- + +============================== ==================== ======================================================================================================================================================================================================================================================================================================================================================================================================================= +CMD Options JSON Options Definition +============================== ==================== ======================================================================================================================================================================================================================================================================================================================================================================================================================= +``--s3-bucket`` ``bucket`` S3 bucket name +``--s3-prefix`` 
``prefix`` (list) S3 blob prefix. Several prefixes can be provided in a single command. +``--s3-format`` ``format`` S3 blob format. Possible values: csv, gz. +``--s3-dest-key-split`` ``dest_key_split`` Indicates how to retrieve a blob name from a blob key (a blob key being the combination of a blob prefix and a blob name: /). The reader splits the blob key on the "/" character: the last element of the output list is considered as the blob name, and is used to name the stream produced by the reader. This option defines how many splits to do. Default: -1 (split on all occurences). +``--s3-csv-delimiter`` ``csv_delimiter`` Delimiter that should be used to read the .csv file. Default: , +``--s3-csv-fieldnames`` ``fieldnames`` List of field names. If set to None (default), the values in the first row of .csv file will be used as field names. +============================== ==================== ======================================================================================================================================================================================================================================================================================================================================================================================================================= + +================= +Confluence Reader +================= + +---------- +Source API +---------- + +`Confluence Cloud REST API `__ + +---------- +Quickstart +---------- + +The Confluence Reader handles calls to the **Get Content endpoint** of Confluence Cloud REST API. + +The following command retrieves the titles, space names, tiny links and label names of all pages located under the Atlassian domain , filtered on the spacekeys and . + +.. 
code-block:: shell + + python ack/entrypoints/cli/main.py read_confluence --confluence-user-login --confluence-api-token --confluence-atlassian-domain --confluence-content-type "page" --confluence-field "title" --confluence-field "space.name" --confluence-field "tiny_link" --confluence-field "label_names" --confluence-spacekey --confluence-spacekey write_console + +*Didn't work?* See the `Troubleshooting`_ section. + +------------ +Command name +------------ + +CMD: ``read_confluence`` + +JSON: ``confluence`` + +--------------- +Command options +--------------- + +================================== ====================== ============================================================================================================================================================================================ +CMD Options JSON Options Definition +================================== ====================== ============================================================================================================================================================================================ +``--confluence-user-login`` ``user_login`` User login associated with your Atlassian account +``--confluence-api-token`` ``api_token`` API token associated with your Atlassian account (can be generated on `this page `__) +``--confluence-atlassian-domain`` ``atlassian_domain`` Atlassian domain under which the content to request is located +``--confluence-content-type`` ``content_type`` Type of content on which the report should be filtered. Possible values: page (default), blog_post. 
+``--confluence-spacekey`` ``spacekey`` (list) (Optional) Space keys on which the report should be filtered +``--confluence-field`` ``field`` (list) Fields that should be included in the report (path.to.field.value or custom_field) +================================== ====================== ============================================================================================================================================================================================ + +Please visit the following two pages for a better understanding of the `Authentification method `__, and of the parameters used in the `Get Content endpoint `__. + +The Confluence Reader supports two types of fields: + +**Standard fields** - You specify the path to the value that you you wish to retrieve in the raw API response (each path item being separated by dots). + +*Example* - The standard field ``space.name`` will retrieve the value ``"How To Guides"`` for the first item, and the value ``"Clients"`` for the second item. + +.. code-block:: shell + + RAW_API_RESPONSE = {"results": + [ + { + "title": "Making API requests with ACK", + "space": {"name": "How To Guides"}, + "metadata": {"labels": {"results": [{"name": "ack"}, {"name": "api"}]}} + }, + { + "title": "Samsung - Precision Marketing", + "space": {"name": "Clients"}, + "metadata": {"labels": {"results": [{"name": "pm"}]}} + } + ] + } + +**Custom fields** - If the format of the raw API response does not match your needs, you can define a custom field. Available custom fields are described in the CUSTOM_FIELDS variable of the ``ack.helpers.confluence_helper`` module. + +*Example* - The custom field ``label_names`` transforms the value of the source field ``metadata.labels.results`` using the function ``_get_key_values_from_list_of_dct``. In other words, using the first record of the previous example, it will format ``[{"name": "ack"}, {"name": "api"}]`` into ``"ack|api"``. + +.. 
code-block:: shell + + CUSTOM_FIELDS = { + "label_names": { + "source_field": "metadata.labels.results", + "format_function": _get_key_values_from_list_of_dct, + "format_function_kwargs": {"key": "name"}, + "formatted_object_type": str + } + } + +========================= +Facebook Marketing Reader +========================= + +---------- +Source API +---------- + +`Facebook Marketing API `__ + +---------- +Quickstart +---------- + +The Facebook Marketing Reader handles calls to 2 endpoints of the Facebook Marketing API: **Facebook Ad Insights** (to retrieve performance data), and **Facebook Ad Management** (to retrieve configuration data). + +*Example of Ad Insights Request* + +.. code-block:: shell + + python ack/entrypoints/cli/main.py read_facebook --facebook-access-token --facebook-object-id --facebook-breakdown age --facebook-breakdown gender --facebook-action-breakdown action_type --facebook-field ad_id --facebook-field ad_name --facebook-field impressions --facebook-field clicks --facebook-field actions[action_type:post_engagement] --facebook-field actions[action_type:video_view] --facebook-field age --facebook-field gender --facebook-time-increment 1 --facebook-start-date 2020-01-01 --facebook-end-date 2020-01-03 write_console + +*Example of Ad Management Request* + +.. code-block:: shell + + python ack/entrypoints/cli/main.py read_facebook --facebook-access-token --facebook-object-id --facebook-ad-insights False --facebook-level ad --facebook-field id --facebook-field creative[id] --facebook-add-date-to-report True --facebook-start-date 2020-01-01 --facebook-end-date 2019-01-01 write_console + +*Didn't work?* See the `Troubleshooting`_ section. 
+ +------------ +Command name +------------ + +CMD: ``read_facebook`` + +JSON: ``facebook`` + +--------------- +Command options +--------------- + +================================== ============================ ============================================================================================================================================================================================================================== +CMD Options JSON Options Definition +================================== ============================ ============================================================================================================================================================================================================================== +``--facebook-app-id`` ``app_id`` Facebook App ID. Not mandatory if Facebook Access Token is provided. +``--facebook-app-secret`` ``app_secret`` Facebook App Secret. Not mandatory if Facebook Access Token is provided. +``--facebook-access-token`` ``access_token`` Facebook App Access Token. +``--facebook-object-type`` ``object_type`` Nature of the root Facebook Object used to make the request. Possible values: pixel (Ad Management requests only), creative (Ad Management requests only), ad, adset, campaign, account (default). +``--facebook-object-id`` ``object_id`` (list) ID of the root Facebook Object used to make the request. +``--facebook-level`` ``level`` Granularity of the response. Possible values: pixel (Ad Management requests only), creative (Ad Management requests only), ad (default), adset, campaign, account. +``--facebook-ad-insights`` ``ad_insights`` True (default) if Ad Insights request, False if Ad Management request. +``--facebook-field`` ``field`` (list) Fields to be retrieved. +``--facebook-start-date`` ``start_date`` Start date of the period to request (format: YYYY-MM-DD). This parameter is only relevant for Ad Insights Requests, and Ad Management requests at the Campaign, Adset and Ad levels. 
+``--facebook-end-date`` ``end-date`` End date of the period to request (format: YYYY-MM-DD). This parameter is only relevant for Ad Insights Requests, and Ad Management requests at the Campaign, Adset and Ad levels. +``--facebook-date-preset`` ``date_preset`` Relative time range. Ignored if ``--facebook-start-date`` and ``--facebook-end-date`` are specified. This parameter is only relevant for Ad Insights Requests, and Ad Management requests at the Campaign, Adset and Ad levels. +``--facebook-time-increment`` ``time_increment`` Cuts the results between smaller time slices within the specified time range. This parameter is only relevant for Ad Insights Requests, and Ad Management requests at the Campaign, Adset and Ad levels. +``--facebook-add-date-to-report`` ``add_date_to_report`` True if you wish to add the date of the request to each response record, False otherwise (default). +``--facebook-breakdown`` ``breakdown`` (list) How to break down the result. This parameter is only relevant for Ad Insights Requests. +``--facebook-action-breakdown`` ``action_breakdown`` (list) How to break down action results. This parameter is only relevant for Ad Insights Requests. +================================== ============================ ============================================================================================================================================================================================================================== + +1. Make sure to select the appropriate ``--facebook-level`` + +================================== ============================================= +If Facebook Object Type is... Facebook Level can be... 
+================================== ============================================= +``account`` account, campaign, adset, ad, creative, pixel +``campaign`` campaign, adset, ad +``adset`` adset, ad, creative +``ad`` ad, creative +``creative`` creative +``pixel`` pixel +================================== ============================================= + +2. Format Facebook Marketing Reader response using ``--facebook-field`` + +2.1. The list of applicable fields can be found on the links below: + +- Ad Insights Request: `all fields `__ +- Ad Management Request: `Account-level fields `__, `Campaign-level fields `__, `Adset-level fields `__, `Ad-level fields `__, `Creative-level fields `__, `Pixel-level fields `__ + +2.2. If you want to select a nested field value, simply indicate the path to this value within the request field. + +*Facebook Marketing Reader Request* + +.. code-block:: shell + + --facebook-field object_story_spec[video_data][call_to_action][value][link] + +*API Response* + +.. code-block:: shell + + "object_story_spec": { + "video_data": { + "call_to_action": { + "type": "LEARN_MORE", + "value": { + "link": "https://www.artefact.com", + "link_format": "VIDEO_LPP" + } + } + } + } + +*Facebook Marketing Reader Response* + +.. code-block:: shell + + {"object_story_spec[video_data][call_to_action][value][link]": "https://www.artefact.com"} + +2.3 Action Breakdown filters can be applied to the fields of Ad Insights Requests using the following syntax: [:]. You can combine multiple Action Breakdown filters on the same field by adding them in cascade next to each other. + +*Facebook Marketing Reader Request* + +.. code-block:: shell + + --facebook-action-breakdown action_type + --facebook-field actions[action_type:video_view][action_type:post_engagement] + +*API Response* + +.. 
code-block:: shell + + "actions": [ + { + "action_type": "video_view", + "value": "17" + }, + { + "action_type": "link_click", + "value": "8" + }, + { + "action_type": "post_engagement", + "value": "25" + }, + { + "action_type": "page_engagement", + "value": "12" + } + ] + +*Facebook Marketing Reader Response* + +.. code-block:: shell + + {"actions[action_type:video_view]": "17", "actions[action_type:post_engagement]": "25"} + +============== +Google Readers +============== + +-------------- +Authentication +-------------- + +You can authenticate to most of the Readers of the Google Suite following the same schema. You'll need to generate a **refresh token** to connect via the OAuth flow. A full script to do this can be found in this `refresh token generator `__. + +================= +Google Ads Reader +================= + +---------- +Source API +---------- + +`AdWords API `__ + +------------------------- +How to obtain credentials +------------------------- + +Using the AdWords API requires four things: + +- A developer token (Generated at a company level - one per company -, takes around 2 days to be approved by Google) which can be completely independent from the Google Ads Account you will be calling (though you need a Manager Google Ads Account to request a token for your company) +- OAuth2 credentials: and +- A refresh token, created with the email address able to access all the Google Ads Accounts you will be calling +- The ID of the Google Ads Accounts you will be reading from (XXX-XXX-XXXX numbers, written right next to your Account Name) + +See the `documentation here `__ to apply for access if your Company does not already have a developer token (granting you the right to use the API). + +See the `documentation here `__ to set up your OAuth2 credentials and refresh token specifically for your Google Ads Accounts. 
+ +---------- +Quickstart +---------- + +The following command retrieves insights about the Ads of ``my_first_campaign`` and ``my_second_campaign`` in the Google Ads Account . + +.. code-block:: shell + + python ack/entrypoints/cli/main.py read_googleads --googleads-developer-token --googleads-client-id --googleads-client-secret --googleads-refresh-token --googleads-client-customer-id --googleads-report-type AD_PERFORMANCE_REPORT --googleads-date-range-type LAST_7_DAYS --googleads-field CampaignName --googleads-field AdGroupName --googleads-field Headline --googleads-field Date --googleads-field Impressions --googleads-report-filter "{'field':'CampaignName','operator':'IN','values':['my_first_campaign','my_second_campaign']}" + +*Didn't work?* See the `Troubleshooting`_ section. + +------------ +Command name +------------ + +CMD: ``read_googleads`` + +JSON: ``google_ads`` + +--------------- +Command options +--------------- + +========================================== ================================ ========================================================================================================================================================================================================== +CMD Options JSON Options Definition +========================================== ================================ ========================================================================================================================================================================================================== +``--googleads-developer-token`` ``developer_token`` Company Developer token for Google Ads API +``--googleads-client-id`` ``client_id`` OAuth2 ID +``--googleads-client-secret`` ``client_secret`` OAuth2 secret +``--googleads-refresh-token`` ``refresh_token`` Refresh token for OAuth2 +``--googleads-manager-id`` ``manager_id`` (Optional) Manager_Account_ID (XXX-XXX-XXXX identifier) +``--googleads-client-customer-id`` ``client_customer-ids`` (list) 
GAds_Account_ID (ignored if a manager account ID was given) +``--googleads-report-name`` ``report_name`` (Optional) Name of your output stream ("Custom Report" by default) +``--googleads-report-type`` ``report_type`` Type of report to be called +``--googleads-date-range-type`` ``date_range_type`` Type of date range to apply (if "CUSTOM_RANGE", a min and max date must be specified). Possible values can be found `here `__. +``--googleads-start-date`` ``start_date`` (Optional) Start date for "CUSTOM_RANGE" date range (format: YYYY-MM-DD) +``--googleads-end-date`` ``end_date`` (Optional) End date for "CUSTOM_RANGE" date range (format: YYYY-MM-DD) +``--googleads-field`` ``fields`` (list) Fields to include in the report +``--googleads-report-filter`` ``report_filter`` Filter to apply on a chosen field (Dictionary as String "{'field':,'operator':,'values':}") +``--googleads-include-zero-impressions`` ``include_zero_impressions`` Boolean specifying whether or not rows with zero impressions should be included in the report +``--googleads-filter-on-video-campaigns`` ``filter_on_video_campaigns`` Boolean used to filter the report on Video Campaigns only (require CampaignId to be listed as a field) +``--googleads-include-client-customer-id`` ``include_client_customer_id`` Boolean used to add "AccountId" as a field in the output stream. 
AccountId is not available in the API, but is known since it's a requirement to call the API (= Client Customer ID) +========================================== ================================ ========================================================================================================================================================================================================== + +See documentation below for a better understanding of the parameters: + +- `Reporting basics `__ +- `Available reports and associated fields `__ + +======================= +Google Analytics Reader +======================= + +---------- +Source API +---------- + +`Analytics Reporting API `__ + +---------- +Quickstart +---------- + +The following command retrieves sessions, pageviews and bounces volumes by date from 2020-01-01 to 2020-01-03, for the Analytics View . + +.. code-block:: shell + + python ack/entrypoints/cli/main.py read_ga --ga-client-id --ga-client-secret --ga-view-id --ga-refresh-token --ga-dimension ga:date --ga-metric sessions --ga-metric ga:pageviews --ga-metric ga:bounces --ga-start-date 2020-01-01 --ga-end-date 2020-01-03 write_console + +*Didn't work?* See the `Troubleshooting`_ section. 
+ +------------ +Command name +------------ + +CMD: ``read_ga`` + +JSON: ``google_analytics`` + +--------------- +Command options +--------------- + +============================== ====================== =============================================================================================================================================================================================================== +CMD Options JSON Options Definition +============================== ====================== =============================================================================================================================================================================================================== +``--ga-client-id`` ``client_id`` OAuth2 ID +``--ga-client-secret`` ``client_secret`` OAuth2 secret +``--ga-access-token`` ``access_token`` (Optional) Access token for OAuth2 +``--ga-refresh-token`` ``refresh_token`` Refresh token for OAuth2 +``--ga-view-id`` ``view_id`` (list) Analytics View ID from which to retrieve data. See documentation `here `__ for a better understanding of Google Analytics hierrarchy. +``--ga-account-id`` ``account_id`` (list) Analytics Account ID from which to retrieve data. See documentation `here `__ for a better understanding of Google Analytics hierrarchy. +``--ga-dimension`` ``dimension`` (list) Dimensions to include in the report (max 9). Possible values can be found `here `__. +``--ga-metric`` ``metric`` (list) Metrics to include in the report (min 1, max 10). Possible values can be found `here `__. +``--ga-segment-id`` ``segment_id`` (list) Segment ID of a built-in or custom segment (for example gaid::-3) on which report data should be segmented. 
+``--ga-start-date`` ``start_date`` Start date of the period to request (format: YYYY-MM-DD) +``--ga-end-date`` ``end_date`` End date of the period to request (format: YYYY-MM-DD) +``--ga-date-range`` ``date_range`` of the period to request, specified as a unique argument (format: YYYY-MM-DD YYYY-MM-DD) +``--ga-day-range`` ``day_range`` Relative time range. Possible values: PREVIOUS_DAY, LAST_30_DAYS, LAST_7_DAYS, LAST_90_DAYS. +``--ga-sampling-level`` ``sampling_level`` Desired sample size. See documentation `here `__ for a better understanding of Google Analytics sampling. Possible values: SMALL, DEFAULT, LARGE (default). +``--ga-add-view`` ``add_view`` If set to True (default: False), adds a "ga:viewId" field to the output stream. +============================== ====================== =============================================================================================================================================================================================================== + +See documentation `here `__ for a better understanding of the parameters. + +=========================== +Google Cloud Storage Reader +=========================== + +---------- +Source API +---------- + +`GCP Client Library for Cloud Storage `__ + +---------- +Quickstart +---------- + +Follow these steps to set your credentials: + +- In your GCP project, create a Service Account with a 'Storage Object Viewer' role +- Create a .JSON key for this Service Account, and download the key file locally +- Execute the following commands: + +.. code-block:: shell + + export project_id= + export GCP_KEY_PATH= + +Once done, launch your Google Cloud Storage reader command. The following command retrieves the blobs located under the Google Cloud Storage bucket ``daily_reports`` and the blob prefix ``FR/offline_sales/``: + +.. 
code-block:: shell + + python ack/entrypoints/cli/main.py read_gcs --gcs-bucket daily_reports --gcs-prefix FR/offline_sales --gcs-format csv write_console + +*Didn't work?* See the `Troubleshooting`_ section. + +------------ +Command name +------------ + +CMD: ``read_gcs`` + +JSON: ``google_cloud_storage`` + +--------------- +Command options +--------------- + +============================== =================== ======================================================================================================================================================================================================================================================================================================================================================================================================================== +CMD Options JSON Options Definition +============================== =================== ======================================================================================================================================================================================================================================================================================================================================================================================================================== +``--gcs-bucket`` ``bucket`` Cloud Storage bucket name +``--gcs-prefix`` ``prefix`` (list) Cloud Storage blob prefix. Several prefixes can be provided in a single command. +``--gcs-format`` ``format`` Cloud Storage blob format. *Possible values: csv, gz* +``--gcs-dest-key-split`` ``dest_key-split`` Indicates how to retrieve a blob name from a blob key (a blob key being the combination of a blob prefix and a blob name: /). The reader splits the blob key on the "/" character: the last element of the output list is considered as the blob name, and is used to name the stream produced by the reader. This option defines how many splits to do. 
*Default: -1 (split on all occurences)* +``--gcs-csv-delimiter`` ``csv_delimiter`` Delimiter that should be used to read the .csv file. *Default: ,* +``--gcs-csv-fieldnames`` ``csv_fieldnames`` List of field names. If set to *None* (*default*), the values in the first row of .csv file will be used as field names. +============================== =================== ======================================================================================================================================================================================================================================================================================================================================================================================================================== + +============================== +Google Campaign Manager Reader +============================== + +---------- +Source API +---------- + +`DCM/DFA Reporting and Trafficking API `__ + +---------- +Quickstart +---------- + +The following command retrieves impressions, clicks and cost volumes from 2020-01-01 to 2020-01-03. + +.. code-block:: shell + + python ack/entrypoints/cli/main.py read_dcm --dcm-client-id --dcm-client-secret --dcm-refresh-token --dcm-profile-id --dcm-dimension dfa:date --dcm-metric dfa:impressions --dcm-metric dfa:clicks --dcm-metric dfa:mediaCost --dcm-start-date 2020-01-01 --dcm-end-date 2020-01-03 write_console + +*Didn't work?* See the `Troubleshooting`_ section. 
+ +------------ +Command name +------------ + +CMD: ``read_dcm`` + +JSON: ``google_dcm`` + +--------------- +Command options +--------------- + +============================== ========================== ======================================================================================================================================================================================================================================================================================================================================= +CMD Options JSON Options Definition +============================== ========================== ======================================================================================================================================================================================================================================================================================================================================= +``--dcm-client-id`` ``client_id`` OAuth2 ID +``--dcm-client-secret`` ``client_secret`` OAuth2 secret +``--dcm-access-token`` ``access_token`` (Optional) Access token for OAuth2 +``--dcm-refresh-token`` ``refresh_token`` Refresh token for OAuth2 +``--dcm-profile-id`` ``profile_ids`` (list) ID of the DFA user profile that has been granted permissions to the CM account for which you want to retrieve data. You should have 1 DFA user profile per CM account that you can access. The associated ID can be found directly on your Campaign Manager UI (when accessing your list of CM accounts, on the top right hand corner). +``--dcm-report-name`` ``report_name`` Name of the report, that will appear in CM UI. +``--dcm-report-type`` ``report_type`` Type of the report. Possible values: CROSS_DIMENSION_REACH, FLOODLIGHT, PATH_TO_CONVERSION, REACH, STANDARD. +``--dcm-dimension`` ``dimensions`` (list) Dimensions to include in the report. Possible values can be found `here `__. +``--dcm-metric`` ``metrics`` (list) Metrics to include in the report. 
Possible values can be found `here `__. +``--dcm-filter`` ``filters`` (list(tuple)) association, used to narrow the scope of the report. For instance "dfa:advertiserId XXXXX" will narrow report scope to the performance of Advertiser ID XXXXX. Possible filter types can be found `here `__. +``--dcm-start-date`` ``start_date`` Start date of the period to request (format: YYYY-MM-DD) +``--dcm-end-date`` ``end_date`` End date of the period to request (format: YYYY-MM-DD) +``--dcm-date-range`` ``date_range`` Date range. By default, not available in DCM, so choose among ACK default values: YESTERDAY, LAST_7_DAYS, PREVIOUS_WEEK, PREVIOUS_MONTH, LAST_90_DAYS +============================== ========================== ======================================================================================================================================================================================================================================================================================================================================= + +=========================================== +Google DoubleClick Bid Manager Reader (DBM) +=========================================== + +---------- +Source API +---------- + +`Doubleclick Bid Manager API `__ + +---------- +Quickstart +---------- + +The following command retrieves impressions, clicks and cost volumes filtered on a specific from 2020-01-01 to 2020-01-03. + +.. code-block:: shell + + python ack/entrypoints/cli/main.py read_dbm --dbm-client-id --dbm-client-secret —dbm-refresh-token —dbm-filter FILTER_ADVERTISER --dbm-query-dimension FILTER_DATE --dbm-query-metric METRIC_IMPRESSIONS --dbm-query-metric METRIC_CLICKS --dbm-query-metric METRIC_MEDIA_COST_ADVERTISER --dbm-query-param-type TYPE_GENERAL --dbm-request-type custom_query_report --dbm-start-date 2020-01-01 --dbm-end-date 2020-01-03 write_console + +*Didn't work?* See the `Troubleshooting`_ section. 
+ +------------ +Command name +------------ + +CMD: ``read_dbm`` + +JSON: ``google_dbm`` + +--------------- +Command options +--------------- + +============================== =========================== ================================================================================================================================================================================================================================================================================================================ +CMD Options JSON Options Definition +============================== =========================== ================================================================================================================================================================================================================================================================================================================ +``--dbm-client-id`` ``client_id`` OAuth2 ID +``--dbm-client-secret`` ``client_secret`` OAuth2 secret +``--dbm-access-token`` ``access_token`` (Optional) Access token for OAuth2 +``--dbm-refresh-token`` ``refresh_token`` Refresh token for OAuth2 +``--dbm-query-request-type`` ``query_request_type`` Doubleclick Bid Manager API request type. Possible values: existing_query, custom_query, existing_query_report, custom_query_report, lineitems_objects, sdf_objects and list_reports. +``--dbm-query-id`` ``query_id`` Query ID. +``--dbm-query-title`` ``query_title`` Query title, used to name the reports generated from this query in DV360 UI. +``--dbm-query-frequency`` ``query_frequency`` How often the query is run. Possible values can be found `here `__. Default: ONE_TIME. +``--dbm-filter`` ``filter`` (list(tuple)) association, used to narrow the scope of the report. For instance "FILTER_ADVERTISER XXXXX" will narrow report scope to the performance of Advertiser ID XXXXX. Possible filter types can be found `here `__. 
+``--dbm-query-dimension`` ``query_dimension`` (list) Dimensions to include in the report. Possible values can be found `here `__. +``--dbm-query-metric`` ``query_metric`` (list) Metrics to include in the report. Possible values can be found `here `__. +``--dbm-query-param-type`` ``query_param_type`` Report type. Possible values can be found `here `__. Default: TYPE_TRUEVIEW. +``--dbm-start-date`` ``start_date`` Start date of the period to request (format: YYYY-MM-DD) +``--dbm-end-date`` ``end_date`` End date of the period to request (format: YYYY-MM-DD) +``--dbm-add-date-to-report`` ``add_date_to_report`` Sometimes the date range on which metrics are computed is missing from the report. If this option is set to True (default: False), this range will be added. +``--dbm-file-type`` ``file_type`` (list) File types +``--dbm-date-format`` ``date_format`` Add optional date format for the output stream. Follow the syntax of https://docs.python.org/3.8/library/datetime.html#strftime-strptime-behavior +``--dbm-day-range`` ``day_range`` Day range chosen across these values: PREVIOUS_DAY, LAST_30_DAYS, LAST_90_DAYS, LAST_7_DAYS, PREVIOUS_MONTH, PREVIOUS_WEEK +============================== =========================== ================================================================================================================================================================================================================================================================================================================ + +=================== +Google DV360 Reader +=================== + +---------- +Source API +---------- + +`DV360 API `__ + +------------------------- +How to obtain credentials +------------------------- + +As for DBM, the DV360 API uses OAuth 2.0 for authentication. 
There is not a single way to generate credentials but one is described below: + +- Enable DV360 API in a GCP project +- Generate a client id / client secret pair +- Log in with the user that can access DV360 +- Go to the `OAuth 2.0 Playground `__ + + - Go to the OAuth 2.0 configuration (the wheel in the upper right corner) and put your client id and client secret + - Select the DV360 API + - Exchange authorization codes for tokens. This is where you may have to log in with the account that can access DV360 + +You should now have an access token and a refresh token. Save them carefully. + +---------- +Quickstart +---------- + +Say you want to get an SDF file for all campaigns of a specific advertiser. You can run: + +.. code-block:: shell + + python ack/entrypoints/cli/main.py read_dv360 --dv360-client-id --dv360-client-secret --dv360-refresh-token --dv360-access-token --dv360-advertiser-id --dv360-filter-type 'FILTER_TYPE_NONE' --dv360-file-type 'FILE_TYPE_CAMPAIGN' write_console + +*Didn't work?* See the `Troubleshooting`_ section. + +------------ +Command name +------------ + +CMD: ``read_dv360`` + +JSON: ``google_dv360`` + +--------------- +Command options +--------------- + +============================== ===================== =============================================================== +CMD Options JSON Options Definition +============================== ===================== =============================================================== +``--dv360-access-token`` ``access_token`` Access token you obtained during the process of getting tokens +``--dv360-refresh-token`` ``refresh_token`` Refresh token you obtained during the process of getting tokens +``--dv360-client-id`` ``client_id`` Client ID you generated in the GCP environment +``--dv360-client-secret`` ``client_secret`` Client secret you generated in the GCP environment +``--dv360-advertiser-id`` ``advertiser_id`` One of the advertiser IDs you have access to +``--dv360-request-type`` ``request_type`` Request type. 
Choose among 'sdf_request' and 'creative_request' +``--dv360-file-type`` ``file_type`` (list) SDF level +``--dv360-filter-type`` ``filter_type`` SDF filter. Depends on the level. +============================== ===================== =============================================================== + +============================ +Google Search Console Reader +============================ + +---------- +Source API +---------- + +`Search Console API (Search Analytics endpoint) `__ + +------------------------- +How to obtain credentials +------------------------- + +Using the Google Search Console API requires three main parameters: + +- OAuth2 credentials: and +- A refresh token, created with the email address able to access your Google Search Console Account. +- The URLs whose performance you want to see + +---------- +Quickstart +---------- + +The following command retrieves insights about the URL from 2020-01-01 to 2020-01-03. + +.. code-block:: shell + + python ack/entrypoints/cli/main.py read_search_console --search-console-client-id --search-console-refresh-token --search-console-site-url --search-console-dimensions country --search-console-dimensions device --search-console-start-date 2020-01-01 --search-console-end-date 2020-01-03 write_console + +*Didn't work?* See the `Troubleshooting`_ section. 
+ +------------ +Command name +------------ + +CMD: ``read_search_console`` + +JSON: ``google_search_console`` + +--------------- +Command options +--------------- + +================================== ====================== ============================================================================================================================================================================================================ +CMD Options JSON Options Definition +================================== ====================== ============================================================================================================================================================================================================ +``--search-console-client-id`` ``client_id`` OAuth2 ID +``--search-console-client-secret`` ``client_secret`` OAuth2 secret +``--search-console-access-token`` ``access_token`` Access token for OAuth2 +``--search-console-refresh-token`` ``refresh_token`` Refresh token for OAuth2 +``--search-console-dimensions`` ``dimensions`` (list) Dimensions of the report. Possible values can be found `here `__. +``--search-console-site-url`` ``site_url`` Site URL whose performance you want to request +``--search-console-start-date`` ``start_date`` Start date of the period to request (format: YYYY-MM-DD) +``--search-console-end-date`` ``end_date`` End date of the period to request (format: YYYY-MM-DD) +``--search-console-date-range`` ``date_range`` Date range. 
By default, not available in Search Console, so choose among ACK default values: YESTERDAY, LAST_7_DAYS, PREVIOUS_WEEK, PREVIOUS_MONTH, LAST_90_DAYS +``--search-console-date-column`` ``date_column`` If set to True, a date column will be included in the report +``--search-console-row-limit`` ``row_limit`` Row number by report page +================================== ====================== ============================================================================================================================================================================================================ + +See documentation `here `__ for a better understanding of the parameters. + +============================ +Google Search Ads 360 Reader +============================ + +---------- +Source API +---------- + +`Search Ads 360 API `__ + +------------------------- +How to obtain credentials +------------------------- + +Using the Search Ads API requires two things: +- OAuth2 credentials: and +- A refresh token, created with the email address able to access to all the Search Ads 360 Account you will be calling + +See the `documentation here `__ +to set-up your OAuth2 credentials and refresh token specifically for Search Ads 360 Reporting. + +---------- +Quickstart +---------- + +The following command retrieves insights about the Ads in the Search Ads 360 Account from the agency . + +.. code-block:: shell + + python ack/entrypoints/cli/main.py read_sa360 --sa360-client-id --sa360-client-secret --sa360-refresh-token --sa360-agency-id --sa360-advertiser-id --sa360-report-type keyword --sa360-column date --sa360-column impr --sa360-column clicks --sa360-start-date 2020-01-01 --sa360-end-date 2020-01-01 + +*Didn't work?* See the `Troubleshooting`_ section. 
+ +------------ +Command name +------------ + +CMD: ``read_sa360`` + +JSON: ``google_sa360`` + +--------------- +Command options +--------------- + +============================== ========================== ======================================================================================================================================= +CMD Options JSON Options Definition +============================== ========================== ======================================================================================================================================= +``--sa360-client-id`` ``client_id`` OAuth2 ID +``--sa360-client-secret`` ``client_secret`` OAuth2 secret +``--sa360-access-token`` ``access_token`` (Optional) Access token +``--sa360-refresh-token`` ``refresh_token`` Refresh token +``--sa360-agency-id`` ``agency_id`` Agency ID to request in SA360 +``--sa360-advertiser-id`` ``advertiser_ids`` (list) (Optional) Advertiser ids to request. If not provided, every advertiser of the agency will be requested +``--sa360-report-name`` ``report_name`` (Optional) Name of the output report +``--sa360-report-type`` ``report_type`` Type of the report to request. Possible values can be found `here `__. +``--sa360-column`` ``columns`` (list) Dimensions and metrics to include in the report +``--sa360-saved-column`` ``saved_columns`` (list) (Optional) Saved columns to report. Documentation can be found `here `__. +``--sa360-start-date`` ``start_date`` Start date of the period to request (format: YYYY-MM-DD) +``--sa360-end-date`` ``end_date`` End date of the period to request (format: YYYY-MM-DD) +``--sa360-date-range`` ``date_range`` Date range. 
By default, not available in SA360, so choose among ACK default values: YESTERDAY, LAST_7_DAYS, PREVIOUS_WEEK, PREVIOUS_MONTH, LAST_90_DAYS +============================== ========================== ======================================================================================================================================= + +See documentation `here `__ for a better understanding of the parameters. + +==================== +Google Sheets Reader +==================== + +---------- +Source API +---------- + +`Google Sheets API `__ + +------------------------- +How to obtain credentials +------------------------- + +To use the Google Sheets Reader you must first retrieve your credentials. In order to do so, head to console.cloud.google.com. In the header, choose your project or create a new one. Next step is to enable the Google Drive and Google Sheets APIs in the API Library. You’ll find it in the *APIs & Services* tab. Now that Google Drive API is enabled, click on the *Create credentials* button on the upper-right corner and enter this information: + +- Which API are you using? > Google Drive API +- Where will you be calling the API from? > Web server +- What data will you be accessing? > Application data +- Are you planning to use this API with App Engine or Compute Engine? > No, I'm not using them + +Click on *What credentials do I need* and complete the form. You will find the credentials you need in the .JSON file that will start downloading automatically right after. + +---------- +Quickstart +---------- + +This command allows you to retrieve the desired information from a Google Sheet file row-by-row in a dictionary format. For example, given 3 columns a, b, c and 2 rows with respectively the values d, e, f and g, h, i, we would obtain such a dictionary: + +.. 
code-block:: shell + + {"a": "d", "b": "e", "c": "f"} + {"a": "g", "b": "h", "c": "i"} + +------------ +Command name +------------ + +CMD: ``read_gs`` + +JSON: ``google_sheets`` + +--------------- +Command options +--------------- + +============================== ===================== ============================================================================================================================================================== +CMD Options JSON Options Definition +============================== ===================== ============================================================================================================================================================== +``--gs-project-id`` ``project_id`` Project ID that is given by Google services once you have created your project in the Google Cloud Console. You can retrieve it in the .JSON credential file. +``--gs-private-key-id`` ``private_key_id`` Private key ID given by Google services once you have added credentials to the project. You can retrieve it in the .JSON credential file. +``--gs-private-key-path`` ``private_key_path`` The path to the private key that is stored in a txt file. You can retrieve it first in the .JSON credential file. +``--gs-client-email`` ``client_email`` Client e-mail given by Google services once you have added credentials to the project. You can retrieve it in the .JSON credential file. +``--gs-client-id`` ``client_id`` Client ID given by Google services once you have added credentials to the project. You can retrieve it in the .JSON credential file. +``--gs-client-cert`` ``client_cert`` Client certificate given by Google services once you have added credentials to the project. You can retrieve it in the .JSON credential file. +``--gs-file-name`` ``file_name`` The name you have given to your Google Sheet file +``--gs-page-number`` ``page_number`` The page number you want to access. The number pages starts at 0. 
+============================== ===================== ============================================================================================================================================================== + +=================== +MyTarget Reader +=================== + +---------- +Source API +---------- + +`Mytarget API `__ + +------------------------- +How to obtain credentials +------------------------- + +The mytarget API uses the OAuth2 protocol. There is not a single way to generate credentials, you can find the 3 ways to retrieve your credentials below : + +`Get your mytarget credentials `__ + +You should now have an access token and a refresh token. Save them carefully. + +---------- +Quickstart +---------- + +Say you want to retrieve for all campaigns and its associated banners and stats of a specific advertiser from the 01/01/2020 to the 07/01/2020. You can run: + +.. code-block:: shell + + python ack/entrypoints/cli/main.py read_mytarget --mytarget-client-id --mytarget-client-secret --mytarget-refresh-token --mytarget-request-type 'general' --mytarget-start-date --mytarget-end-date write_console + + +If you just want to get the budget instead of the general statistics of each campaign you can try the following: + +.. code-block:: shell + + python ack/entrypoints/cli/main.py read_mytarget --mytarget-client-id --mytarget-client-secret --mytarget-refresh-token --mytarget-request-type 'budget' --mytarget-start-date --mytarget-end-date write_console + + +*Didn't work?* See the `Troubleshooting`_ section. 
+ +------------ +Command name +------------ + +CMD: ``read_mytarget`` + +JSON: ``mytarget`` + +--------------- +Command options +--------------- + +============================== ================== ========================================================================================================================================================== +CMD Options JSON Options Definition +============================== ================== ========================================================================================================================================================== +``--mytarget-client-id`` ``client_id`` Client ID you generated +``--mytarget-client-secret`` ``client_secret`` Client secret you generated. +``--mytarget-refresh-token`` ``refresh_token`` Secret token you retrieved during the process of getting tokens +``--mytarget-request-type`` ``request_type`` Type of report you want to retrieve: performance or budgets. +``--mytarget-start-date`` ``start_date`` Start date of the period to request (format: YYYY-MM-DD) +``--mytarget-end-date`` ``end_date`` End date of the period to request (format: YYYY-MM-DD) +``--mytarget-date-range`` ``date_range`` Date range. By default, not available in MyTarget, so choose among ACK default values: YESTERDAY, LAST_7_DAYS, PREVIOUS_WEEK, PREVIOUS_MONTH, LAST_90_DAYS +============================== ================== ========================================================================================================================================================== + +============ +MySQL Reader +============ + +---------- +Source ORM +---------- + +`SQL Alchemy `__ (using the ``mysql+pymysql`` engine) + +---------- +Quickstart +---------- + +The following command retrieves all records from the table (equivalent to ``SELECT * FROM ``). + +.. 
code-block:: shell + + python ack/entrypoints/cli/main.py read_mysql --mysql-user --mysql-password --mysql-host --mysql-port --mysql-database --mysql-table write_console + +*Didn't work?* See the `Troubleshooting`_ section. + +------------ +Command name +------------ + +CMD: ``read_mysql`` + +JSON: ``mysql`` + +--------------- +Command options +--------------- + +===================================== ============================= ========================================================================================================= +CMD Options JSON Options Definition +===================================== ============================= ========================================================================================================= +``--mysql-user`` ``user`` Database user +``--mysql-password`` ``password`` Database password +``--mysql-host`` ``host`` Database host +``--mysql-port`` ``port`` Database port +``--mysql-database`` ``database`` Database name +``--mysql-query`` ``query`` SQL query (you must specify either a query or a table) +``--mysql-query-name`` ``query-name`` SQL query name (required if you specify a query) +``--mysql-table`` ``table`` Database table on which you want to run a `SELECT *` query (you must specify either a query or a table) +``--mysql-watermark-column`` ``watermark-column`` Watermark column (required when using state management) +``--mysql-watermark-init`` ``watermark-init`` Initial watermark column value (required when using state management) +``--mysql-redis-state-service-name`` ``redis-state-service-name`` Redis state service hash name +``--mysql-redis-state-service-host`` ``redis-state-service-host`` Redis state service host +``--mysql-redis-state-service-port`` ``redis-state-service-port`` Redis state service port +===================================== ============================= ========================================================================================================= + +============== +Radarly Reader 
+============== + +---------- +Source API +---------- + +`Radarly API `__ + +---------- +Quickstart +---------- + +The following command retrieves data from posts located under the project ```` and associated to the focus IDs ``00001`` and ``00002``, from 2020-01-01 to 2020-01-03. + +.. code-block:: shell + + python ack/entrypoints/cli/main.py read_radarly --radarly-client-id --radarly-client-secret --radarly-pid --radarly-focus-id 00001 --radarly-focus-id 00002 --radarly-start-date 2020-01-01 --radarly-end-date 2020-01-03 + +*Didn't work?* See the `Troubleshooting`_ section. + +------------ +Command name +------------ + +CMD: ``read_radarly`` + +JSON: ``radarly`` + +--------------- +Command options +--------------- + +============================================== ====================================== ====================================================================================================================================================================================================================== +CMD Options JSON Options Definition +============================================== ====================================== ====================================================================================================================================================================================================================== +``--radarly-client-id`` ``client_id`` Radarly Client ID +``--radarly-client-secret`` ``client_secret`` Radarly Client Secret +``--radarly-pid`` ``pid`` Radarly Project ID +``--radarly-focus-id`` ``focus_id`` (list) Focus IDs (several can be provided) +``--radarly-start-date`` ``start_date`` Start date of the period to request +``--radarly-end-date`` ``end_date`` End date of the period to request +``--radarly-api-request-limit`` ``api_request_limit`` Max number of posts to be requested in a single API request +``--radarly-api-date-period-limit`` ``api_date_period_limit`` Max number of posts to be requested in a single 
Search query +``--radarly-api-window`` ``api_window`` Duration of the rate limit window +``--radarly-api-quaterly-posts-limit`` ``api_quaterly_posts_limit`` Max number of posts to be requested over the rate limit window +``--radarly-api-throttle`` ``api_throttle`` If set to True (default), forces the reader to abide by `official API rate limits `__, using the 2 above parameters. +``--radarly-throttling-threshold-coefficient`` ``throttling_threshold_coefficient`` Throttling threshold coefficient +============================================== ====================================== ====================================================================================================================================================================================================================== + +================= +Salesforce Reader +================= + +---------- +Source API +---------- + +`Lightning Platform REST API `__ + +---------- +Quickstart +---------- + +The following command retrieves name field values from all Account records. + +.. code-block:: shell + + python ack/entrypoints/cli/main.py read_salesforce --salesforce-consumer-key --salesforce-consumer-secret --salesforce-user --salesforce-password --salesforce-query 'SELECT name FROM Account' --salesforce-query-name ack-account-name-query write_console + +*Didn't work?* See the `Troubleshooting`_ section. + +------------------------- +How to obtain credentials +------------------------- + +Create a Connected App by following the instructions detailed `on this page `__: it will generate your authentication credentials. 
+ +------------ +Command name +------------ + +CMD: ``read_salesforce`` + +JSON: ``salesforce`` + +--------------- +Command options +--------------- + +================================== ===================== ================================================================================================================================================================================================================================================================================================= +CMD Options JSON Options Definition +================================== ===================== ================================================================================================================================================================================================================================================================================================= +``--salesforce-consumer-key`` ``consumer_key`` Client ID of your Salesforce Connected App +``--salesforce-consumer-secret`` ``consumer_secret`` Client Secret of your Salesforce Connected App +``--salesforce-user`` ``user`` Salesforce username +``--salesforce-password`` ``password`` Salesforce password +``--salesforce-object-type`` ``object_type`` Salesforce object type (you must specify either a Salesforce object type or a SOQL query). With this configuration, the command will retrieve the values of all the fields from the given object records (equivalent to the SOQL query: `SELECT FROM `). +``--salesforce-query`` ``query`` SOQL query (you must specify either a Salesforce object type or a SOQL query). You can find documentation on Salesforce Object Query Language (SOQL) `here `__. 
+``--salesforce-query-name`` ``query_name`` SOQL query name (required if you specify a SOQL query) +``--salesforce-watermark-column`` ``watermark_column`` Salesforce watermark column (required when using state management) +``--salesforce-watermark-init`` ``watermark_init`` Initial Salesforce watermark column value (required when using state management) +================================== ===================== ================================================================================================================================================================================================================================================================================================= + +===================== +The Trade Desk Reader +===================== + +---------- +Source API +---------- + +`The Trade Desk API `__ + +------------------------- +How to obtain credentials +------------------------- + +- Ask your Account Representative to **give you access to The Trade Desk API and UI** +- He will generally provide you with **two distinct accounts**: an **API account**, allowing you to make API calls (*Login: ttd_api_{XXXXX}@client.com*), and a **UI account**, allowing you to navigate on The Trade Desk UI to create Report Templates (*Login: your professional e-mail address*) +- Pass **the Login and Password of your API account** to The Trade Desk connector + +---------- +Quickstart +---------- + +To request dimensions and metrics to The Trade Desk API, you should first **create a Report Template in The Trade Desk UI**, by following the below process: + +- Connect to `The Trade Desk UI `__ using the Login and Password of your UI account +- Navigate to *Reports* > *My Reports* to land on the *Report Templates* section +- Clone an existing Report Template, edit it to keep only the dimensions and metrics that you want to collect, and save it: it will appear under the *Mine* section +- Provide the exact name of the Report Template you have just created under 
the CLI option ``--ttd-report-template-name`` of The Trade Desk connector: the connector will "schedule" a report instance (which may take a few minutes to run), and fetch data to the location of your choice
+
+The following command retrieves the data associated to the Report template named "*adgroup_performance_report*" between 2020-01-01 and 2020-01-03, filtered on the PartnerId .
+
+.. code-block:: shell
+
+ python ack/entrypoints/cli/main.py read_ttd --ttd-login --ttd-password --ttd-partner-id --ttd-report-template-name adgroup_performance_report --ttd-start-date 2020-01-01 --ttd-end-date 2020-01-03 write_console
+
+*Didn't work?* See the `Troubleshooting`_ section.
+
+------------
+Command name
+------------
+
+CMD: ``read_ttd``
+
+JSON: ``the_trade_desk``
+
+---------------
+Command options
+---------------
+
+============================== ========================= ===========================================================================================================================================================================================
+CMD Options JSON Options Definition
+============================== ========================= ===========================================================================================================================================================================================
+``--ttd-login`` ``login`` Login of your API account
+``--ttd-password`` ``password`` Password of your API account
+``--ttd-advertiser-id`` ``advertiser_id`` (list) Advertiser Ids for which report data should be fetched
+``--ttd-report-template-name`` ``report_template_name`` Exact name of the Report Template to request. Existing Report Templates can be found within the `MyReports section `__ of The Trade Desk UI. 
+``--ttd-report-schedule-name`` ``report_schedule_name`` Name of the Report Schedule to create +``--ttd-start-date`` ``start_date`` Start date of the period to request (format: YYYY-MM-DD) +``--ttd-end-date`` ``end_date`` End date of the period to request (format: YYYY-MM-DD) +``--ttd-date-range`` ``date_range`` Date range. By default, not available in The Trade Desk, so choose among ACK default values: YESTERDAY, LAST_7_DAYS, PREVIOUS_WEEK, PREVIOUS_MONTH, LAST_90_DAYS +============================== ========================= =========================================================================================================================================================================================== + +If you need any further information, the documentation of The Trade Desk API can be found `here `__. + +================== +Twitter Ads Reader +================== + +---------- +Source API +---------- + +`Twitter Ads API `__ + +------------------------- +How to obtain credentials +------------------------- + +- **Apply for a developer account** through `this link `__. +- **Create a Twitter app** on the developer portal: it will generate your authentication credentials. +- **Apply for Twitter Ads API access** by filling out `this form `__. Receiving Twitter approval may take up to 7 business days. +- **Get access to the Twitter Ads account** you wish to retrieve data for, on the @handle that you used to create your Twitter App. Be careful, access levels matter: with an *Ad Manager* access, you will be able to request all report types; with a *Campaign Analyst* access, you will be able to request all report types, except ENTITY reports on Card entities. + +---------- +Quickstart +---------- + +The Twitter Ads Reader can collect **3 types of reports**, making calls to 4 endpoints of the Twitter Ads API: + +- **ANALYTICS reports**, making calls to the `Asynchronous Analytics endpoint `__. 
These reports return performance data for a wide range of metrics, that **can be aggregated over time**. Output data **can be splitted by day** when requested over a larger time period. +- **REACH reports**, making calls to the `Reach and Average Frequency endpoint `__. These reports return performance data with a focus on reach and frequency metrics, that **cannot be aggregated over time** (*e.g. the reach of day A and B is not equal to the reach of day A + the reach of day B, as it counts unique individuals*). Output data **cannot be splitted by day** when requested over a larger time period. These reports are available **only for the Funding Instrument and Campaign entities**. +- **ENTITY reports**, making calls to `Campaign Management endpoints `__ if the selected entity is Funding Instrument, Campaign, Line Item, Media Creative or Promoted Tweet, and to the `Creative endpoint `__ if the selected entity is Card. These reports return details on entity configuration since the creation of the Twitter Ads account. + +*Call example for ANALYTICS reports*: this call will collect engagement metrics for Line Item entities, splitting the results by day, from 2020-01-01 to 2020-01-03: + +.. code-block:: shell + + python ack/entrypoints/cli/main.py read_twitter --twitter-consumer-key --twitter-consumer-secret --twitter-access-token --twitter-access-token-secret --twitter-account-id --twitter-report-type ANALYTICS --twitter-entity LINE_ITEM --twitter-metric-group ENGAGEMENT --twitter-segmentation-type AGE --twitter-granularity DAY --twitter-start-date 2020-01-01 --twitter-end-date 2020-01-03 write_console + +*Call example for REACH reports*: this call will collect reach metrics (*total_audience_reach, average_frequency*) for Campaign entities, from 2020-01-01 to 2020-01-03: + +.. 
code-block:: shell
+
+ python ack/entrypoints/cli/main.py read_twitter --twitter-consumer-key --twitter-consumer-secret --twitter-access-token --twitter-access-token-secret --twitter-account-id --twitter-report-type REACH --twitter-entity CAMPAIGN --twitter-start-date 2020-01-01 --twitter-end-date 2020-01-03 write_console
+
+*Call example for ENTITY reports*: this call collects details on the configuration of Campaign entities (id, name, total_budget_amount_local_micro, currency), since the creation of the Twitter Ads account:
+
+.. code-block:: shell
+
+ python ack/entrypoints/cli/main.py read_twitter --twitter-consumer-key --twitter-consumer-secret --twitter-access-token --twitter-access-token-secret --twitter-account-id --twitter-report-type ENTITY --twitter-entity CAMPAIGN --twitter-entity-attribute id --twitter-entity-attribute name --twitter-entity-attribute total_budget_amount_local_micro --twitter-entity-attribute currency write_console
+
+*Didn't work?* See the `Troubleshooting`_ section.
+
+------------
+Command name
+------------
+
+CMD: ``read_twitter``
+
+JSON: ``twitter``
+
+---------------
+Command options
+---------------
+
+========================================== ================================ =================================================================================================================================================================================================================================
+CMD Options JSON Options Definition
+========================================== ================================ =================================================================================================================================================================================================================================
+``--twitter-consumer-key`` ``consumer_key`` API key, available in the 'Keys and tokens' section of your Twitter Developer App. 
+``--twitter-consumer-secret`` ``consumer_secret`` API secret key, available in the 'Keys and tokens' section of your Twitter Developer App. +``--twitter-access-token`` ``access_token`` Access token, available in the 'Keys and tokens' section of your Twitter Developer App. +``--twitter-access-token-secret`` ``access_token_secret`` Access token secret, available in the 'Keys and tokens' section of your Twitter Developer App. +``--twitter-account-id`` ``account_id`` Specifies the Twitter Account ID for which the data should be returned. +``--twitter-report-type`` ``report_type`` Specifies the type of report to collect. Possible values: ANALYTICS, REACH, ENTITY. +``--twitter-entity`` ``entity`` Specifies the entity type to retrieve data for. Possible values: FUNDING_INSTRUMENT, CAMPAIGN, LINE_ITEM, MEDIA_CREATIVE, PROMOTED_TWEET, CARD. +``--twitter-entity-attribute`` ``entity_attribute`` (list) Specific to ENTITY reports. Specifies the entity attribute (configuration detail) that should be returned. To get possible values, print the ENTITY_ATTRIBUTES variable on ack/helpers/twitter_helper.py +``--twitter-granularity`` ``granularity`` Specific to ANALYTICS reports. Specifies how granular the retrieved data should be. Possible values: TOTAL (default), DAY. +``--twitter-metric-group`` ``metric_group`` (list) Specific to ANALYTICS reports. Specifies the list of metrics (as a group) that should be returned. Possible values can be found `here `__. +``--twitter-placement`` ``placement`` Specific to ANALYTICS reports. Scopes the retrieved data to a particular placement. Possible values: ALL_ON_TWITTER (default), PUBLISHER_NETWORK. +``--twitter-segmentation-type`` ``segmentation_type`` Specific to ANALYTICS reports. Specifies how the retrieved data should be segmented. Possible values can be found `here `__. +``--twitter-platform`` ``platform`` Specific to ANALYTICS reports. Required if segmentation_type is set to DEVICES or PLATFORM_VERSION. 
Possible values can be identified through the GET targeting_criteria/platforms endpoint.
+``--twitter-country`` ``country`` Specific to ANALYTICS reports. Required if segmentation_type is set to CITIES, POSTAL_CODES, or REGION. Possible values can be identified through the GET targeting_criteria/locations endpoint.
+``--twitter-start-date`` ``start_date`` Start date of the period to request (format: YYYY-MM-DD).
+``--twitter-end-date`` ``end_date`` End date of the period to request (format: YYYY-MM-DD).
+``--twitter-date-range`` ``date_range`` Date range. By default, not available in Twitter, so choose among ACK default values: YESTERDAY, LAST_7_DAYS, PREVIOUS_WEEK, PREVIOUS_MONTH, LAST_90_DAYS
+``--twitter-add-request-date-to-report`` ``add_request_date_to_report`` If set to True (default: False), the date on which the request is made will appear on each report record.
+========================================== ================================ =================================================================================================================================================================================================================================
+
+If you need any further information, the documentation of Twitter Ads API can be found `here `__. To get a better understanding of **Twitter Ads Hierarchy and Terminology**, we advise you to have a look at `this page `__.
+
+==============
+Yandex Readers
+==============
+
+----------
+Source API
+----------
+
+`Yandex Direct API `__
+
+-------------------------
+How to obtain credentials
+-------------------------
+
+In order to access Yandex Direct API, you need two accounts: an advertiser account and a developer account.
+Here is the process:
+
+1. Create a developer account if you don't already have one. Click on the *Get started* button on this `page `__.
+2. Create and register an app that will access Yandex Direct API via `Yandex OAuth `__.
+3. Keep app client id safe. 
Log in with your advertiser account and `give permission to the app to access your data `__.
+4. Store your token very carefully.
+5. Log out and log in as a developer and `ask permission to access Yandex Direct API `__ (ask for Full access). Fill in the form.
+6. Wait for Yandex support to reply but it should be within a week.
+
+======================
+Yandex Campaign Reader
+======================
+
+`Official documentation `__
+
+----------
+Quickstart
+----------
+
+The following command retrieves the daily budget of all your campaigns, since your account creation.
+
+.. code-block:: shell
+
+ python ack/entrypoints/cli/main.py read_yandex_campaigns --yandex-token --yandex-field-name Id --yandex-field-name Name --yandex-field-name DailyBudget write_console
+
+*Didn't work?* See the `Troubleshooting`_ section.
+
+------------
+Command name
+------------
+
+CMD: ``read_yandex_campaigns``
+
+JSON: ``yandex_campaign``
+
+---------------
+Command options
+---------------
+
+====================================== =================================== ========================================================================================================================================================================
+CMD Options JSON Options Definition
+====================================== =================================== ========================================================================================================================================================================
+``--yandex-token`` ``token`` Bearer token that allows you to authenticate to the API
+``--yandex-campaign-id`` ``campaign_ids`` (list) (Optional) Selects campaigns with the specified IDs.
+``--yandex-campaign-state`` ``campaign_states`` (list) (Optional) Selects campaigns with the specified states. Possible values can be found `here `__.
+``--yandex-campaign-status`` ``campaign_statuses`` (list) (Optional) Selects campaigns with the specified statuses. 
Possible values can be found `here `__. +``--yandex-campaign-payment-status`` ``campaign_payment_statuses`` (list) (Optional) Selects campaigns with the specified payment `statuses `__. +``--yandex-field-name`` ``fields`` (list) Parameters to get that are common to all types of campaigns. +====================================== =================================== ======================================================================================================================================================================== + +======================== +Yandex Statistics Reader +======================== + +`Official documentation `__ + +---------- +Quickstart +---------- + +The following command retrieves a performance report for all your campaigns, since your account creation. + +.. code-block:: shell + + python ack/entrypoints/cli/main.py read_yandex_statistics --yandex-token --yandex-report-type AD_PERFORMANCE_REPORT --yandex-field-name AdFormat --yandex-field-name AdId --yandex-field-name Impressions --yandex-include-vat True --yandex-report-language en --yandex-field-name AdGroupName --yandex-field-name AdGroupId --yandex-field-name AdNetworkType --yandex-field-name CampaignId --yandex-field-name CampaignName --yandex-field-name CampaignType --yandex-field-name Date --yandex-field-name Device --yandex-field-name Clicks --yandex-field-name Conversions --yandex-field-name Cost --yandex-date-range ALL_TIME write_console + +*Didn't work?* See the `Troubleshooting`_ section. + +------------ +Command name +------------ + +CMD: ``read_yandex_statistics`` + +JSON: ``yandex_statistics`` + +--------------- +Command options +--------------- + +Detailed version `here `__. 
+
+============================== ==================== =====================================================================================================================================================================
+CMD Options JSON Options Definition
+============================== ==================== =====================================================================================================================================================================
+``--yandex-token`` ``token`` Bearer token that allows you to authenticate to the API
+``--yandex-report-language`` ``report_language`` (Optional) Language of the report. Possible values can be found `here `__.
+``--yandex-filter`` ``filters`` (list) (Optional) Filters on a particular field.
+``--yandex-max-rows`` ``max_rows`` (Optional) The maximum number of rows in the report.
+``--yandex-field-name`` ``fields`` (list) Information you want to collect. Possible values can be found `here `__.
+``--yandex-report-type`` ``report_type`` Type of report. Linked to the fields you want to select.
+``--yandex-date-range`` ``date_range`` Possible values can be found `here `__.
+``--yandex-include-vat`` ``include_vat`` Adds VAT to your expenses if set to True
+``--yandex-date-start`` ``date_start`` (Optional) Selects data on a specific period of time. Combined with ``--yandex-date-stop`` and ``--yandex-date-range`` set to CUSTOM_DATE.
+``--yandex-date-stop`` ``date_stop`` (Optional) Selects data on a specific period of time. Combined with ``--yandex-date-start`` and ``--yandex-date-range`` set to CUSTOM_DATE.
+============================== ==================== =====================================================================================================================================================================
+
+===============
+Troubleshooting
+===============
+
+You encountered an issue when running a Reader command and you don't know what's going on? 
+You may find an answer in the troubleshooting guide below. + +1. Have you installed ACK dependencies? In order to run ACK, you need to install all dependencies. First create a `virtual environment `__ and then run ``pip install -r requirements.txt``. +2. Have you set ``PYTHONPATH`` environment variable to the root of ACK folder? +3. Have you checked logs? The code has been implemented so that every error is logged. diff --git a/docs/source/streams.rst b/docs/source/streams.rst new file mode 100644 index 00000000..c14e1750 --- /dev/null +++ b/docs/source/streams.rst @@ -0,0 +1,7 @@ +======= +Streams +======= + +**Streams are local objects used by writers to process individual records collected from the source.** + +*About to develop a new stream?* See the :ref:`getting_started:How to develop a new stream` section. diff --git a/docs/source/to_go_further.rst b/docs/source/to_go_further.rst new file mode 100644 index 00000000..54916ec7 --- /dev/null +++ b/docs/source/to_go_further.rst @@ -0,0 +1,26 @@ +############# +To go further +############# + +============================================================================= +Build a Docker image of the application and push it to GCP Container Registry +============================================================================= + +Update the values of the context variables featured in the .env module: + +- ``PROJECT_ID``: GCP Project ID +- ``DOCKER_IMAGE``: image name +- ``DOCKER_TAG``: tag name +- ``DOCKER_REGISTRY``: registry hostname (e.g. eu.gcr.io for hosts located in the EU) + +Build ACK image: + +.. code-block:: shell + + make build_base_image + +Push ACK image to GCP Container Registry: + +.. 
code-block:: shell + + make publish_base_image diff --git a/docs/source/writers.rst b/docs/source/writers.rst new file mode 100644 index 00000000..e0f469c5 --- /dev/null +++ b/docs/source/writers.rst @@ -0,0 +1,192 @@ +####### +Writers +####### + +**Writers are writing output stream records to the destination of your choice.** + +*About to develop a new writer?* See the :ref:`getting_started:How to develop a new writer` section. + +*Just want to use an existing writer?* This page provides you with documentation on available commands: + +================ +Amazon S3 Writer +================ + +---------- +Quickstart +---------- + +The following command would allow you to: + +- write output stream records to a blob named ``google_analytics_report_2020-01-01.njson`` +- under the Amazon S3 bucket ``ack_extracts`` +- organized according to the following path: ``ack_extracts/FR/google_analytics/google_analytics_report_2020-01-01.njson`` + +.. code-block:: shell + + write_s3 --s3-bucket-name ack_extracts --s3-prefix FR/google_analytics --s3-filename google_analytics_report_2020-01-01.njson --s3-bucket-region --s3-access-key-id --s3-access-key-secret + +------------ +Command name +------------ + +CMD: ``write_s3`` + +JSON: ``amazon_s3`` + +--------------- +Command options +--------------- + +============================== ====================== ============================== +CMD Options JSON Options Definition +============================== ====================== ============================== +``--s3-bucket-name`` ``bucket_name`` S3 bucket name +``--s3-prefix`` ``prefix`` S3 blob prefix +``--s3-filename`` ``filename`` S3 blob name +``--s3-bucket-region`` ``bucket_region`` S3 bucket region +``--s3-access-key-id`` ``access_key_id`` S3 access key ID +``--s3-access-key-secret`` ``access_key_secret`` S3 access key secret +============================== ====================== ============================== + +====================== +Google BigQuery Writer 
+====================== + +---------- +Quickstart +---------- + +The following command would allow you to: + +- store output stream records into the BigQuery table ``google_analytics`` +- located under the BigQuery dataset ``ack`` + +As a preliminary step, stream data would be uploaded into a temporary blob located under the Cloud Storage bucket ``ack_extracts``. + +.. code-block:: shell + + write_bq --bq-dataset ack --bq-table google_analytics --bq-bucket ack_extracts + +------------ +Command name +------------ + +CMD: ``write_bq`` + +JSON: ``google_bigquery`` + +--------------- +Command options +--------------- + +============================== ====================== ================================================================================================================================================= +CMD Options JSON Options Definition +============================== ====================== ================================================================================================================================================= +``--bq-dataset`` ``dataset`` BigQuery dataset name +``--bq-table`` ``table`` BigQuery table name +``--bq-write-disposition`` ``write-disposition`` BigQuery write disposition. Possible values: TRUNCATE (default), APPEND +``--bq-partition-column`` ``partition-column`` (Optional) Field to be used as a partition column (more information on `this page `__) +``--bq-location`` ``location`` BigQuery dataset location. Possible values: EU (default), US. 
+``--bq-bucket`` ``bucket`` Cloud Storage bucket in which stream data should be written as a first step, before being uploaded into the BigQuery destination table +``--bq-keep-files`` ``keep-files`` False (default) if Cloud Storage blob should be deleted once the data has been uploaded into the BigQuery destination table, True otherwise +============================== ====================== ================================================================================================================================================= + +=========================== +Google Cloud Storage Writer +=========================== + +---------- +Quickstart +---------- + +The following command would allow you to: + +- write output stream records to a blob named ``google_analytics_report_2020-01-01.njson`` +- located under the Cloud Storage bucket ``ack_extracts`` +- organized according to the following path: ``ack_extracts/FR/google_analytics/google_analytics_report_2020-01-01.njson`` + +.. code-block:: shell + + write_gcs --gcs-project-id --gcs-bucket ack_extracts --gcs-prefix FR/google_analytics --gcs-filename google_analytics_report_2020-01-01.njson + +------------ +Command name +------------ + +CMD: ``write_gcs`` + +JSON: ``google_cloud_storage`` + +--------------- +Command options +--------------- + +============================== =============== ============================== +CMD Options JSON Options Definition +============================== =============== ============================== +``--gcs-project-id`` ``project_id`` GCP project ID +``--gcs-bucket`` ``bucket`` Cloud Storage bucket name +``--gcs-prefix`` ``prefix`` Cloud Storage blob prefix +``--gcs-file-name`` ``file_name`` Cloud Storage blob name +============================== =============== ============================== + +============ +Local Writer +============ + +---------- +Quickstart +---------- + +The following command would allow you to write a file ``google_analytics_report_2020-01-01.njson`` on 
the ``~/Desktop`` directory of your local machine: + +.. code-block:: shell + + write_local --local-directory ~/Desktop/ --local-file-name google_analytics_report_2020-01-01.njson + +------------ +Command name +------------ + +CMD: ``write_local`` + +JSON: ``local`` + +--------------- +Command options +--------------- + +============================== ============== =============================================================== +CMD Options JSON Options Definition +============================== ============== =============================================================== +``--local-directory (-d)`` ``directory`` Directory in which the file should be stored +``--local-file-name (-n)`` ``file_name`` File name +============================== ============== =============================================================== + +============== +Console Writer +============== + +---------- +Quickstart +---------- + +The following command would allow you to write stream output records directly into your terminal, which is very convenient for debugging: + +.. 
code-block:: shell + + write_console + +------------ +Command name +------------ + +CMD: ``write_console`` + +JSON: ``console`` + +--------------- +Command options +--------------- +*This writer command expects no options.* diff --git a/documentation/images/credentials_gs.png b/documentation/images/credentials_gs.png deleted file mode 100644 index 34373a1e..00000000 Binary files a/documentation/images/credentials_gs.png and /dev/null differ diff --git a/nck/entrypoint.py b/nck/entrypoint.py deleted file mode 100644 index cb3f6a5a..00000000 --- a/nck/entrypoint.py +++ /dev/null @@ -1,86 +0,0 @@ -# GNU Lesser General Public License v3.0 only -# Copyright (C) 2020 Artefact -# licence-information@artefact.com -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 3 of the License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, write to the Free Software Foundation, -# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-import click - -from nck.writers import writers, Writer -from nck.readers import readers, Reader -import nck.state_service as state -from nck.streams.normalized_json_stream import NormalizedJSONStream -from nck.streams.json_stream import JSONStream - - -@click.group(chain=True) -@click.option("--state-service-name") -@click.option("--state-service-host", help="Redis server IP address") -@click.option("--state-service-port", help="Redis server port", default=6379) -@click.option("--normalize-keys", default=False, - help="(Optional) If set to true, will normalize the output files keys, removing " - "white spaces and special characters.", type=bool) -def app(state_service_name, state_service_host, state_service_port, normalize_keys): - if (state_service_name or state_service_host) and not ( - state_service_name and state_service_host - ): - raise click.BadParameter( - "You must specify both a name and a host for the state service" - ) - - -@app.resultcallback() -def run(processors, state_service_name, state_service_host, state_service_port, normalize_keys): - state.configure(state_service_name, state_service_host, state_service_port) - - processor_instances = [p() for p in processors] - - _readers = list(filter(lambda o: isinstance(o, Reader), processor_instances)) - _writers = list(filter(lambda o: isinstance(o, Writer), processor_instances)) - - if len(_readers) < 1: - raise click.BadParameter("You must specify a reader") - - if len(_readers) > 1: - raise click.BadParameter("You cannot specify multiple readers") - - if len(_writers) < 1: - raise click.BadParameter("You must specify at least one writer") - - reader = _readers[0] - # A stream should represent a full file! 
- for stream in reader.read(): - for writer in _writers: - if normalize_keys and issubclass(stream.__class__, JSONStream): - writer.write(NormalizedJSONStream.create_from_stream(stream)) - else: - writer.write(stream) - - -def cli_entrypoint(): - build_commands() - app() - - -def build_commands(): - for writer in writers: - app.add_command(writer) - - for reader in readers: - app.add_command(reader) - - -if __name__ == "__main__": - build_commands() - app() diff --git a/nck/helpers/dv360_helper.py b/nck/helpers/dv360_helper.py deleted file mode 100644 index 14b0d608..00000000 --- a/nck/helpers/dv360_helper.py +++ /dev/null @@ -1,20 +0,0 @@ -FILE_NAMES = { - "FILE_TYPE_INSERTION_ORDER": "InsertionOrders", - "FILE_TYPE_CAMPAIGN": "Campaigns", - "FILE_TYPE_MEDIA_PRODUCT": "MediaProducts", - "FILE_TYPE_LINE_ITEM": "LineItems", - "FILE_TYPE_AD_GROUP": "AdGroups", - "FILE_TYPE_AD": "AdGroupAds" -} - -FILE_TYPES = FILE_NAMES.keys() - -FILTER_TYPES = [ - "FILTER_TYPE_UNSPECIFIED", - "FILTER_TYPE_NONE", - "FILTER_TYPE_ADVERTISER_ID", - "FILTER_TYPE_CAMPAIGN_ID", - "FILTER_TYPE_MEDIA_PRODUCT_ID", - "FILTER_TYPE_INSERTION_ORDER_ID", - "FILTER_TYPE_LINE_ITEM_ID" -] diff --git a/nck/helpers/twitter_helper.py b/nck/helpers/twitter_helper.py deleted file mode 100644 index 042752a6..00000000 --- a/nck/helpers/twitter_helper.py +++ /dev/null @@ -1,72 +0,0 @@ -# License as published by the Free Software Foundation; either -# version 3 of the License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, write to the Free Software Foundation, -# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
- -from twitter_ads.campaign import FundingInstrument, Campaign, LineItem -from twitter_ads.creative import MediaCreative, PromotedTweet, CardsFetch - - -REPORT_TYPES = ["ANALYTICS", "REACH", "ENTITY"] - -ENTITY_OBJECTS = { - "FUNDING_INSTRUMENT": FundingInstrument, - "CAMPAIGN": Campaign, - "LINE_ITEM": LineItem, - "MEDIA_CREATIVE": MediaCreative, - "PROMOTED_TWEET": PromotedTweet, -} - -ENTITY_ATTRIBUTES = { - **{ - entity: list(ENTITY_OBJECTS[entity].__dict__["PROPERTIES"].keys()) - for entity in ENTITY_OBJECTS - }, - "CARD": list(CardsFetch.__dict__["PROPERTIES"].keys()), -} - -GRANULARITIES = ["DAY", "TOTAL"] - -METRIC_GROUPS = [ - "ENGAGEMENT", - "BILLING", - "VIDEO", - "MEDIA", - "MOBILE_CONVERSION", - "WEB_CONVERSION", - "LIFE_TIME_VALUE_MOBILE_CONVERSION", -] - -PLACEMENTS = [ - "ALL_ON_TWITTER", - "PUBLISHER_NETWORK", -] - -SEGMENTATION_TYPES = [ - "AGE", - "APP_STORE_CATEGORY", - "AUDIENCES", - "CONVERSATIONS", - "CONVERSION_TAGS", - "DEVICES", - "EVENTS", - "GENDER", - "INTERESTS", - "KEYWORDS", - "LANGUAGES", - "LOCATIONS", - "METROS", - "PLATFORMS", - "PLATFORM_VERSIONS", - "POSTAL_CODES", - "REGIONS", - "SIMILAR_TO_FOLLOWERS_OF_USER", - "TV_SHOWS", -] diff --git a/nck/helpers/yandex_helper.py b/nck/helpers/yandex_helper.py deleted file mode 100644 index 9990ecba..00000000 --- a/nck/helpers/yandex_helper.py +++ /dev/null @@ -1,160 +0,0 @@ -LANGUAGES = ["en", "ru", "uk"] - -REPORT_TYPES = [ - "ACCOUNT_PERFORMANCE_REPORT", - "CAMPAIGN_PERFORMANCE_REPORT", - "ADGROUP_PERFORMANCE_REPORT", - "AD_PERFORMANCE_REPORT", - "CRITERIA_PERFORMANCE_REPORT", - "CUSTOM_REPORT", - "REACH_AND_FREQUENCY_PERFORMANCE_REPORT", - "SEARCH_QUERY_PERFORMANCE_REPORT" -] - -STATS_FIELDS = [ - "AdFormat", - "AdGroupId", - "AdGroupName", - "AdId", - "AdNetworkType", - "Age", - "AudienceTargetId", - "AvgClickPosition", - "AvgCpc", - "AvgCpm", - "AvgImpressionFrequency", - "AvgImpressionPosition", - "AvgPageviews", - "AvgTrafficVolume", - "BounceRate", - "Bounces", - "CampaignId", - 
"CampaignName", - "CampaignType", - "CarrierType", - "Clicks", - "ClickType", - "ConversionRate", - "Conversions", - "Cost", - "CostPerConversion", - "Criteria", - "CriteriaId", - "CriteriaType", - "Criterion", - "CriterionId", - "CriterionType", - "Ctr", - "Date", - "Device", - "DynamicTextAdTargetId", - "ExternalNetworkName", - "Gender", - "GoalsRoi", - "ImpressionReach", - "Impressions", - "ImpressionShare", - "Keyword", - "LocationOfPresenceId", - "LocationOfPresenceName", - "MatchedKeyword", - "MatchType", - "MobilePlatform", - "Month", - "Placement", - "Profit", - "Quarter", - "Query", - "Revenue", - "RlAdjustmentId", - "Sessions", - "Slot", - "SmartBannerFilterId", - "TargetingLocationId", - "TargetingLocationName", - "Week", - "WeightedCtr", - "WeightedImpressions", - "Year" -] - -CAMPAIGN_FIELDS = [ - "BlockedIps", - "ExcludedSites", - "Currency", - "DailyBudget", - "Notification", - "EndDate", - "Funds", - "ClientInfo", - "Id", - "Name", - "NegativeKeywords", - "RepresentedBy", - "StartDate", - "Statistics", - "State", - "Status", - "StatusPayment", - "StatusClarification", - "SourceId", - "TimeTargeting", - "TimeZone", - "Type" -] - -DATE_RANGE_TYPES = [ - "TODAY", - "YESTERDAY", - "THIS_WEEK_MON_TODAY", - "THIS_WEEK_SUN_TODAY", - "LAST_WEEK", - "LAST_BUSINESS_WEEK", - "LAST_WEEK_SUN_SAT", - "THIS_MONTH", - "LAST_MONTH", - "ALL_TIME", - "CUSTOM_DATE", - "AUT0", - "LAST_3_DAYS", - "LAST_5_DAYS", - "LAST_7_DAYS", - "LAST_14_DAYS", - "LAST_30_DAYS", - "LAST_90_DAYS", - "LAST_365_DAYS" -] - -OPERATORS = [ - "EQUALS", - "NOT_EQUALS", - "IN", - "NOT_IN", - "LESS_THAN", - "GREATER_THAN", - "STARTS_WITH_IGNORE_CASE", - "DOES_NOT_START_WITH_IGNORE_CASE", - "STARTS_WITH_ANY_IGNORE_CASE", - "DOES_NOT_START_WITH_ALL_IGNORE_CASE" -] - -CAMPAIGN_STATES = [ - "ARCHIVED", - "CONVERTED", - "ENDED", - "OFF", - "ON", - "SUSPENDED" -] - -CAMPAIGN_STATUSES = [ - "ACCEPTED", - "DRAFT", - "MODERATION", - "REJECTED" -] - -CAMPAIGN_PAYMENT_STATUSES = [ - "ALLOWED", - 
"DISALLOWED" -] diff --git a/nck/readers/README.md b/nck/readers/README.md deleted file mode 100644 index 535b6198..00000000 --- a/nck/readers/README.md +++ /dev/null @@ -1,742 +0,0 @@ -# NCK Readers - -Each reader role is to read data from external source and transform it into a Stream understable format to be written on GCS and BQ thanks to the corresponding writers. - -## List of Readers - -- Adobe Analytics 1.4 -- Adobe Analytics 2.0 -- Amazon S3 -- Facebook Marketing -- Google Ads -- Google Analytics -- Google Cloud Storage -- Google Campaign Manager -- Google Display & Video 360 -- Google Search Ads 360 -- Google Search Console -- Google Sheets -- Oracle -- MySQL -- Radarly -- SalesForce -- The Trade Desk -- Twitter Ads -- Yandex Campaign -- Yandex Statistics - -## Step to create a new Reader - -1. Create python module following naming nomenclature ``` [command]_reader.py ``` -2. Implement `read` method -3. Create click command with required options -4. Reference click command into [commands list](./__init__.py) -5. Update current README.md - -## Adobe Analytics Readers - -As of May 2020 (last update of this section of the documentation), **two versions of Adobe Analytics Reporting API are coexisting: 1.4 and 2.0**. As some functionalities of API 1.4 have not been made available in API 2.0 yet (Data Warehouse reports in particular), our Adobe Analytics Readers are also available in these two versions. - -#### How to obtain credentials - -Both Adobe Analytics Readers use the **JWT authentication framework**. 
-- Get developer access to Adobe Analytics (documentation can be found [here](https://helpx.adobe.com/enterprise/using/manage-developers.html)) -- Create a Service Account integration to Adobe Analytics on [Adobe Developer Console](https://console.adobe.io/) -- Use the generated JWT credentials (Client ID, Client Secret, Technical Account ID, Organization ID and private.key file) to retrieve your Global Company ID (to be requested to [Discovery API](https://www.adobe.io/apis/experiencecloud/analytics/docs.html#!AdobeDocs/analytics-2.0-apis/master/discovery.md)). All these parameters will be passed to Adobe Analytics Readers. - -### Adobe Analytics Reader 1.4 - -#### Source API - -[Analytics API v1.4](https://github.com/AdobeDocs/analytics-1.4-apis) - -#### Quickstart - -Call example to Adobe Analytics Reader 1.4, getting the number of visits per day and tracking code for a specified Report Suite, between 2020-01-01 and 2020-01-31: - -``` -python nck/entrypoint.py read_adobe --adobe-client-id --adobe-client-secret --adobe-tech-account-id --adobe-org-id --adobe-private-key --adobe-global-company-id --adobe-report-suite-id --adobe-date-granularity day --adobe-report-element-id trackingcode --adobe-report-metric-id visits --adobe-start-date 2020-01-01 --adobe-end-date 2020-01-31 write_console -``` - -Didn't work? See [troubleshooting](#troubleshooting) section. - -#### Parameters - -|CLI option|Documentation| -|--|--| -|`--adobe-client-id`|Client ID, that you can find on Adobe Developer Console| -|`--adobe-client-secret`|Client Secret, that you can find on Adobe Developer Console| -|`--adobe-tech-account-id`|Technical Account ID, that you can find on Adobe Developer Console| -|`--adobe-org-id`|Organization ID, that you can find on Adobe Developer Console| -|`--adobe-private-key`|Content of the private.key file, that you had to provide to create the integration. 
Make sure to enter the parameter in quotes, include headers, and indicate newlines as \n.| -|`--adobe-global-company-id`|Global Company ID (to be requested to [Discovery API](https://www.adobe.io/apis/experiencecloud/analytics/docs.html#!AdobeDocs/analytics-2.0-apis/master/discovery.md))| -|`--adobe-list-report-suite`|Should be set to *True* if you wish to request the list of available Adobe Report Suites (*default: False*). If set to *True*, the below parameters should be left empty.| -|`--adobe-report-suite-id`|ID of the requested Adobe Report Suite| -|`--adobe-report-element-id`|ID of the element (i.e. dimension) to include in the report| -|`--adobe-report-metric-id`|ID of the metric to include in the report| -|`--adobe-date-granularity`|Granularity of the report. *Possible values: PREVIOUS_DAY, LAST_30_DAYS, LAST_7_DAYS, LAST_90_DAYS*| -|`--adobe-start-date`|Start date of the period to request (format: YYYY-MM-DD)| -|`--adobe-end-date`|End date of the period to request (format: YYYY-MM-DD)| - -#### Addtional information - -- **The full list of available elements and metrics** can be retrieved with the [GetElements](https://github.com/AdobeDocs/analytics-1.4-apis/blob/master/docs/reporting-api/methods/r_GetElements.md) and [GetMetrics](https://github.com/AdobeDocs/analytics-1.4-apis/blob/master/docs/reporting-api/methods/r_GetMetrics.md) methods. -- **Adobe Analytics Reader 1.4 requests Data Warehouse reports** (the "source" parameter is set to "warehouse" in the report description), allowing it to efficiently process multiple-dimension requests. -- **If you need further information**, the documentation of Adobe APIs 1.4 can be found [here](https://github.com/AdobeDocs/analytics-1.4-apis). 
- -### Adobe Analytics Reader 2.0 - -#### Source API - -[Analytics API v2.0](https://github.com/AdobeDocs/analytics-2.0-apis) - -#### Quickstart - -Call example to Adobe Analytics Reader 2.0, getting the number of visits per day and tracking code for a specified Report Suite, between 2020-01-01 and 2020-01-31: - -``` -python nck/entrypoint.py read_adobe_2_0 --adobe-2-0-client-id --adobe-2-0-client-secret --adobe-2-0-tech-account-id --adobe-2-0-org-id --adobe-2-0-private-key --adobe-2-0-global-company-id --adobe-2-0-report-suite-id --adobe-2-0-dimension daterangeday --adobe-2-0-dimension campaign --adobe-2-0-start-date 2020-01-01 --adobe-2-0-end-date 2020-01-31 --adobe-2-0-metric visits write_console -``` - -Didn't work? See [troubleshooting](#troubleshooting) section. - -#### Parameters - -|CLI option|Documentation| -|--|--| -|`--adobe-2-0-client-id`|Client ID, that you can find on Adobe Developer Console| -|`--adobe-2-0-client-secret`|Client Secret, that you can find on Adobe Developer Console| -|`--adobe-2-0-tech-account-id`|Technical Account ID, that you can find on Adobe Developer Console| -|`--adobe-2-0-org-id`|Organization ID, that you can find on Adobe Developer Console| -|`--adobe-2-0-private-key`|Content of the private.key file, that you had to provide to create the integration. 
Make sure to enter the parameter in quotes, include headers, and indicate newlines as \n.| -|`--adobe-2-0-global-company-id`|Global Company ID (to be requested to [Discovery API](https://www.adobe.io/apis/experiencecloud/analytics/docs.html#!AdobeDocs/analytics-2.0-apis/master/discovery.md))| -|`--adobe-2-0-report-suite-id`|ID of the requested Adobe Report Suite| -|`--adobe-2-0-dimension`|Dimension to include in the report| -|`--adobe-2-0-metric`|Metric to include in the report| -|`--adobe-2-0-start-date`|Start date of the period to request (format: YYYY-MM-DD)| -|`--adobe-2-0-end-date`|Start date of the period to request (format: YYYY-MM-DD)| - -#### Additional information - -- **In API 2.0, dimension and metric names are slightly different from API 1.4**. To get new metric and dimension names and reproduce the behavior of Adobe Analytics UI as closely as possible, [enable the Debugger feature in Adobe Analytics Workspace](https://github.com/AdobeDocs/analytics-2.0-apis/blob/master/reporting-tricks.md): it allow you to visualize the back-end JSON requests made by Adobe Analytics UI to Reporting API 2.0. -- **In API 2.0, the date granularity parameter was removed, and should now be handled as a dimension**: a request featuring `--adobe-dimension daterangeday` will produce a report with a day granularity. -- **API 2.0 does not feature Data Warehouse reports yet** (along with other features, that are indicated on the "Current limitations" section of [this page](https://www.adobe.io/apis/experiencecloud/analytics/docs.html#!AdobeDocs/analytics-2.0-apis/master/migration-guide.md)). For this reason, if you wish to collect multiple-dimension reports, Adobe Analytics Reader 1.4 might be a more efficient solution in terms of processing time. -- **If you need any further information**, the documentation of Adobe APIs 2.0 can be found [here](https://github.com/AdobeDocs/analytics-2.0-apis). 
- -## Amazon S3 Reader - -*Not documented yet.* - -## Facebook Marketing Reader - -#### Source API - -[Facebook Marketing API](https://developers.facebook.com/docs/marketing-api/reference/v7.0) - -#### Quickstart - -The Facebook Marketing Reader handles calls to 2 endpoints of the Facebook Marketing API: **Facebook Ad Insights** (to retrieve performance data), and **Facebook Ad Management** (to retrieve configuration data). - -*Example of Ad Insights Request* -``` -python nck/entrypoint.py read_facebook --facebook-access-token --facebook-object-id --facebook-breakdown age --facebook-breakdown gender --facebook-action-breakdown action_type --facebook-field ad_id --facebook-field ad_name --facebook-field impressions --facebook-field clicks --facebook-field actions[action_type:post_engagement] --facebook-field actions[action_type:video_view] --facebook-field age --facebook-field gender --facebook-time-increment 1 --facebook-start-date 2020-01-01 --facebook-end-date 2020-01-03 write_console -``` - -*Example of Ad Management Request* -``` -python nck/entrypoint.py read_facebook --facebook-access-token --facebook-object-id --facebook-ad-insights False --facebook-level ad --facebook-field id --facebook-field creative[id] --facebook-add-date-to-report True --facebook-start-date 2020-01-01 --facebook-end-date 2019-01-01 write_console -``` - -Didn't work? See [troubleshooting](#troubleshooting) section. - -#### Parameters - -|CLI option|Documentation| -|:--|:--| -|`--facebook-app-id`|Facebook App ID. *Not mandatory if Facebook Access Token is provided.*| -|`--facebook-app-secret`|Facebook App Secret. *Not mandatory if Facebook Access Token is provided.*| -|`--facebook-access-token`|Facebook App Access Token.| -|`--facebook-object-type`|Nature of the root Facebook Object used to make the request. 
*Possible values: creative (available only for Ad Management requests), ad, adset, campaign, account (default).*| -|`--facebook-object-id`|ID of the root Facebook Object used to make the request.| -|`--facebook-level`|Granularity of the response. *Possible values: creative (available only for Ad Management requests), ad (default), adset, campaign, account.*| -|`--facebook-ad-insights`|*True* (default) if *Ad Insights* request, *False* if *Ad Management* request.| -|`--facebook-field`|Fields to be retrieved.| -|`--facebook-start-date`|Start date of the period to request (format: YYYY-MM-DD). *This parameter is only relevant for Ad Insights Requests, and Ad Management requests at the Campaign, Adset and Ad levels.*| -|`--facebook-end-date`|Start date of the period to request (format: YYYY-MM-DD). *This parameter is only relevant for Ad Insights Requests, and Ad Management requests at the Campaign, Adset and Ad levels.*| -|`--facebook-date-preset`|Relative time range. Ignored if *--facebook-start date* and *--facebook-end-date* are specified. *This parameter is only relevant for Ad Insights Requests, and Ad Management requests at the Campaign, Adset and Ad levels.*| -|`--facebook-time-increment`|Cuts the results between smaller time slices within the specified time range. *This parameter is only relevant for Ad Insights Requests, and Ad Management requests at the Campaign, Adset and Ad levels.*| -|`--facebook-add-date-to-report`|*True* if you wish to add the date of the request to each response record, *False* otherwise (default).| -|`--facebook-breakdown`|How to break down the result. *This parameter is only relevant for Ad Insights Requests.*| -|`--facebook-action-breakdown`|How to break down action results. *This parameter is only relevant for Ad Insights Requests.*| - -#### Additional information - -**1. 
Make sure to select the appropriate `--facebook-level`** - -|If Facebook Object Type is...|Facebook Level can be...| -|:--|:--| -|`account`|account, campaign, adset, ad, creative| -|`campaign`|campaign, adset, ad| -|`adset`|adset, ad, creative| -|`ad`|ad, creative| -|`creative`|creative| - -**2. Format Facebook Marketing Reader response using `--facebook-fields`** - -2.1. The list of **applicable fields** can be found on the links below: - -- **Ad Insights Request**: [all fields](https://developers.facebook.com/docs/marketing-api/insights/parameters/v7.0) -- **Ad Management Request**: [Account-level fields](https://developers.facebook.com/docs/marketing-api/reference/ad-account), [Campaign-level fields](https://developers.facebook.com/docs/marketing-api/reference/ad-campaign-group), [Adset-level fields](https://developers.facebook.com/docs/marketing-api/reference/ad-campaign), [Ad-level fields](https://developers.facebook.com/docs/marketing-api/reference/adgroup), [Creative-level fields](https://developers.facebook.com/docs/marketing-api/reference/ad-creative) - -2.2. If you want to select **a nested field value**, simply indicate the path to this value within the request field. - -*Facebook Marketing Reader Request* -``` ---facebook-field object_story_spec[video_data][call_to_action][value][link] -``` - -*API Response* -``` -"object_story_spec": { - "video_data": { - "call_to_action": { - "type": "LEARN_MORE", - "value": { - "link": "https://www.artefact.com", - "link_format": "VIDEO_LPP" - } - } - } -} -``` - -*Facebook Marketing Reader Response* -``` -{"object_story_spec_video_data_call_to_action_value_link": "https://www.artefact.com"} -``` - -2.3 **Action Breakdown filters** can be applied to the fields of ***Ad Insights* Requests** using the following syntax: [:]. You can combine multiple Action Breakdown filters on the same field by adding them in cascade next to each other. 
- -*Facebook Marketing Reader Request* -``` ---facebook-action-breakdown action_type ---facebook-field actions[action_type:video_view][action_type:post_engagement] -``` - -*API Response* -``` -"actions": [ - { - "action_type": "video_view", - "value": "17" - }, - { - "action_type": "link_click", - "value": "8" - }, - { - "action_type": "post_engagement", - "value": "25" - }, - { - "action_type": "page_engagement", - "value": "12" - } -] - -``` -*Facebook Marketing Reader Response* -``` -{"actions_action_type_video_view": "17", "actions_action_type_post_engagement": "25"} -``` - -## Google Readers - -### Authentication - -You can authenticate to most of the Readers of the Google Suite following the same schema. You'll need to generate a **refresh token** to connect via the OAuth flow. A full script to do this can be found in this [refresh token generator](https://github.com/artefactory/Refresh-token-generator-for-google-oauth). - -### Google Ads Reader - -#### Source API - -[AdWords API](https://developers.google.com/adwords/api/docs/guides/start) - -#### How to obtain credentials - -Using the AdWords API requires four things: -- A developer token (Generated at a company level - one per company -, takes around 2 days to be approved by Google) which can be completely independant from the Google Ads Account you will be calling (though you need a Manager Google Ads Account to request a token for your company) -- OAuth2 credentials: and -- A refresh token, created with the email address able to access to all the Google Ads Account you will be calling -- The ID of the Google Ads Accounts you will be reading from (XXX-XXX-XXXX numbers, written right next to your Account Name) - -See the [documentation here](https://developers.google.com/adwords/api/docs/guides/signup) to apply for access if your Company does not already have a developer token (granting you the right to use the API). 
- -See the [documentation here](https://developers.google.com/adwords/api/docs/guides/first-api-call) to set-up your OAuth2 credentials and refresh token specifically for your Google Ads Accounts. - -#### Quickstart - -The following command retrieves insights about the Ads of *my_first_campaign* and *my_second_campaign* in the Google Ads Account , thanks to your company , , and with the necessary permissions to access your Accounts. - -``` -python nck/entrypoint.py read_googleads --googleads-developer-token --googleads-client-id --googleads-client-secret --googleads-refresh-token --googleads-client-customer-id --googleads-report-type AD_PERFORMANCE_REPORT --googleads-date-range-type LAST_7_DAYS --googleads-field CampaignName --googleads-field AdGroupName --googleads-field Headline --googleads-field Date --googleads-field Impressions --googleads-report-filter "{'field':'CampaignName','operator':'IN','values':['my_first_campaign','my_second_campaign']}" -``` - -Didn't work? See [troubleshooting](#troubleshooting) section. - -#### Parameters - -|CLI option|Documentation| -|--|--| -|`--googleads-developer-token`|Company Developer token for Google Ads API| -|`--googleads-client-id`|OAuth2 ID| -|`--googleads-client-secret`|OAuth2 secret| -|`--googleads-refresh-token`|Refresh token for OAuth2| -|`--googleads-manager-id`|(Optional) Manager_Account_ID (XXX-XXX-XXXX identifier)| -|`--googleads-client-customer-id`|GAds_Account_ID (ignored if a manager account ID was given)| -|`--googleads-report-name`|(Optional) Name of your output stream ("Custom Report" by default)| -|`--googleads-report-type`|Type of report to be called| -|`--googleads-date-range-type`|Type of date range to apply (if "CUSTOM_RANGE", a min and max date must be specified). 
*Possible values can be found [here](https://developers.google.com/adwords/api/docs/guides/reporting#date_ranges).*| -|`--googleads-start-date`|(Optional) Start date for "CUSTOM_RANGE" date range (format: YYYY-MM-DD)| -|`--googleads-end-date`|(Optional) End date for "CUSTOM_RANGE" date range (format: YYYY-MM-DD)| -|`--googleads-field`|Fields to include in the report| -|`--googleads-report-filter`|Filter to apply on a chosen field (Dictionary as String "{'field':,'operator':,'values':}")| -|`--googleads-include-zero-impressions`|Boolean specifying whether or not rows with zero impressions should be included in the report| -|`--googleads-filter-on-video-campaigns`|Boolean used to filter the report on Video Campaigns only (require CampaignId to be listed as a field)| -|`--googleads-include-client-customer-id`|Boolean used to add "AccountId" as a field in the output stream. *AccountId is not available in the API, but is known since it's a requirement to call the API (= Client Customer ID)*| - -See documentation below for a better understanding of the parameters: -- [Reporting basics](https://developers.google.com/adwords/api/docs/guides/reporting#create_a_report_definition) -- [Available reports and associated fields](https://developers.google.com/adwords/api/docs/appendix/reports#available-reports) - -### Google Analytics Reader - -#### Source API - -[Analytics Reporting API](https://developers.google.com/analytics/devguides/reporting/core/v4) - -#### Quickstart - -The following command retrieves sessions, pageviews and bounces volumes by date from 2020-01-01 to 2020-01-03, for the Analytics View , thanks your , and with the necessary permissions to access your accounts. - -``` -python nck/entrypoint.py read_ga --ga-client-id --ga-client-secret --ga-view-id --ga-refresh-token --ga-dimension ga:date --ga-metric sessions --ga-metric ga:pageviews --ga-metric ga:bounces --ga-start-date 2020-01-01 --ga-end-date 2020-01-03 write_console -``` - -Didn't work? 
See [troubleshooting](#troubleshooting) section. - -#### Parameters - -|CLI option|Documentation| -|--|--| -|`--ga-client-id`|OAuth2 ID| -|`--ga-client-secret`|OAuth2 secret| -|`--ga-access-token`|(Optional) Access token for OAuth2| -|`--ga-refresh-token`|Refresh token for OAuth2| -|`--ga-view-id`|Analytics View ID from which to retrieve data. See documentation [here](https://support.google.com/analytics/answer/1009618) for a better understanding of Google Analytics hierrarchy.| -|`--ga-account-id`|Analytics Account ID from which to retrieve data. See documentation [here](https://support.google.com/analytics/answer/1009618) for a better understanding of Google Analytics hierrarchy.| -|`--ga-dimension`|Dimensions to include in the report (max 9). Possible values can be found [here](https://ga-dev-tools.appspot.com/dimensions-metrics-explorer/).| -|`--ga-metric`|Metrics to include in the report (min 1, max 10). Possible values can be found [here](https://ga-dev-tools.appspot.com/dimensions-metrics-explorer/).| -|`--ga-segment-id`|Segment ID of a built-in or custom segment (for example gaid::-3) on which report data should be segmented.| -|`--ga-start-date`|Start date of the period to request (format: YYYY-MM-DD)| -|`--ga-end-date`|End date of the period to request (format: YYYY-MM-DD)| -|`--ga-date-range`| of the period to request, specified as a unique argument (format: YYYY-MM-DD YYYY-MM-DD)| -|`--ga-day-range`|Relative time range. *Possible values: PREVIOUS_DAY, LAST_30_DAYS, LAST_7_DAYS, LAST_90_DAYS.*| -|`--ga-sampling-level`|Desired sample size. See documentation [here](https://support.google.com/analytics/answer/2637192) for a better understanding of Google Analytics sampling. 
*Possible values: SMALL, DEFAULT, LARGE (default).*| -|`--ga-add-view`|If set to *True* (default: False)*, adds a "ga:viewId" field to the output stream.| - -See documentation [here](https://developers.google.com/analytics/devguides/reporting/core/v4/basics) for a better understanding of the parameters. - -### Google Cloud Storage Reader - -*Not documented yet.* - -### Google Campaign Manager Reader - -#### Source API - -[DCM/DFA Reporting and Trafficking API](https://developers.google.com/doubleclick-advertisers/v3.3) - -#### Quickstart - -The following command retrieves impressions, clicks and cost volumes from 2020-01-01 to 2020-01-03, thanks your , , and with the necessary permissions to access your accounts. - -``` -python nck/entrypoint.py read_dcm --dcm-client-id --dcm-client-secret --dcm-refresh-token --dcm-profile-id --dcm-dimension dfa:date --dcm-metric dfa:impressions --dcm-metric dfa:clicks --dcm-metric dfa:mediaCost --dcm-start-date 2020-01-01 --dcm-end-date 2020-01-03 write_console -``` - -Didn't work? See [troubleshooting](#troubleshooting) section. - -##### Parameters - -|CLI option|Documentation| -|--|--| -|`--dcm-client-id`|OAuth2 ID| -|`--dcm-client-secret`|OAuth2 secret| -|`--dcm-access-token`|(Optional) Access token for OAuth2| -|`--dcm-refresh-token`|Refresh token for OAuth2| -|`--dcm-profile-id`|ID of the DFA user profile that has been granted permissions to the CM account for which you want to retrieve data. You should have 1 DFA user profile per CM account that you can access. The associated ID can be found directly on your Campaign Manager UI (when accessing your list of CM accounts, on the top right hand corner).| -|`--dcm-report-name`|Name of the report, that will appear in CM UI.| -|`--dcm-report-type`|Type of the report. *Possible values: CROSS_DIMENSION_REACH, FLOODLIGHT, PATH_TO_CONVERSION, REACH, STANDARD.*| -|`--dcm-dimension`|Dimensions to include in the report. 
*Possible values can be found [here](https://developers.google.com/doubleclick-advertisers/v3.3/dimensions).*| -|`--dcm-metric`|Metrics to include in the report. *Possible values can be found [here](https://developers.google.com/doubleclick-advertisers/v3.3/dimensions).*| -|`--dcm-filter`| association, used to narrow the scope of the report. For instance "dfa:advertiserId XXXXX" will narrow report scope to the performance of Advertiser ID XXXXX. *Possible filter types can be found [here](https://developers.google.com/doubleclick-advertisers/v3.3/dimensions).*| -|`--dcm-start-date`|Start date of the period to request (format: YYYY-MM-DD)| -|`--dcm-end-date`|End date of the period to request (format: YYYY-MM-DD)| - -### Google DoubleClick Manager Reader (DBM) - -#### Source API - -[Doubleclick Bid Manager API](https://developers.google.com/bid-manager/v1) - -#### Quickstart - -The following command retrieves impressions, clicks and cost volumes filtered on a specific advertiser from 2020-01-01 to 2020-01-03, thanks to your OAuth2 credentials, and with the necessary permissions to access your accounts. - -``` -python nck/entrypoint.py read_dbm --dbm-client-id --dbm-client-secret --dbm-refresh-token --dbm-filter FILTER_ADVERTISER --dbm-query-dimension FILTER_DATE --dbm-query-metric METRIC_IMPRESSIONS --dbm-query-metric METRIC_CLICKS --dbm-query-metric METRIC_MEDIA_COST_ADVERTISER --dbm-query-param-type TYPE_GENERAL --dbm-request-type custom_query_report --dbm-start-date 2020-01-01 --dbm-end-date 2020-01-03 write_console -``` - -Didn't work? See [troubleshooting](#troubleshooting) section. - -#### Parameters - -|CLI option|Documentation| -|--|--| -|`--dbm-client-id`|OAuth2 ID| -|`--dbm-client-secret`|OAuth2 secret| -|`--dbm-access-token`|(Optional) Access token for OAuth2| -|`--dbm-refresh-token`|Refresh token for OAuth2| -|`--dbm-query-request-type`|Doubleclick Bid Manager API request type. 
*Possible values: existing_query, custom_query, existing_query_report, custom_query_report, lineitems_objects, sdf_objects and list_reports.*| -|`--dbm-query-id`|Query ID.| -|`--dbm-query-title`|Query title, used to name the reports generated from this query in DV360 UI.| -|`--dbm-query-frequency`|How often the query is run. *Possible values can be found [here](https://developers.google.com/bid-manager/v1/queries#schedule.frequency). Default: ONE_TIME.*| -|`--dbm-filter`| association, used to narrow the scope of the report. For instance "FILTER_ADVERTISER XXXXX" will narrow report scope to the performance of Advertiser ID XXXXX. *Possible filter types can be found [here](https://developers.google.com/bid-manager/v1/filters-metrics#filters).*| -|`--dbm-query-dimension`|Dimensions to include in the report. *Possible values can be found [here](https://developers.google.com/bid-manager/v1/filters-metrics#filters).*| -|`--dbm-query-metric`|Metrics to include in the report. *Possible values can be found [here](https://developers.google.com/bid-manager/v1/filters-metrics#metrics).*| -|`--dbm-query-param-type`|Report type. *Possible values can be found [here](https://developers.google.com/bid-manager/v1/queries#params.type). Default: TYPE_TRUEVIEW.*| -|`--dbm-start-date`|Start date of the period to request (format: YYYY-MM-DD)| -|`--dbm-end-date`|End date of the period to request (format: YYYY-MM-DD)| - -### Google Search Console Reader - -#### Source API - -[Search Console API (Search Analytics endpoint)](https://developers.google.com/webmaster-tools/search-console-api-original/v3/searchanalytics/) - -#### How to obtain credentials - -Using the Google Search Console API requires three main parameters: -- OAuth2 credentials: and -- A refresh token, created with the email address able to access to your Google Search Console Account. 
-- The URLs whose performance you want to see - -#### Quickstart - -The following command retrieves insights about the URL from 2020-01-01 to 2020-01-03, thanks to your and with the necessary permissions to access your accounts. - -``` -python nck/entrypoint.py read_search_console --search-console-client-id --search-console-refresh-token --search-console-site-url --search-console-dimensions country --search-console-dimensions device --search-console-start-date 2020-01-01 --search-console-end-date 2020-01-03 write_console -``` - -Didn't work? See [troubleshooting](#troubleshooting) section. - -#### Parameters - -|CLI option|Documentation| -|--|--| -|`--search-console-client-id`|OAuth2 ID| -|`--search-console-client-secret`|OAuth2 secret| -|`--search-console-access-token`|Access token for OAuth2| -|`--search-console-refresh-token`|Refresh token for OAuth2| -|`--search-console-dimensions`|Dimensions of the report. *Possible values can be found [here](https://developers.google.com/webmaster-tools/search-console-api-original/v3/searchanalytics/query#dimensionFilterGroups.filters.dimension).*| -|`--search-console-site-url`|Site URL whose performance you want to request| -|`--search-console-start-date`|Start date of the period to request (format: YYYY-MM-DD)| -|`--search-console-end-date`|End date of the period to request (format: YYYY-MM-DD)| -|`--search-console-date-column`|If set to *True*, a date column will be included in the report| -|`--search-console-row-limit`|Row number by report page| - -See documentation [here](https://developers.google.com/webmaster-tools/search-console-api-original/v3/searchanalytics/query) for a better understanding of the parameters. 
- -### Google Search Ads 360 Reader - -#### Source API - -[Search Ads 360 API](https://developers.google.com/search-ads/v2/reference) - -#### How to obtain credentials - -Using the Search Ads API requires two things: -- OAuth2 credentials: and -- A refresh token, created with the email address able to access to all the Search Ads 360 Account you will be calling - -See the [documentation here](https://developers.google.com/search-ads/v2/authorizing "SA360 Authentication") -to set-up your OAuth2 credentials and refresh token specifically for Search Ads 360 Reporting. - -#### Quickstart - -The following command retrieves insights about the Ads in the Search Ads 360 Account from the agency thanks to your , and with the necessary permissions to access your accounts. - -``` -python nck/entrypoint.py read_sa360 --sa360-client-id --sa360-client-secret --sa360-refresh-token --sa360-agency-id --sa360-advertiser-id --sa360-report-type keyword --sa360-column date --sa360-column impr --sa360-column clicks --sa360-start-date 2020-01-01 --sa360-end-date 2020-01-01 -``` - -Didn't work? See [troubleshooting](#troubleshooting) section. - -#### Parameters - -|CLI option|Documentation| -|--|--| -|`--sa360-client-id`|OAuth2 ID| -|`--sa360-client-secret`|OAuth2 secret| -|`--sa360-access-token`|(Optional) Access token| -|`--sa360-refresh-token`|Refresh token| -|`--sa360-agency-id`|Agency ID to request in SA360| -|`--sa360-advertiser-id`|(Optional) Advertiser ids to request. If not provided, every advertiser of the agency will be requested| -|`--sa360-report-name`|(Optional) Name of the output report| -|`--sa360-report-type`| Type of the report to request. *Possible values can be found [here](https://developers.google.com/search-ads/v2/report-types).*| -|`--sa360-column`|Dimensions and metrics to include in the report| -|`--sa360-saved-column`|(Optional) Saved columns to report. 
*Documentation can be found [here](https://developers.google.com/search-ads/v2/how-tos/reporting/saved-columns).*| -|`--sa360-start-date`|Start date of the period to request (format: YYYY-MM-DD)| -|`--sa360-end-date`|End date of the period to request (format: YYYY-MM-DD)| - -See documentation [here](https://developers.google.com/search-ads/v2/how-tos/reporting) for a better understanding of the parameters. - -### Google Sheets Reader - - -#### Source API - -[Google Sheets API](https://developers.google.com/sheets/api) - -#### Quickstart - -This command allows you to retrieve the desired information from the google sheet row by row in a dict format. For example, given 3 columns a, b, c and 2 rows with respectively the values d,e,f and g, h, i, we would obtain such a dict : - -``` -{"a": "d", "b": "e", "c": "f"} -{"a": "g", "b": "h", "c": "i"} -``` - -#### Parameters - -|CLI option|Documentation| -|--| -| -|`--gs-project-id`|Project ID that is given by Google services once you have created your project in the google cloud console. You can retrieve it in the JSON credential file| -|`--gs-private-key-id`|Private key ID given by Google services once you have added credentials to the project. You can retrieve it in the JSON credential file| -|`--gs-private-key-path`|The path to the private key that is stored in a txt file. You can retrieve it first in the JSON credential file| -|`--gs-client-email`|Client e-mail given by Google services once you have added credentials to the project. You can retrieve it in the JSON credential file| -|`--gs-client-id`|Client ID given by Google services once you have added credentials to the project. You can retrieve it in the JSON credential file| -|`--gs-client-cert`|Client certificate given by Google services once you have added credentials to the project. 
You can retrieve it in the JSON credential file| -|`--gs-file-name`|The name you have given to your google sheet file| -|`--gs-page-number`|The page number you want to access. Page numbering starts at 0| - -#### How to obtain credentials - -To use the nck google_sheets you must first retrieve your credentials. In order to do so head to console.cloud.google.com. In the header, choose your project or create a new one. Next step is to enable some APIs, namely google drive and google sheets api in the API Library. You’ll find it in the « APIs & Services » tab. Now that your google drive API is enabled, click on the « create credentials » button on the upper right corner and enter the following information: - -![alt text](https://github.com/artefactory/nautilus-connectors-kit/blob/upgrade-gs/documentation_images/credentials_gs.png) - -Click on "what credentials do I need" and complete the form. -You will find the credentials you need in the JSON file that will start downloading automatically right after. 
- - -## Oracle Reader - -*Not documented yet.* - -## MySQL Reader - -*Not documented yet.* - -## Radarly Reader - -*Not documented yet.* - -## Salesforce Reader - -*Not documented yet.* - -## The Trade Desk Reader - -#### How to obtain credentials - -- Ask your Account Representative to **give you access to The Trade Desk API and UI** -- He will generally provide you with **two distinct accounts**: an **API account**, allowing you to make API calls (*Login: ttd_api_{XXXXX}@client.com*), and a **UI account**, allowing you to navigate on The Trade Desk UI to create Report Templates (*Login: your professional e-mail address*) -- Pass **the Login and Password of your API account** to The Trade Desk connector - -#### Quickstart - -To request dimensions and metrics to The Trade Desk API, you should first **create a Report Template in The Trade Desk UI**, by following the below process: - -- Connect to [The Trade Desk UI](https://desk.thetradedesk.com/) using the Login and Password of your UI account -- Navigate to *Reports* > *My Reports* to land on the *Report Templates* section -- Clone an existing Report Template, edit it to keep only the dimensions and metrics that you want to collect, and save it: it will appear under the *Mine* section -- Provide the exact name of the Report Template you have just created under the CLI option `--ttd-report-template-name` of The Trade Desk connector: the connector will "schedule" a report instance (which may take a few minutes to run), and fetch data to the location of your choice - -The following command retrieves the data associated to the Report template named "*adgroup_performance_report*" between 2020-01-01 and 2020-01-03, filtered on the PartnerId : -``` -python nck/entrypoint.py read_ttd --ttd-login --ttd-password --ttd-partner-id --ttd-report-template-name adgroup_performance_report --ttd-start-date 2020-01-01 --ttd-end-date 2020-01-03 write_console -``` -Didn't work? See [troubleshooting](#troubleshooting) section. 
- -#### Parameters - -|CLI option|Documentation| -|--|--| -|`--ttd-login`|Login of your API account| -|`--ttd-password`|Password of your API account| -|`--ttd-advertiser-id`|Advertiser Ids for which report data should be fetched| -|`--ttd-report-template-name`|Exact name of the Report Template to request. Existing Report Templates can be found within the [MyReports section](https://desk.thetradedesk.com/MyReports) of The Trade Desk UI.| -|`--ttd-report-schedule-name`|Name of the Report Schedule to create| -|`--ttd-start-date`|Start date of the period to request (format: YYYY-MM-DD)| -|`--ttd-end-date`|End date of the period to request (format: YYYY-MM-DD)| -|`--ttd-normalize-stream`|If set to True, yields a NormalizedJSONStream (spaces and special characters replaced by '_' in field names, which is useful for BigQuery). Else (*default*), yields a standard JSONStream.| - -If you need any further information, the documentation of The Trade Desk API can be found [here](https://api.thetradedesk.com/v3/portal/api/doc/ApiOverview). - -## Twitter Ads Reader - -#### Source API - -[Twitter Ads API](https://developer.twitter.com/en/docs/ads/general/overview) - -#### How to obtain credentials - -* **Apply for a developer account** through [this link](https://developer.twitter.com/en/apply). -* **Create a Twitter app** on the developer portal: it will generate your authentication credentials. -* **Apply for Twitter Ads API access** by filling out [this form](https://developer.twitter.com/en/docs/ads/general/overview/adsapi-application). Receiving Twitter approval may take up to 7 business days. -* **Get access to the Twitter Ads account** you wish to retrieve data for, on the @handle that you used to create your Twitter App. Be careful, access levels matter: with an *Ad Manager* access, you will be able to request all report types; with a *Campaign Analyst* access, you will be able to request all report types, except ENTITY reports on Card entities. 
- -#### Quickstart - -The Twitter Ads Reader can collect **3 types of reports**, making calls to 4 endpoints of the Twitter Ads API: -* **ANALYTICS reports**, making calls to the [Asynchronous Analytics endpoint](https://developer.twitter.com/en/docs/ads/analytics/api-reference/asynchronous). These reports return performance data for a wide range of metrics, that **can be aggregated over time**. Output data **can be splitted by day** when requested over a larger time period. -* **REACH reports**, making calls to the [Reach and Average Frequency endpoint](https://developer.twitter.com/en/docs/ads/analytics/api-reference/reach). These reports return performance data with a focus on reach and frequency metrics, that **cannot be aggregated over time** (*e.g. the reach of day A and B is not equal to the reach of day A + the reach of day B, as it counts unique individuals*). Output data **cannot be splitted by day** when requested over a larger time period. These reports are available **only for the Funding Instrument and Campaign entities**. -* **ENTITY reports**, making calls to [Campaign Management endpoints](https://developer.twitter.com/en/docs/ads/campaign-management/api-reference) if the selected entity is Funding Instrument, Campaign, Line Item, Media Creative or Promoted Tweet, and to the [Creative endpoint](https://developer.twitter.com/en/docs/ads/creatives/api-reference/) if the selected entity is Card. These reports return details on entity configuration since the creation of the Twitter Ads account. 
- -*Call example for ANALYTICS reports*: this call will collect engagement metrics for Line Item entities, splitting the results by day, from 2020-01-01 to 2020-01-03: -``` -python nck/entrypoint.py read_twitter --twitter-consumer-key --twitter-consumer-secret --twitter-access-token --twitter-access-token-secret --twitter-account-id --twitter-report-type ANALYTICS --twitter-entity LINE_ITEM --twitter-metric-group ENGAGEMENT --twitter-segmentation-type AGE --twitter-granularity DAY --twitter-start-date 2020-01-01 --twitter-end-date 2020-01-03 write_console -``` - -*Call example for REACH reports*: this call will collect reach metrics (*total_audience_reach, average_frequency*) for Campaign entities, from 2020-01-01 to 2020-01-03: -``` -python nck/entrypoint.py read_twitter --twitter-consumer-key --twitter-consumer-secret --twitter-access-token --twitter-access-token-secret --twitter-account-id --twitter-report-type REACH --twitter-entity CAMPAIGN --twitter-start-date 2020-01-01 --twitter-end-date 2020-01-03 write_console -``` - -*Call example for ENTITY reports*: this call collects details on the configuration of Campaign entities (id, name, total_budget_amount_local_micro, currency), since the creation of the Twitter Ads account: -``` -python nck/entrypoint.py read_twitter --twitter-consumer-key --twitter-consumer-secret --twitter-access-token --twitter-access-token-secret --twitter-account-id --twitter-report-type ENTITY --twitter-entity CAMPAIGN --twitter-entity-attribute id --twitter-entity-attribute name --twitter-entity-attribute total_budget_amount_local_micro --twitter-entity-attribute currency write_console -``` - -Didn't work? See [troubleshooting](#troubleshooting) section. 
- -#### Parameters - -|CLI option|Documentation| -|--|--| -|`--twitter-consumer-key`|API key, available in the 'Keys and tokens' section of your Twitter Developer App.| -|`--twitter-consumer-secret`|API secret key, available in the 'Keys and tokens' section of your Twitter Developer App.| -|`--twitter-access-token`|Access token, available in the 'Keys and tokens' section of your Twitter Developer App.| -|`--twitter-access-token-secret`|Access token secret, available in the 'Keys and tokens' section of your Twitter Developer App.| -|`--twitter-account-id`|Specifies the Twitter Account ID for which the data should be returned.| -|`--twitter-report-type`|Specifies the type of report to collect. *Possible values: ANALYTICS, REACH, ENTITY.*| -|`--twitter-entity`|Specifies the entity type to retrieve data for. *Possible values: FUNDING_INSTRUMENT, CAMPAIGN, LINE_ITEM, MEDIA_CREATIVE, PROMOTED_TWEET, CARD.*| -|`--twitter-entity-attribute`|Specific to ENTITY reports. Specifies the entity attribute (configuration detail) that should be returned. *To get possible values, print the ENTITY_ATTRIBUTES variable on nck/helpers/twitter_helper.py*| -|`--twitter-granularity`|Specific to ANALYTICS reports. Specifies how granular the retrieved data should be. *Possible values: TOTAL (default), DAY.*| -|`--twitter-metric-group`|Specific to ANALYTICS reports. Specifies the list of metrics (as a group) that should be returned. *Possible values can be found [here](https://developer.twitter.com/en/docs/ads/analytics/overview/metrics-and-segmentation).* | -|`--twitter-placement`|Specific to ANALYTICS reports. Scopes the retrieved data to a particular placement. *Possible values: ALL_ON_TWITTER (default), PUBLISHER_NETWORK.*| -|`--twitter-segmentation-type`|Specific to ANALYTICS reports. Specifies how the retrieved data should be segmented. 
*Possible values can be found [here](https://developer.twitter.com/en/docs/ads/analytics/overview/metrics-and-segmentation).* | -|`--twitter-platform`|Specific to ANALYTICS reports. Required if segmentation_type is set to DEVICES or PLATFORM_VERSION. *Possible values can be identified through the GET targeting_criteria/platforms endpoint.*| -|`--twitter-country`|Specific to ANALYTICS reports. Required if segmentation_type is set to CITIES, POSTAL_CODES, or REGION. *Possible values can be identified through the GET targeting_criteria/locations endpoint.*| -|`--twitter-start-date`|Start date of the period to request (format: YYYY-MM-DD).| -|`--twitter-end-date`|End date of the period to request (format: YYYY-MM-DD).| -|`--twitter-add-request-date-to-report`|If set to *True* (default: *False*), the date on which the request is made will appear on each report record.| - -If you need any further information, the documentation of Twitter Ads API can be found [here](https://developer.twitter.com/en/docs/ads/general/overview). To get a better understanding of **Twitter Ads Hierarchy and Terminology**, we advise you to have a look at [this page](https://developer.twitter.com/en/docs/tutorials/ads-api-hierarchy-terminology). - -## Yandex Readers - -#### Source API - -[Yandex Direct API](https://tech.yandex.com/direct/) - -#### How to obtain credentials - -In order to access Yandex Direct API, you need two accounts: an advertiser account and a developer account. -Here is the process: - -1. Create a developer account if you don't already have one. Click on the *Get started* button on this [page](https://direct.yandex.com/). -2. Create and register an app that will access Yandex Direct API via [Yandex OAuth](https://oauth.yandex.com/client/new). -3. Keep app client id safe. Log in with your advertiser account and [give permission to the app to access your data](https://tech.yandex.com/oauth/doc/dg/tasks/get-oauth-token-docpage/). -4. Store your token very carefully. -5. 
Log out and log in as a developer and [ask permission to access Yandex Direct API](https://direct.yandex.com/registered/main.pl?cmd=apiSettings) (ask for Full access). Fill in the form. -6. Wait for Yandex support to reply; it should take no more than a week. - -### Yandex Campaign Reader - -[Official documentation](https://tech.yandex.com/direct/doc/ref-v5/campaigns/get-docpage/) - -#### Quickstart - -The following command retrieves the daily budget of all your campaigns, since your account creation. - -``` -python nck/entrypoint.py read_yandex_campaigns --yandex-token --yandex-field-name Id --yandex-field-name Name --yandex-field-name DailyBudget write_console -``` - -Didn't work? See [troubleshooting](#troubleshooting) section. - -#### Parameters - -|CLI option|Documentation| -|--|--| -|`--yandex-token`|Bearer token that allows you to authenticate to the API| -|`--yandex-campaign-id`|(Optional) Selects campaigns with the specified IDs.| -|`--yandex-campaign-state`|(Optional) Selects campaigns with the specified states. *Possible values can be found [here](https://tech.yandex.com/direct/doc/dg/objects/campaign-docpage/#status).*| -|`--yandex-campaign-status`|(Optional) Selects campaigns with the specified statuses. *Possible values can be found [here](https://tech.yandex.com/direct/doc/dg/objects/campaign-docpage/#status).*| -|`--yandex-campaign-payment-status`|(Optional) Selects campaigns with the specified payment [statuses](https://tech.yandex.com/direct/doc/dg/objects/campaign-docpage/#status).| -|`--yandex-field-name`|Parameters to get that are common to all types of campaigns.| - -### Yandex Statistics Reader - -[Official documentation](https://tech.yandex.com/direct/doc/reports/reports-docpage/) - -#### Quickstart - -The following command retrieves a performance report for all your campaigns, since your account creation. 
- -``` -python nck/entrypoint.py read_yandex_statistics --yandex-token --yandex-report-type AD_PERFORMANCE_REPORT --yandex-field-name AdFormat --yandex-field-name AdId --yandex-field-name Impressions --yandex-include-vat True --yandex-report-language en --yandex-field-name AdGroupName --yandex-field-name AdGroupId --yandex-field-name AdNetworkType --yandex-field-name CampaignId --yandex-field-name CampaignName --yandex-field-name CampaignType --yandex-field-name Date --yandex-field-name Device --yandex-field-name Clicks --yandex-field-name Conversions --yandex-field-name Cost --yandex-date-range ALL_TIME write_console -``` - -Didn't work? See [troubleshooting](#troubleshooting) section. - -#### Parameters - -Detailed version [here](https://tech.yandex.com/direct/doc/reports/spec-docpage/). - -|CLI option|Documentation| -|--|--| -|`--yandex-token`|Bear token that allows you to authenticate to the API| -|`--yandex-report-language`|(Optional) Language of the report. *Possible values can be found [here](https://tech.yandex.com/direct/doc/dg/concepts/headers-docpage/#headers__accept-language).*| -|`--yandex-filter`|(Optional) Filters on a particular field.| -|`--yandex-max-rows`|(Optional) The maximum number of rows in the report.| -|`--yandex-field-name`|Information you want to collect. *Possible values can be found [here](https://tech.yandex.com/direct/doc/reports/fields-list-docpage/).*| -|`--yandex-report-type`|Type of report. Linked to the fields you want to select.| -|`--yandex-date-range`|*Possible values can be found [here](https://tech.yandex.com/direct/doc/reports/period-docpage/).*| -|`--yandex-include-vat`|Adds VAT to your expenses if set to `True`| -|`--yandex-date-start`|(Optional) Selects data on a specific period of time. Combined with `--yandex-date-stop` and `--yandex-date-range` set to `CUSTOM_DATE`.| -|`--yandex-date-stop`|(Optional) Selects data on a specific period of time. 
Combined with `--yandex-date-start` and `--yandex-date-range` set to `CUSTOM_DATE`.| - -## Troubleshooting - -You encountered an issue when running a Reader command and you don't know what's going on? -You may find an answer in the troubleshooting guide below. - -1. **Have you installed NCK dependencies?** In order to run NCK, you need to install all dependencies. First create a [virtual environment](https://docs.python.org/3/library/venv.html) and then run `pip install -r requirements.txt`. -2. **Have you set `PYTHONPATH` environment variable to the root of NCK folder?** -3. **Have you checked logs?** The code has been implemented so that every error is logged. For example, if you did not provide a valid token, you will see something like ```Invalid request. -{'error': {'error_code': '53', 'request_id': '8998435864716615689', 'error_string': 'Authorization error', 'error_detail': 'Invalid OAuth token'}}```. If you misspelled a field, you will get a message like this one: ```Error: Invalid value for "--yandex-field-name"```. diff --git a/nck/readers/__init__.py b/nck/readers/__init__.py deleted file mode 100644 index b177167b..00000000 --- a/nck/readers/__init__.py +++ /dev/null @@ -1,69 +0,0 @@ -# GNU Lesser General Public License v3.0 only -# Copyright (C) 2020 Artefact -# licence-information@artefact.com -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 3 of the License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. 
-# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, write to the Free Software Foundation, -# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -from nck.readers.dv360_reader import dv360 -from nck.readers.reader import Reader - -from nck.readers.mysql_reader import mysql -from nck.readers.gcs_reader import gcs -from nck.readers.googleads_reader import google_ads -from nck.readers.s3_reader import s3 -from nck.readers.sa360_reader import sa360_reader -from nck.readers.oracle_reader import oracle -from nck.readers.gsheets_reader import gsheets -from nck.readers.salesforce_reader import salesforce -from nck.readers.facebook_reader import facebook -from nck.readers.ttd_reader import the_trade_desk -from nck.readers.twitter_reader import twitter -from nck.readers.dbm_reader import dbm -from nck.readers.dcm_reader import dcm -from nck.readers.ga_reader import ga -from nck.readers.search_console_reader import search_console -from nck.readers.adobe_reader import adobe -from nck.readers.adobe_reader_2_0 import adobe_2_0 -from nck.readers.radarly_reader import radarly -from nck.readers.yandex_campaign_reader import yandex_campaigns -from nck.readers.yandex_statistics_reader import yandex_statistics -from nck.readers.gs_reader import google_sheets - -readers = [ - mysql, - salesforce, - gsheets, - gcs, - google_ads, - s3, - sa360_reader, - facebook, - the_trade_desk, - twitter, - oracle, - dv360, - dbm, - dcm, - ga, - search_console, - adobe, - adobe_2_0, - radarly, - yandex_campaigns, - yandex_statistics, - google_sheets -] - - -__all__ = ["readers", "Reader"] diff --git a/nck/readers/mysql_reader.py b/nck/readers/mysql_reader.py deleted file mode 100644 index a62a7226..00000000 --- a/nck/readers/mysql_reader.py +++ /dev/null @@ -1,45 +0,0 @@ -# GNU Lesser General Public License v3.0 only -# Copyright (C) 2020 Artefact -# licence-information@artefact.com -# -# This program is free software; you 
can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 3 of the License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, write to the Free Software Foundation, -# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -import click - -from nck.commands.command import processor -from nck.readers.sql_reader import SQLReader, validate_sql_arguments -from nck.utils.args import extract_args - - -@click.command(name="read_mysql") -@click.option("--mysql-user", required=True) -@click.option("--mysql-password", required=True) -@click.option("--mysql-host", required=True) -@click.option("--mysql-port", required=False, default=3306) -@click.option("--mysql-database", required=True) -@click.option("--mysql-watermark-column") -@click.option("--mysql-watermark-init") -@click.option("--mysql-query") -@click.option("--mysql-query-name") -@click.option("--mysql-table") -@processor("mysql_password") -def mysql(**kwargs): - validate_sql_arguments(MySQLReader, "mysql", kwargs) - return MySQLReader(**extract_args("mysql_", kwargs)) - - -class MySQLReader(SQLReader): - @staticmethod - def connector_adaptor(): - return "mysql+pymysql" diff --git a/nck/readers/objectstorage_reader.py b/nck/readers/objectstorage_reader.py deleted file mode 100644 index a4a182c8..00000000 --- a/nck/readers/objectstorage_reader.py +++ /dev/null @@ -1,196 +0,0 @@ -# GNU Lesser General Public License v3.0 only -# Copyright (C) 2020 Artefact -# licence-information@artefact.com -# -# This program is free software; you can redistribute it 
and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 3 of the License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, write to the Free Software Foundation, -# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -import config -import tempfile -import logging - -from nck.readers.reader import Reader -from nck.streams.normalized_json_stream import NormalizedJSONStream -from nck.utils.file_reader import FileEnum - - -def find_reader(_format, kwargs): - _format = _format.upper() - if _format in FileEnum.__members__: - r = getattr(FileEnum, _format).value - _reader = r(**kwargs).get_csv_reader() - else: - raise NotImplementedError( - f"The file format {str(_format)} has not been implemented for reading yet." 
- ) - return _reader - - -def no_files_seen_before(max_timestamp): - return not max_timestamp - - -def _object_older_than_most_recently_ingested_file(max_timestamp, _object_timestamp): - return max_timestamp > _object_timestamp - - -def _object_newer_than_most_recently_ingested_file(max_timestamp, _object_timestamp): - return max_timestamp < _object_timestamp - - -def _object_as_old_as_most_recently_ingested_file(max_timestamp, _object_timestamp): - return max_timestamp == _object_timestamp - - -class ObjectStorageReader(Reader): - def __init__( - self, bucket, prefix, file_format, dest_key_split, platform=None, **kwargs - ): - self._client = self.create_client(config) - self._bucket = self.create_bucket(self._client, bucket) - self._prefix_list = prefix - self._platform = platform - - self._format = file_format - self._reader = find_reader(self._format, kwargs) - self._dest_key_split = dest_key_split - - self.MAX_TIMESTAMP_STATE_KEY = f"{self._platform}_max_timestamp".lower() - self.MAX_FILES_STATE_KEY = f"{self._platform}_max_files".lower() - - def read(self): - - for prefix in self._prefix_list: - - objects_sorted_by_time = sorted( - self.list_objects(bucket=self._bucket, prefix=prefix), - key=lambda o: self.get_timestamp(o), - ) - - for _object in objects_sorted_by_time: - - _object = self.to_object(_object) - - logging.info(f"Found {self._platform} file {self.get_key(_object)}") - - if not self.is_compatible_object(_object): - logging.info( - f"Wrong extension: Skipping file {self.get_key(_object)}" - ) - continue - - if self.has_already_processed_object(_object): - logging.info( - f"Skipping already processed file {self.get_key(_object)}" - ) - continue - - def result_generator(): - temp = tempfile.TemporaryFile() - self.download_object_to_file(_object, temp) - - for record in self._reader(temp): - yield record - - self.checkpoint_object(_object) - - name = self.get_key(_object).split("/", self._dest_key_split)[-1] - - yield NormalizedJSONStream(name, 
result_generator()) - - def is_compatible_object(self, _object): - return self.get_key(_object).endswith("." + self._format) - - def has_already_processed_object(self, _object): - - assert self.get_timestamp(_object) is not None, "Object has no timestamp!" - - max_timestamp = self.state.get(self.MAX_TIMESTAMP_STATE_KEY) - - if no_files_seen_before(max_timestamp): - return False - - _object_timestamp = self.get_timestamp(_object) - - if _object_older_than_most_recently_ingested_file( - max_timestamp, _object_timestamp - ): - return True - - if _object_newer_than_most_recently_ingested_file( - max_timestamp, _object_timestamp - ): - return False - - if _object_as_old_as_most_recently_ingested_file( - max_timestamp, _object_timestamp - ): - max_files = self.state.get(self.MAX_FILES_STATE_KEY) - return self.get_key(_object) in max_files - - def checkpoint_object(self, _object): - - assert self.get_timestamp(_object) is not None, "Object has no timestamp!" - - max_timestamp = self.state.get(self.MAX_TIMESTAMP_STATE_KEY) - _object_timestamp = self.get_timestamp(_object) - - if max_timestamp and _object_older_than_most_recently_ingested_file( - max_timestamp, _object_timestamp - ): - raise RuntimeError("Object is older than max timestamp at checkpoint time") - - elif not max_timestamp or _object_newer_than_most_recently_ingested_file( - max_timestamp, _object_timestamp - ): - self.update_max_timestamp(_object_timestamp, _object) - - else: - assert _object_as_old_as_most_recently_ingested_file( - max_timestamp, _object_timestamp - ) - self.update_max_files(_object) - - def update_max_timestamp(self, _object_timestamp, _object): - self.state.set(self.MAX_TIMESTAMP_STATE_KEY, _object_timestamp) - self.state.set(self.MAX_FILES_STATE_KEY, [self.get_key(_object)]) - - def update_max_files(self, _object): - max_files = self.state.get(self.MAX_FILES_STATE_KEY) - max_files.append(self.get_key(_object)) - self.state.set(self.MAX_FILES_STATE_KEY, max_files) - - def 
create_client(self, config): - raise NotImplementedError - - def create_bucket(self, client, bucket): - raise NotImplementedError - - def list_objects(self, bucket, prefix): - raise NotImplementedError - - @staticmethod - def get_timestamp(_object): - raise NotImplementedError - - @staticmethod - def get_key(_object): - raise NotImplementedError - - @staticmethod - def to_object(_object): - raise NotImplementedError - - @staticmethod - def download_object_to_file(_object, temp): - raise NotImplementedError diff --git a/nck/readers/oracle_reader.py b/nck/readers/oracle_reader.py deleted file mode 100644 index 748baae5..00000000 --- a/nck/readers/oracle_reader.py +++ /dev/null @@ -1,46 +0,0 @@ -# GNU Lesser General Public License v3.0 only -# Copyright (C) 2020 Artefact -# licence-information@artefact.com -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 3 of the License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, write to the Free Software Foundation, -# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-import click - -from nck.commands.command import processor -from nck.readers.sql_reader import SQLReader, validate_sql_arguments -from nck.utils.args import extract_args - - -@click.command(name="read_oracle") -@click.option("--oracle-user", required=True) -@click.option("--oracle-password", required=True) -@click.option("--oracle-host", required=True) -@click.option("--oracle-port", required=False, default=2380) -@click.option("--oracle-database", required=True) -@click.option("--oracle-schema", required=True) -@click.option("--oracle-watermark-column") -@click.option("--oracle-watermark-init") -@click.option("--oracle-query") -@click.option("--oracle-query-name") -@click.option("--oracle-table") -@processor("oracle_password") -def oracle(**kwargs): - validate_sql_arguments(OracleReader, "oracle", kwargs) - return OracleReader(**extract_args("oracle_", kwargs)) - - -class OracleReader(SQLReader): - @staticmethod - def connector_adaptor(): - return "oracle+cx_oracle" diff --git a/nck/readers/salesforce_reader.py b/nck/readers/salesforce_reader.py deleted file mode 100644 index d803ffd9..00000000 --- a/nck/readers/salesforce_reader.py +++ /dev/null @@ -1,284 +0,0 @@ -# GNU Lesser General Public License v3.0 only -# Copyright (C) 2020 Artefact -# licence-information@artefact.com -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 3 of the License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. 
-# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, write to the Free Software Foundation, -# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -import collections -import urllib - -import logging - -import click -import requests - -from nck.readers.reader import Reader -from nck.commands.command import processor -from nck.state_service import state -from nck.streams.normalized_json_stream import NormalizedJSONStream -from nck.utils.args import extract_args, has_arg, hasnt_arg -from nck.utils.retry import retry - -SALESFORCE_LOGIN_ENDPOINT = "https://login.salesforce.com/services/oauth2/token" -SALESFORCE_LOGIN_REDIRECT = "https://login.salesforce.com/services/oauth2/success" -SALESFORCE_SERVICE_ENDPOINT = "https://eu16.force.com" -SALESFORCE_QUERY_ENDPOINT = "/services/data/v42.0/query/" -SALESFORCE_DESCRIBE_ENDPOINT = "/services/data/v42.0/sobjects/{obj}/describe" - - -@click.command(name="read_salesforce") -@click.option("--salesforce-consumer-key", required=True) -@click.option("--salesforce-consumer-secret", required=True) -@click.option("--salesforce-user", required=True) -@click.option("--salesforce-password", required=True) -@click.option("--salesforce-object-type") -@click.option("--salesforce-query") -@click.option("--salesforce-query-name") -@click.option("--salesforce-watermark-column") -@click.option("--salesforce-watermark-init") -@processor( - "salesforce_consumer_key", "salesforce_consumer_secret", "salesforce_password" -) -def salesforce(**kwargs): - query_key = "salesforce_query" - query_name_key = "salesforce_query_name" - object_type_key = "salesforce_object_type" - watermark_column_key = "salesforce_watermark_column" - watermark_init_key = "salesforce_watermark_init" - - if hasnt_arg(query_key, kwargs) and hasnt_arg(object_type_key, kwargs): - raise click.BadParameter( - "Must specify either an object type or a query for Salesforce" - ) - - if has_arg(query_key, 
kwargs) and has_arg(object_type_key, kwargs): - raise click.BadParameter( - "Cannot specify both a query and an object type for Salesforce" - ) - - if has_arg(query_key, kwargs) and hasnt_arg(query_name_key, kwargs): - raise click.BadParameter( - "Must specify a query name when running a Salesforce query" - ) - - if has_arg(watermark_column_key, kwargs) and not state().enabled: - raise click.BadParameter( - "You must activate state management to use Salesforce watermarks" - ) - - if hasnt_arg(watermark_column_key, kwargs) and state().enabled: - raise click.BadParameter( - "You must specify a Salesforce watermark when using state management" - ) - - if hasnt_arg(watermark_init_key, kwargs) and state().enabled: - raise click.BadParameter( - "You must specify an initial Salesforce watermark value when using state management" - ) - - return SalesforceReader(**extract_args("salesforce_", kwargs)) - - -class SalesforceClient(object): - def __init__(self, user, password, consumer_key, consumer_secret): - self._user = user - self._password = password - self._consumer_key = consumer_key - self._consumer_secret = consumer_secret - - self._headers = None - self._access_token = None - self._instance_url = None - - @property - def headers(self): - return { - "Content-type": "application/json", - "Accept-Encoding": "gzip", - "Authorization": "Bearer {}".format(self.access_token), - } - - @property - def access_token(self): - if not self._access_token: - self._load_access_info() - - return self._access_token - - @property - def instance_url(self): - if not self._instance_url: - self._load_access_info() - - return self._instance_url - - def _load_access_info(self): - logging.info("Retrieving Salesforce access token") - - res = requests.post(SALESFORCE_LOGIN_ENDPOINT, params=self._get_login_params()) - - res.raise_for_status() - - self._access_token = res.json().get("access_token") - self._instance_url = res.json().get("instance_url") - - return self._access_token, 
self._instance_url - - def _get_login_params(self): - return { - "grant_type": "password", - "client_id": self._consumer_key, - "client_secret": self._consumer_secret, - "username": self._user, - "password": self._password, - "redirect_uri": SALESFORCE_LOGIN_REDIRECT, - } - - def _request_data(self, path, params=None): - - endpoint = urllib.parse.urljoin(self.instance_url, path) - response = requests.get( - endpoint, headers=self.headers, params=params, timeout=30 - ) - - response.raise_for_status() - - return response.json() - - def describe(self, object_type): - path = SALESFORCE_DESCRIBE_ENDPOINT.format(obj=object_type) - return self._request_data(path) - - def query(self, query): - - logging.info("Running Salesforce query: %s", query) - - response = self._request_data(SALESFORCE_QUERY_ENDPOINT, {"q": query}) - - generating = True - - while generating: - - for rec in response["records"]: - yield rec - - if "nextRecordsUrl" in response: - logging.info("Fetching next page of Salesforce results") - response = self._request_data(response["nextRecordsUrl"]) - else: - generating = False - - -class SalesforceReader(Reader): - def __init__( - self, - consumer_key, - consumer_secret, - user, - password, - query, - query_name, - object_type, - watermark_column, - watermark_init, - ): - self._name = query_name or object_type - self._client = SalesforceClient(user, password, consumer_key, consumer_secret) - self._watermark_column = watermark_column - self._watermark_init = watermark_init - self._object_type = object_type - self._query = query - - def build_object_type_query(self, object_type, watermark_column): - description = self._client.describe(object_type) - fields = [f["name"] for f in description["fields"]] - - field_projection = ", ".join(fields) - query = "SELECT {fields} FROM {object_type}".format( - fields=field_projection, object_type=object_type - ) - - if watermark_column: - query = "{base} WHERE {watermark_column} > {{{watermark_column}}}".format( - 
base=query, watermark_column=watermark_column - ) - - return query - - @retry - def read(self): - def result_generator(): - - watermark_value = None - - if self._watermark_column: - watermark_value = self.state.get(self._name) or self._watermark_init - - if self._object_type: - self._query = self.build_object_type_query( - self._object_type, self._watermark_column - ) - - if self._watermark_column: - self._query = self._query.format( - **{self._watermark_column: watermark_value} - ) - - records = self._client.query(self._query) - - for rec in records: - row = self._clean_record(rec) - yield row - - if self._watermark_column: - self.state.set(self._name, row[self._watermark_column]) - - yield NormalizedJSONStream(self._name, result_generator()) - - @classmethod - def _clean_record(cls, record): - """ - Salesforces records contains metadata which we don't need during ingestion - """ - return cls._flatten(cls._delete_metadata_from_record(record)) - - @classmethod - def _delete_metadata_from_record(cls, record): - - if isinstance(record, dict): - strip_keys = ["attributes", "totalSize", "done"] - return { - k: cls._delete_metadata_from_record(v) - for k, v in record.items() - if k not in strip_keys - } - elif isinstance(record, list): - return [cls._delete_metadata_from_record(i) for i in record] - else: - return record - - @classmethod - def _flatten(cls, json_dict, parent_key="", sep="_"): - """ - Reduce number of dict levels - Note: useful to bigquery autodetect schema - """ - items = [] - for k, v in json_dict.items(): - new_key = parent_key + sep + k if parent_key else k - if isinstance(v, collections.MutableMapping): - items.extend(cls._flatten(v, new_key, sep=sep).items()) - else: - items.append((new_key, v)) - return dict(items) diff --git a/nck/readers/sql_reader.py b/nck/readers/sql_reader.py deleted file mode 100644 index e4b17ee5..00000000 --- a/nck/readers/sql_reader.py +++ /dev/null @@ -1,186 +0,0 @@ -# GNU Lesser General Public License v3.0 only -# 
Copyright (C) 2020 Artefact -# licence-information@artefact.com -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 3 of the License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, write to the Free Software Foundation, -# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -import logging -import sqlalchemy -import click - -from nck.readers.reader import Reader -from nck.utils.sql import build_table_query, build_custom_query - -from nck.utils.retry import retry - -from nck.streams.normalized_json_stream import NormalizedJSONStream -from nck.state_service import state -from nck.utils.args import has_arg, hasnt_arg - - -def validate_sql_arguments(reader, prefix, kwargs): - query_key = "{}_query".format(prefix) - query_name_key = "{}_query_name".format(prefix) - table_key = "{}_table".format(prefix) - watermark_column_key = "{}_watermark_column".format(prefix) - watermark_init_key = "{}_watermark_init".format(prefix) - - if hasnt_arg(query_key, kwargs) and hasnt_arg(table_key, kwargs): - raise click.BadParameter( - "Must specify either a table or a query for {} reader".format( - reader.connector_name() - ) - ) - - if has_arg(query_key, kwargs) and has_arg(table_key, kwargs): - raise click.BadParameter("Cannot specify both a query and a table") - - if has_arg(query_key, kwargs) and hasnt_arg(query_name_key, kwargs): - raise click.BadParameter( - "Must specify a query name when running a {} query".format( - reader.connector_name() - ) - ) - - if 
has_arg(watermark_column_key, kwargs) and not state().enabled: - raise click.BadParameter( - "You must activate state management to use {} watermarks".format( - reader.connector_name() - ) - ) - - if hasnt_arg(watermark_column_key, kwargs) and state().enabled: - raise click.BadParameter( - "You must specify a {} watermark when using state management".format( - reader.connector_name() - ) - ) - - if hasnt_arg(watermark_init_key, kwargs) and state().enabled: - raise click.BadParameter( - "You must specify a {} watermark init value when using state management".format( - reader.connector_name() - ) - ) - - -class SQLReader(Reader): - _host = None - _port = None - _user = None - _password = None - _database = None - _schema = None - - _client = None - - _watermark_value = None - _watermark_column = None - - @classmethod - def connector_name(cls): - return cls.__name__ - - def __init__( - self, - user, - password, - host, - port, - database, - watermark_column=None, - watermark_init=None, - query=None, - query_name=None, - table=None, - schema=None, - ): - - self._engine = self._create_engine(host, port, user, password, database) - self._name = table if table else query_name - self._schema = schema - - self._watermark_column = watermark_column - - if watermark_column: - self._watermark_value = self.state.get(self._name) or watermark_init - - if table: - self._query = build_table_query( - self._engine, schema, table, watermark_column, self._watermark_value - ) - else: - self._query = build_custom_query( - self._engine, schema, query, watermark_column, self._watermark_value - ) - - @staticmethod - def connector_adaptor(): - raise NotImplementedError - - @classmethod - def _create_engine(cls, host, port, user, password, database): - logging.info( - "Connecting to %s Database %s on %s:%s", - cls.connector_name(), - database, - host, - port, - ) - - url = sqlalchemy.engine.url.URL( - **{ - "drivername": cls.connector_adaptor(), - "username": user, - "password": password, - 
"database": database, - "port": port, - "host": host, - } - ) - - return sqlalchemy.create_engine(url) - - def read(self): - try: - yield self._run_query() - finally: - self.close() - - @retry - def _run_query(self): - logging.info("Running %s query %s", self.connector_name(), self._query) - - rows = self._engine.execute(self._query) - - logging.info( - "%s result set contains %d rows", self.connector_name(), rows.rowcount - ) - - def result_generator(): - row = rows.fetchone() - while row: - yield dict(row.items()) - - if self._watermark_column: - self.state.set(self._name, row[self._watermark_column]) - - row = rows.fetchone() - rows.close() - - return NormalizedJSONStream(self._name, result_generator()) - - def close(self): - logging.info("Closing %s connection", self.connector_name()) - self._engine.dispose() diff --git a/nck/readers/yandex_statistics_reader.py b/nck/readers/yandex_statistics_reader.py deleted file mode 100644 index 8332c52e..00000000 --- a/nck/readers/yandex_statistics_reader.py +++ /dev/null @@ -1,227 +0,0 @@ -# GNU Lesser General Public License v3.0 only -# Copyright (C) 2020 Artefact -# licence-information@artefact.com -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 3 of the License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, write to the Free Software Foundation, -# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-import datetime -import logging -import random -import time -from http import HTTPStatus -from typing import Dict, Tuple - -import click - -import nck.helpers.api_client_helper as api_client_helper -from nck.clients.api_client import ApiClient -from nck.commands.command import processor -from nck.helpers.yandex_helper import ( - DATE_RANGE_TYPES, - LANGUAGES, - OPERATORS, - REPORT_TYPES, - STATS_FIELDS, -) -from nck.readers.reader import Reader -from nck.streams.json_stream import JSONStream -from nck.utils.args import extract_args -from nck.utils.text import get_report_generator_from_flat_file - - -class StrList(click.ParamType): - def convert(self, value, param, ctx): - return value.split(",") - - -STR_LIST_TYPE = StrList() - -logger = logging.getLogger(__name__) - - -@click.command(name="read_yandex_statistics") -@click.option("--yandex-token", required=True) -@click.option("--yandex-report-language", type=click.Choice(LANGUAGES), default="en") -@click.option( - "--yandex-filter", - "yandex_filters", - multiple=True, - type=click.Tuple( - [click.Choice(STATS_FIELDS), click.Choice(OPERATORS), STR_LIST_TYPE] - ), -) -@click.option("--yandex-max-rows", type=int) -@click.option( - "--yandex-field-name", - "yandex_fields", - multiple=True, - type=click.Choice(STATS_FIELDS), - required=True, - help=( - "Fields to output in the report (columns)." 
- "For the full list of fields and their meanings, " - "see https://tech.yandex.com/direct/doc/reports/fields-list-docpage/" - ), -) -@click.option( - "--yandex-report-name", - default=f"stats_report_{datetime.date.today()}_{random.randrange(10000)}", -) -@click.option("--yandex-report-type", type=click.Choice(REPORT_TYPES), required=True) -@click.option("--yandex-date-range", type=click.Choice(DATE_RANGE_TYPES), required=True) -@click.option( - "--yandex-include-vat", - type=click.BOOL, - required=True, - help="Whether to include VAT in the monetary amounts in the report.", -) -@click.option("--yandex-date-start", type=click.DateTime()) -@click.option("--yandex-date-stop", type=click.DateTime()) -@processor("yandex_token") -def yandex_statistics(**kwargs): - return YandexStatisticsReader(**extract_args("yandex_", kwargs)) - - -YANDEX_DIRECT_API_BASE_URL = "https://api.direct.yandex.com/json/v5/" - - -class YandexStatisticsReader(Reader): - def __init__( - self, - token, - fields: Tuple[str], - report_type: str, - report_name: str, - date_range: str, - include_vat: bool, - **kwargs, - ): - self.token = token - self.fields = list(fields) - self.report_type = report_type - self.report_name = report_name - self.date_range = date_range - self.include_vat = include_vat - self.kwargs = kwargs - - def result_generator(self): - api_client = ApiClient(self.token, YANDEX_DIRECT_API_BASE_URL) - body = self._build_request_body() - headers = self._build_request_headers() - while True: - response = api_client.execute_request( - url="reports", body=body, headers=headers, stream=True - ) - if response.status_code == HTTPStatus.CREATED: - waiting_time = int(response.headers["retryIn"]) - logger.info( - f"Report added to queue. Should be ready in {waiting_time} min." 
- ) - time.sleep(waiting_time * 60) - elif response.status_code == HTTPStatus.ACCEPTED: - logger.info("Report in queue.") - elif response.status_code == HTTPStatus.OK: - logger.info("Report successfully retrieved.") - return get_report_generator_from_flat_file( - response.iter_lines(), delimiter="\t", skip_n_first=1, - ) - elif response.status_code == HTTPStatus.BAD_REQUEST: - logger.error("Invalid request.") - logger.error(response.json()) - break - elif response.status_code == HTTPStatus.INTERNAL_SERVER_ERROR: - logger.error("Internal server error.") - logger.error(response.json()) - break - else: - logger.error(response.json()) - break - return None - - def _build_request_body(self) -> Dict: - body = {} - selection_criteria = self._add_custom_dates_if_set() - if len(self.kwargs["filters"]) > 0: - selection_criteria["Filter"] = [ - api_client_helper.get_dict_with_keys_converted_to_new_string_format( - field=filter_element[0], - operator=filter_element[1], - values=filter_element[2], - ) - for filter_element in self.kwargs["filters"] - ] - body[ - "params" - ] = api_client_helper.get_dict_with_keys_converted_to_new_string_format( - selection_criteria=selection_criteria, - field_names=self.fields, - report_name=self.report_name, - report_type=self.report_type, - date_range_type=self.date_range, - format="TSV", - include_v_a_t="YES" if self.include_vat else "NO", - ) - if self.kwargs["max_rows"] is not None: - body["params"][ - "Page" - ] = api_client_helper.get_dict_with_keys_converted_to_new_string_format( - limit=self.kwargs["max_rows"] - ) - return body - - def _build_request_headers(self) -> Dict: - return { - "skipReportSummary": "true", - "Accept-Language": self.kwargs["report_language"], - } - - def _add_custom_dates_if_set(self) -> Dict: - selection_criteria = {} - if ( - self.kwargs["date_start"] is not None - and self.kwargs["date_stop"] is not None - and self.date_range == "CUSTOM_DATE" - ): - selection_criteria["DateFrom"] = 
self.kwargs["date_start"].strftime( - "%Y-%m-%d" - ) - selection_criteria["DateTo"] = self.kwargs["date_stop"].strftime("%Y-%m-%d") - elif ( - self.kwargs["date_start"] is not None - and self.kwargs["date_stop"] is not None - and self.date_range != "CUSTOM_DATE" - ): - raise click.ClickException( - "Wrong date range. If start and stop dates are set, should be CUSTOM_DATE." - ) - elif ( - self.kwargs["date_start"] is not None - or self.kwargs["date_stop"] is not None - ) and self.date_range != "CUSTOM_DATE": - raise click.ClickException( - ( - "Wrong combination of date parameters. " - "Only use date start and date stop with date range set to CUSTOM_DATE." - ) - ) - elif ( - self.kwargs["date_start"] is None or self.kwargs["date_stop"] is None - ) and self.date_range == "CUSTOM_DATE": - raise click.ClickException( - "Missing at least one date. Have you set start and stop dates?" - ) - return selection_criteria - - def read(self): - yield JSONStream(f"results_{self.report_type}", self.result_generator()) diff --git a/nck/streams/README.md b/nck/streams/README.md deleted file mode 100644 index df1fdf99..00000000 --- a/nck/streams/README.md +++ /dev/null @@ -1,8 +0,0 @@ -# NCK Streams - -Streams are an object that can be read by writers. -Each stream must implement the following methods: - -1. `readlines`: yield each element of a stream, one by one. - -Streams need a name and a content object to be created. 
diff --git a/nck/utils/date_handler.py b/nck/utils/date_handler.py deleted file mode 100644 index 08260fa2..00000000 --- a/nck/utils/date_handler.py +++ /dev/null @@ -1,21 +0,0 @@ -import calendar -from datetime import date, timedelta -from typing import Tuple - - -def get_date_start_and_date_stop_from_range( - date_range: str -) -> Tuple[date, date]: - today = date.today() - if date_range == "PREVIOUS_MONTH": - last_day_of_previous_month = \ - today.replace(day=1) - timedelta(days=1) - year = last_day_of_previous_month.year - month = last_day_of_previous_month.month - return date(year, month, 1), date(year, month, calendar.monthrange(year, month)[1]) - elif date_range == "PREVIOUS_WEEK": - # The API uses American standard, weeks go from sunday yo next saturday - first_day_of_last_week = today - timedelta(days=today.weekday() + 1, weeks=1) - return first_day_of_last_week, first_day_of_last_week + timedelta(days=6) - else: - return None diff --git a/nck/writers/README.md b/nck/writers/README.md deleted file mode 100644 index e2e9445e..00000000 --- a/nck/writers/README.md +++ /dev/null @@ -1,10 +0,0 @@ -# NCK Writers - -Writers role is to write Stream object to a location. - -Each writer must implement ```write()``` method. - -## Step to create a new Writer - -1. Create python module following naming nomenclature ``` [command]_writer.py ``` -2. Update writing strategy in the [Runner.py file](../runner.py). diff --git a/nck/writers/gcs_writer.py b/nck/writers/gcs_writer.py deleted file mode 100644 index 4f9242d2..00000000 --- a/nck/writers/gcs_writer.py +++ /dev/null @@ -1,107 +0,0 @@ -# GNU Lesser General Public License v3.0 only -# Copyright (C) 2020 Artefact -# licence-information@artefact.com -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 3 of the License, or (at your option) any later version. 
-# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, write to the Free Software Foundation, -# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -import config -import logging -import os -from nck.helpers.google_base import GoogleBaseClass -import click - -from nck.writers.writer import Writer -from nck.commands.command import processor -from nck.utils.args import extract_args -from google.cloud import storage - - -@click.command(name="write_gcs") -@click.option("--gcs-bucket", help="GCS Bucket", required=True) -@click.option("--gcs-prefix", help="GCS path to write the file.") -@click.option("--gcs-project-id", help="GCS Project Id") -@click.option( - "--gcs-file-name", - help="Override the default name of the file (don't add the extension)", -) -@processor() -def gcs(**kwargs): - return GCSWriter(**extract_args("gcs_", kwargs)) - - -class GCSWriter(Writer, GoogleBaseClass): - _client = None - - def __init__(self, bucket, project_id, prefix=None, file_name=None): - project_id = self.get_project_id(project_id) - self._client = storage.Client( - credentials=self._get_credentials(), project=project_id - ) - self._bucket = self._client.bucket(bucket) - self._prefix = prefix - self._file_name = file_name - - def write(self, stream): - """ - Write file into GCS Bucket - - attr: - stream: Stream with the file content. 
- return: - gcs_path (str): Path to file {bucket}/{prefix}{file_name} - """ - logging.info("Writing file to GCS") - _, extension = self._extract_extension(stream.name) - file_name = ( - self._extract_extension(self._file_name)[0] + extension - if self._file_name is not None - else stream.name - ) - blob = self.create_blob(file_name) - blob.upload_from_file(stream.as_file(), content_type=stream.mime_type) - uri = self.uri_for_name(file_name) - - logging.info("Uploaded file to {}".format(uri)) - - return uri, blob - - def create_blob(self, name): - filename = self.path_for_name(name) - return self._bucket.blob(filename) - - def uri_for_name(self, name): - path = self.path_for_name(name) - return "gs://{bucket}/{path}".format(bucket=self._bucket.name, path=path) - - def path_for_name(self, name): - if self._prefix: - return os.path.join(self._prefix, name) - return name - - @staticmethod - def _extract_extension(full_file_name: str): - """Returns a tuple: file_name, extension""" - return os.path.splitext(full_file_name) - - @staticmethod - def get_project_id(project_id): - if project_id is None: - try: - return config.PROJECT_ID - except Exception: - raise click.exceptions.MissingParameter( - "Please provide a project id in ENV var or params.", - param_type="--gcs-project-id", - ) - return project_id diff --git a/nck/writers/s3_writer.py b/nck/writers/s3_writer.py deleted file mode 100644 index 49fad683..00000000 --- a/nck/writers/s3_writer.py +++ /dev/null @@ -1,90 +0,0 @@ -# GNU Lesser General Public License v3.0 only -# Copyright (C) 2020 Artefact -# licence-information@artefact.com -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 3 of the License, or (at your option) any later version. 
-# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, write to the Free Software Foundation, -# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -import logging -import click -import boto3 -from nck.writers.writer import Writer -from nck.commands.command import processor -from nck.utils.args import extract_args -from nck.utils.retry import retry - - -@click.command(name="write_s3") -@click.option("--s3-bucket-name", help="S3 Bucket name", required=True) -@click.option("--s3-bucket-region", required=True) -@click.option("--s3-access-key-id", required=True) -@click.option("--s3-access-key-secret", required=True) -@click.option("--s3-prefix", help="s3 Prefix", default=None) -@click.option( - "--s3-filename", help="Filename (without prefix). Be sure to add file extension." 
-) -@processor("s3_access_key_id", "s3_access_key_secret") -def s3(**kwargs): - return S3Writer(**extract_args("s3_", kwargs)) - - -class S3Writer(Writer): - def __init__( - self, bucket_name, access_key_id, access_key_secret, bucket_region, **kwargs - ): - boto_config = { - "region_name": bucket_region, - "aws_access_key_id": access_key_id, - "aws_secret_access_key": access_key_secret, - } - self._bucket_name = bucket_name - self._bucket_region = bucket_region - self._s3_resource = boto3.resource("s3", **boto_config) - self.kwargs = kwargs - - @retry - def write(self, stream): - - logging.info("Start writing file to S3 ...") - bucket = self._s3_resource.Bucket(self._bucket_name) - - if bucket not in self._s3_resource.buckets.all(): - self._s3_resource.create_bucket( - Bucket=self._bucket_name, - CreateBucketConfiguration={"LocationConstraint": self._bucket_region}, - ) - - bucket_region = self._s3_resource.meta.client.get_bucket_location( - Bucket=self._bucket_name - )["LocationConstraint"] - - # if the bucket region doesn't match the presigned url generated, will not work - assert ( - bucket_region == self._bucket_region - ), "the region you provided ({}) does'nt match the bucket's found region : ({}) ".format( - self._bucket_region, bucket_region - ) - if self.kwargs.get("prefix"): - prefix = self.kwargs.get("prefix") + "/" - else: - prefix = "" - - filename = f"{prefix}{self.kwargs['filename'] if self.kwargs['filename'] is not None else stream.name}" - bucket.upload_fileobj(stream.as_file(), filename) - url_file = self._s3_resource.meta.client.generate_presigned_url( - "get_object", - Params={"Bucket": self._bucket_name, "Key": stream.name}, - ExpiresIn=3600, - ) - logging.info(f"file written at location {url_file}") - return url_file, bucket diff --git a/pyproject.toml b/pyproject.toml index e34796ec..aba83cd8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,2 +1,2 @@ [tool.black] -line-length = 120 \ No newline at end of file +line-length = 127 \ No 
newline at end of file diff --git a/requirements-dev.txt b/requirements-dev.txt index c8adfde3..c86311bc 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,8 +1,12 @@ -r requirements.txt +black==19.10b0 +flake8==3.8.3 +freezegun==0.3.15 ipdb -pytest -flake8 -nose +moto==2.0.1 +nose==1.3.7 parameterized==0.7.1 -freezegun==0.3.15 -pre-commit==2.7.1 \ No newline at end of file +pre-commit==2.7.1 +pytest==6.0.1 +sphinx +sphinx-rtd-theme diff --git a/requirements.txt b/requirements.txt index 643c47c7..7eec3a82 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,13 +7,12 @@ chardet==3.0.4 Click==7.0 colorama==0.4.3 curlify==2.2.1 -cx-Oracle==7.3.0 docopt==0.6.2 docutils==0.15.2 -facebook-business==8.0.4 +facebook-business==8.0.5 google-api-core==1.14.3 google-api-python-client==1.4.2 -google-auth==1.7.2 +google-auth==1.28.0 google-auth-httplib2==0.0.3 google-cloud-bigquery==1.22.0 google-cloud-core==1.1.0 @@ -23,17 +22,17 @@ googleanalytics==0.26.0 googleapis-common-protos==1.6.0 gspread==3.1.0 hiredis==1.0.1 -httplib2==0.18.0 +httplib2==0.19.0 idna==2.8 inspect-it==0.3.2 -Jinja2==2.10.3 +Jinja2==2.11.3 jmespath==0.9.4 keyring==5.3 -lxml==4.4.2 +lxml==4.6.3 MarkupSafe==1.1.1 more-itertools==8.0.2 num2words==0.5.10 -numpy==1.17.3 +numpy==1.20.2 oauth2client==1.5.2 prettytable==0.7.2 protobuf==3.11.1 @@ -44,12 +43,12 @@ pylev==1.3.0 PyMySQL==0.9.3 python-dateutil==2.8.0 pytz==2019.3 -PyYAML==5.2 +PyYAML==5.4 radarly-py==1.0.10 redis==3.3.11 requests==2.22.0 requests-toolbelt==0.9.1 -rsa==4.0 +rsa==4.7.2 s3transfer==0.2.1 six==1.13.0 snakify==1.1.1 @@ -61,6 +60,8 @@ uritemplate==3.0.0 urllib3==1.25.7 Werkzeug==0.16.0 googleads==22.0.0 -twitter-ads==7.0.1 +twitter-ads==8.0.0 pyjwt==1.7.1 -cryptography==2.9 +cryptography==3.3.2 +bs4==0.0.1 +pydantic==1.8.1 diff --git a/setup.py b/setup.py deleted file mode 100644 index 95134467..00000000 --- a/setup.py +++ /dev/null @@ -1,55 +0,0 @@ -# GNU Lesser General Public License v3.0 only -# Copyright (C) 2020 
Artefact -# licence-information@artefact.com -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 3 of the License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, write to the Free Software Foundation, -# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -"""The setup script.""" - -from setuptools import setup, find_packages - - -with open('requirements.txt') as requirements_file: - requirements = [el.strip() for el in requirements_file.readlines()] - -setup_requirements = [] - -setup( - author="Artefact", - author_email='', - classifiers=[ - 'Development Status :: 2 - Pre-Alpha', - 'Intended Audience :: Developers', - 'Natural Language :: English', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.4', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - ], - description="API connectors", - entry_points={ - 'console_scripts': [ - 'nckrun=nck.entrypoint:cli_entrypoint', - ], - }, - install_requires=requirements, - include_package_data=True, - name='nck', - packages=find_packages(), - setup_requires=setup_requirements, - url='https://github.com/artefactory/nautilus-connectors-kit', - version='0.1.0', - zip_safe=False, -) diff --git a/tests/README.md b/tests/README.md deleted file mode 100644 index fb6df3df..00000000 --- a/tests/README.md +++ /dev/null @@ -1,21 +0,0 @@ -# Running the tests - -To run the tests, install the dev dependencies: - 
-```bash -pip install -r requirements-dev.txt -``` - -And then run: - -```bash -nosetests -``` - -It will find all the tests and run them. - -# Writing a test - -You file should start or finish with **test** or **tests**. - -**WIP** \ No newline at end of file diff --git a/tests/clients/__init__.py b/tests/clients/__init__.py new file mode 100644 index 00000000..d46139b7 --- /dev/null +++ b/tests/clients/__init__.py @@ -0,0 +1,17 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. diff --git a/tests/clients/api/__init__.py b/tests/clients/api/__init__.py new file mode 100644 index 00000000..d46139b7 --- /dev/null +++ b/tests/clients/api/__init__.py @@ -0,0 +1,17 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. diff --git a/tests/helpers/test_api_client_helper.py b/tests/clients/api/test_helper.py similarity index 58% rename from tests/helpers/test_api_client_helper.py rename to tests/clients/api/test_helper.py index 6e31dbfe..507b9859 100644 --- a/tests/helpers/test_api_client_helper.py +++ b/tests/clients/api/test_helper.py @@ -16,45 +16,31 @@ # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. import unittest -import logging +from ack.clients.api.helper import get_dict_with_keys_converted_to_new_string_format, to_pascal_key from parameterized import parameterized -from nck.helpers.api_client_helper import (get_dict_with_keys_converted_to_new_string_format, - to_pascal_key) - class ApiClientHelperTest(unittest.TestCase): - def test_string_conversion_to_camel_case(self): self.assertDictEqual( - get_dict_with_keys_converted_to_new_string_format( - abc_de=1, - abc="abc", - abc_de_fg=2 - ), - { - "AbcDe": 1, - "Abc": "abc", - "AbcDeFg": 2 - } + get_dict_with_keys_converted_to_new_string_format(abc_de=1, abc="abc", abc_de_fg=2), + {"AbcDe": 1, "Abc": "abc", "AbcDeFg": 2}, ) - @parameterized.expand([ - ("test", "Test"), - ("test_test", "TestTest"), - ("test_test_test", "TestTestTest"), - ("tEST", "Test"), - ("t_e_s_t", "TEST") - ]) + @parameterized.expand( + [ + ("test", "Test"), + ("test_test", "TestTest"), + ("test_test_test", "TestTestTest"), + ("tEST", "Test"), + ("t_e_s_t", "TEST"), + ] + ) def 
test_to_pascal_key(self, key, pascal_key): - self.assertEquals(to_pascal_key(key), pascal_key) + self.assertEqual(to_pascal_key(key), pascal_key) def test_unknown_case(self): with self.assertLogs() as cm: - logging.getLogger("ApiClient") get_dict_with_keys_converted_to_new_string_format("UnknownCase") - self.assertEqual( - cm.output, - ["ERROR:root:Unable to convert to new string format. Format not in ['PascalCase']"] - ) + self.assertEqual(cm.output, ["ERROR:root:Unable to convert to new string format. Format not in ['PascalCase']"]) diff --git a/tests/clients/google/__init__.py b/tests/clients/google/__init__.py new file mode 100644 index 00000000..d46139b7 --- /dev/null +++ b/tests/clients/google/__init__.py @@ -0,0 +1,17 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
diff --git a/tests/clients/google/test_client.py b/tests/clients/google/test_client.py new file mode 100644 index 00000000..9d0be0bb --- /dev/null +++ b/tests/clients/google/test_client.py @@ -0,0 +1,72 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ +import json +import os +import unittest +from unittest import mock + +import ack.clients.google.client + +MODULE_NAME = "ack.clients.google.client" + + +class GoogleClientTest(unittest.TestCase): + def setUp(self): + self.instance = ack.clients.google.client.GoogleClient() + + @mock.patch(MODULE_NAME + ".google.auth.default", return_value=("CREDENTIALS", "PROJECT_ID")) + def test_get_credentials_and_project_id_with_default_auth(self, mock_auth_default): + result = self.instance._get_credentials_and_project_id() + mock_auth_default.assert_called_once_with(scopes=self.instance.scopes) + self.assertEqual(("CREDENTIALS", "PROJECT_ID"), result) + + @mock.patch( + MODULE_NAME + ".google.oauth2.service_account.Credentials" ".from_service_account_file", + **{"return_value.project_id": "PROJECT_ID"} + ) + @mock.patch.dict(os.environ, {"GCP_KEY_PATH": "KEY_PATH.json"}) + def test_get_credentials_and_project_id_with_service_account_file(self, mock_from_service_account_file): + result = self.instance._get_credentials_and_project_id() + mock_from_service_account_file.assert_called_once_with("KEY_PATH.json", scopes=self.instance.scopes) + self.assertEqual((mock_from_service_account_file.return_value, "PROJECT_ID"), result) + + @mock.patch(MODULE_NAME + ".google.oauth2.service_account.Credentials" ".from_service_account_file") + @mock.patch.dict(os.environ, {"GCP_KEY_PATH": "KEY_PATH.p12"}) + def test_get_credentials_and_project_id_with_service_account_file_and_p12_key(self, mock_from_service_account_file): + with self.assertRaises(Exception): + self.instance._get_credentials_and_project_id() + + @mock.patch(MODULE_NAME + ".google.oauth2.service_account.Credentials" ".from_service_account_file") + @mock.patch.dict(os.environ, {"GCP_KEY_PATH": "KEY_PATH.unknown"}) + def test_get_credentials_and_project_id_with_service_account_file_and_unknown_key(self, mock_from_service_account_file): + with self.assertRaises(Exception): + self.instance._get_credentials_and_project_id() + + 
@mock.patch( + MODULE_NAME + ".google.oauth2.service_account.Credentials" ".from_service_account_info", + **{"return_value.project_id": "PROJECT_ID"} + ) + @mock.patch.dict(os.environ, {"GCP_KEY_JSON": json.dumps({"private_key": "PRIVATE_KEY"})}) + def test_get_credentials_and_project_id_with_service_account_info(self, mock_from_service_account_file): + result = self.instance._get_credentials_and_project_id() + mock_from_service_account_file.assert_called_once_with({"private_key": "PRIVATE_KEY"}, scopes=self.instance.scopes) + self.assertEqual((mock_from_service_account_file.return_value, "PROJECT_ID"), result) + + def test_default_scopes(self): + self.assertEqual(self.instance.scopes, ("https://www.googleapis.com/auth/cloud-platform",)) diff --git a/tests/clients/google_dcm/__init__.py b/tests/clients/google_dcm/__init__.py new file mode 100644 index 00000000..d46139b7 --- /dev/null +++ b/tests/clients/google_dcm/__init__.py @@ -0,0 +1,17 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
diff --git a/tests/clients/test_dcm_client.py b/tests/clients/google_dcm/test_client.py similarity index 82% rename from tests/clients/test_dcm_client.py rename to tests/clients/google_dcm/test_client.py index 0b3629ad..b9f6c7b9 100644 --- a/tests/clients/test_dcm_client.py +++ b/tests/clients/google_dcm/test_client.py @@ -15,10 +15,11 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -from unittest import TestCase, mock + from datetime import datetime +from unittest import TestCase, mock -from nck.clients.dcm_client import DCMClient +from ack.clients.google_dcm.client import GoogleDCMClient class MockService: @@ -36,20 +37,20 @@ def mock_service(*args, **kwargs): return MockService() -class DCMClientTest(TestCase): +class GoogleDCMClientTest(TestCase): def mock_dcm_client(self, **kwargs): for param, value in kwargs.items(): setattr(self, param, value) kwargs = {"_service": mock_service()} - @mock.patch.object(DCMClient, "__init__", mock_dcm_client) + @mock.patch.object(GoogleDCMClient, "__init__", mock_dcm_client) def test_add_report_criteria(self): report = {"name": "report"} start = datetime(year=2020, month=1, day=1) end = datetime(year=2020, month=2, day=1) elements = ["a", "b"] - DCMClient(**self.kwargs).add_report_criteria(report, start, end, elements, elements) + GoogleDCMClient(**self.kwargs).add_report_criteria(report, start, end, elements, elements) expected = { "name": "report", "criteria": { @@ -60,13 +61,13 @@ def test_add_report_criteria(self): } assert report == expected - @mock.patch.object(DCMClient, "__init__", mock_dcm_client) + @mock.patch.object(GoogleDCMClient, "__init__", mock_dcm_client) @mock.patch.object(MockService, "execute", lambda *args: {"items": [{"value": "ok"}, {"value": "nok"}]}) - @mock.patch("tests.clients.test_dcm_client.MockService") + 
@mock.patch("tests.clients.google_dcm.test_client.MockService") def test_add_dimension_filters(self, mock_filter): report = {"criteria": {"dateRange": {"endDate": "", "startDate": ""}}} profile_id = "" filters = [("filter", "ok")] - DCMClient(**self.kwargs).add_dimension_filters(report, profile_id, filters) + GoogleDCMClient(**self.kwargs).add_dimension_filters(report, profile_id, filters) expected = {"criteria": {"dateRange": {"endDate": "", "startDate": ""}, "dimensionFilters": [{"value": "ok"}]}} assert report == expected diff --git a/tests/clients/google_sa360/__init__.py b/tests/clients/google_sa360/__init__.py new file mode 100644 index 00000000..d46139b7 --- /dev/null +++ b/tests/clients/google_sa360/__init__.py @@ -0,0 +1,17 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
diff --git a/tests/clients/test_sa360_client.py b/tests/clients/google_sa360/test_client.py similarity index 85% rename from tests/clients/test_sa360_client.py rename to tests/clients/google_sa360/test_client.py index 87f606f3..2fb03cf3 100644 --- a/tests/clients/test_sa360_client.py +++ b/tests/clients/google_sa360/test_client.py @@ -16,12 +16,12 @@ # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. from unittest import TestCase -from nck.clients.sa360_client import SA360Client +from ack.clients.google_sa360.client import GoogleSA360Client -class SA360ClientTest(TestCase): +class GoogleSA360ClientTest(TestCase): def test_generate_all_columns(self): standard = ["clicks", "impressions"] saved = ["savedColumn"] expected = [{"columnName": "clicks"}, {"columnName": "impressions"}, {"savedColumnName": "savedColumn"}] - self.assertEqual(SA360Client.generate_columns(standard, saved), expected) + self.assertEqual(GoogleSA360Client.generate_columns(standard, saved), expected) diff --git a/tests/clients/test_api_client.py b/tests/clients/test_api_client.py deleted file mode 100644 index c737a88c..00000000 --- a/tests/clients/test_api_client.py +++ /dev/null @@ -1,64 +0,0 @@ -# GNU Lesser General Public License v3.0 only -# Copyright (C) 2020 Artefact -# licence-information@artefact.com -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 3 of the License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. 
-# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, write to the Free Software Foundation, -# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -from unittest import TestCase - -from nck.clients.api_client import ApiClient - - -class ApiClientTest(TestCase): - - def test_get_formatted_request_body(self): - selection_criteria = { - "Filter": [ - { - "Field": "CampaignId", - "Operator": "IN", - "Values": ["123", "456"] - } - ] - } - page = { - "Limit": 10 - } - field_names = ["AdGroupId", "Year", "CampaignName"] - report_name = "test" - report_type = "CAMPAIGN_PERFORMANCE_REPORT" - date_range_type = "ALL_TIME" - include_vat = "NO" - - expected_output = { - "SelectionCriteria": selection_criteria, - "Page": page, - "FieldNames": field_names, - "ReportName": report_name, - "ReportType": report_type, - "DateRangeType": date_range_type, - "IncludeVAT": include_vat - } - self.assertDictEqual( - ApiClient.get_formatted_request_body( - selection_criteria=selection_criteria, - page=page, - field_names=field_names, - report_name=report_name, - report_type=report_type, - date_range_type=date_range_type, - include_v_a_t=include_vat - ), - expected_output - ) diff --git a/tests/helpers/google_base_tests.py b/tests/helpers/google_base_tests.py deleted file mode 100644 index fee08e05..00000000 --- a/tests/helpers/google_base_tests.py +++ /dev/null @@ -1,94 +0,0 @@ -# GNU Lesser General Public License v3.0 only -# Copyright (C) 2020 Artefact -# licence-information@artefact.com -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 3 of the License, or (at your option) any later version. 
-# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, write to the Free Software Foundation, -# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -import unittest -import nck.helpers.google_base -from unittest import mock -import json -import os - -MODULE_NAME = 'nck.helpers.google_base' - - -class TestGoogleCloudBaseClass(unittest.TestCase): - def setUp(self): - self.instance = nck.helpers.google_base.GoogleBaseClass() - - @mock.patch(MODULE_NAME + '.google.auth.default', - return_value=("CREDENTIALS", "PROJECT_ID")) - def test_get_credentials_and_project_id_with_default_auth(self, mock_auth_default): - result = self.instance._get_credentials_and_project_id() - mock_auth_default.assert_called_once_with(scopes=self.instance.scopes) - self.assertEqual(('CREDENTIALS', 'PROJECT_ID'), result) - - @mock.patch( - MODULE_NAME + '.google.oauth2.service_account.Credentials' - '.from_service_account_file', - **{'return_value.project_id': "PROJECT_ID"} - ) - @mock.patch.dict(os.environ, {'GCP_KEY_PATH': 'KEY_PATH.json'}) - def test_get_credentials_and_project_id_with_service_account_file(self, - mock_from_service_account_file): - result = self.instance._get_credentials_and_project_id() - mock_from_service_account_file.assert_called_once_with('KEY_PATH.json', - scopes=self.instance.scopes) - self.assertEqual((mock_from_service_account_file.return_value, 'PROJECT_ID'), - result) - - @mock.patch( - MODULE_NAME + '.google.oauth2.service_account.Credentials' - '.from_service_account_file') - @mock.patch.dict(os.environ, {'GCP_KEY_PATH': 'KEY_PATH.p12'}) - def test_get_credentials_and_project_id_with_service_account_file_and_p12_key( - self, - 
mock_from_service_account_file - ): - with self.assertRaises(Exception): - self.instance._get_credentials_and_project_id() - - @mock.patch( - MODULE_NAME + '.google.oauth2.service_account.Credentials' - '.from_service_account_file') - @mock.patch.dict(os.environ, {'GCP_KEY_PATH': 'KEY_PATH.unknown'}) - def test_get_credentials_and_project_id_with_service_account_file_and_unknown_key( - self, - mock_from_service_account_file - ): - with self.assertRaises(Exception): - self.instance._get_credentials_and_project_id() - - @mock.patch( - MODULE_NAME + '.google.oauth2.service_account.Credentials' - '.from_service_account_info', - **{'return_value.project_id': "PROJECT_ID"} - ) - @mock.patch.dict(os.environ, {'GCP_KEY_JSON': json.dumps({ - 'private_key': "PRIVATE_KEY" - })}) - def test_get_credentials_and_project_id_with_service_account_info(self, - mock_from_service_account_file): - result = self.instance._get_credentials_and_project_id() - mock_from_service_account_file.assert_called_once_with({ - 'private_key': "PRIVATE_KEY" - }, - scopes=self.instance.scopes) - self.assertEqual((mock_from_service_account_file.return_value, 'PROJECT_ID'), - result) - - def test_default_scopes(self): - self.assertEqual(self.instance.scopes, - ('https://www.googleapis.com/auth/cloud-platform',)) diff --git a/tests/readers/adobe_analytics_1_4/__init__.py b/tests/readers/adobe_analytics_1_4/__init__.py new file mode 100644 index 00000000..d46139b7 --- /dev/null +++ b/tests/readers/adobe_analytics_1_4/__init__.py @@ -0,0 +1,17 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. diff --git a/tests/readers/test_adobe_reader.py b/tests/readers/adobe_analytics_1_4/test_reader.py similarity index 73% rename from tests/readers/test_adobe_reader.py rename to tests/readers/adobe_analytics_1_4/test_reader.py index 66480100..3670f036 100644 --- a/tests/readers/test_adobe_reader.py +++ b/tests/readers/adobe_analytics_1_4/test_reader.py @@ -15,12 +15,14 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ import datetime -from nck.readers.adobe_reader import AdobeReader from unittest import TestCase, mock +from ack.readers.adobe_analytics_1_4.reader import AdobeAnalytics14Reader + -class AdobeReaderTest(TestCase): +class AdobeAnalytics14ReaderTest(TestCase): DATEFORMAT = "%Y-%m-%d" @@ -52,16 +54,11 @@ class AdobeReaderTest(TestCase): "end_date": datetime.datetime(2020, 1, 3), } - @mock.patch("nck.clients.adobe_client.AdobeClient.__init__", return_value=None) - @mock.patch( - "nck.readers.adobe_reader.AdobeReader.query_report", - return_value={"reportID": "XXXXX"}, - ) + @mock.patch("ack.clients.adobe_analytics.client.AdobeAnalyticsClient.__init__", return_value=None) @mock.patch( - "nck.readers.adobe_reader.AdobeReader.download_report", return_value=None + "ack.readers.adobe_analytics_1_4.reader.AdobeAnalytics14Reader.query_report", return_value={"reportID": "XXXXX"}, ) - def test_read_empty_data( - self, mock_adobe_client, mock_query_report, mock_download_report - ): - reader = AdobeReader(**self.kwargs) + @mock.patch("ack.readers.adobe_analytics_1_4.reader.AdobeAnalytics14Reader.download_report", return_value=None) + def test_read_empty_data(self, mock_adobe_client, mock_query_report, mock_download_report): + reader = AdobeAnalytics14Reader(**self.kwargs) self.assertFalse(len(list(reader.read())) > 1) diff --git a/tests/readers/adobe_analytics_2_0/__init__.py b/tests/readers/adobe_analytics_2_0/__init__.py new file mode 100644 index 00000000..d46139b7 --- /dev/null +++ b/tests/readers/adobe_analytics_2_0/__init__.py @@ -0,0 +1,17 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. diff --git a/tests/readers/test_adobe_reader_2_0.py b/tests/readers/adobe_analytics_2_0/test_reader.py similarity index 67% rename from tests/readers/test_adobe_reader_2_0.py rename to tests/readers/adobe_analytics_2_0/test_reader.py index e953227a..22c69d02 100644 --- a/tests/readers/test_adobe_reader_2_0.py +++ b/tests/readers/adobe_analytics_2_0/test_reader.py @@ -16,13 +16,13 @@ # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-from nck.readers.adobe_reader_2_0 import AdobeReader_2_0 +import datetime from unittest import TestCase, mock -import datetime +from ack.readers.adobe_analytics_2_0.reader import AdobeAnalytics20Reader -class AdobeReaderTest_2_0(TestCase): +class AdobeAnalytics20ReaderTest(TestCase): kwargs = { "client_id": "", @@ -36,99 +36,62 @@ class AdobeReaderTest_2_0(TestCase): "metric": [], "start_date": datetime.date(2020, 1, 1), "end_date": datetime.date(2020, 1, 2), + "date_range": None, } - @mock.patch("nck.clients.adobe_client.AdobeClient.__init__", return_value=None) - def test_build_date_range(self, mock_adobe_client): - output = AdobeReader_2_0(**self.kwargs).build_date_range() + @mock.patch("ack.clients.adobe_analytics.client.AdobeAnalyticsClient.__init__", return_value=None) + def test_format_date_range(self, mock_adobe_client): + output = AdobeAnalytics20Reader(**self.kwargs).format_date_range() expected = "2020-01-01T00:00:00/2020-01-03T00:00:00" self.assertEqual(output, expected) - @mock.patch("nck.clients.adobe_client.AdobeClient.__init__", return_value=None) + @mock.patch("ack.clients.adobe_analytics.client.AdobeAnalyticsClient.__init__", return_value=None) def test_build_report_description_one_dimension(self, mock_adobe_client): temp_kwargs = self.kwargs.copy() temp_kwargs.update({"dimension": ["daterangeday"]}) metrics = ["visits", "bounces"] - output = AdobeReader_2_0(**temp_kwargs).build_report_description(metrics) + output = AdobeAnalytics20Reader(**temp_kwargs).build_report_description(metrics) expected = { "rsid": "XXXXXXXXX", - "globalFilters": [ - { - "type": "dateRange", - "dateRange": "2020-01-01T00:00:00/2020-01-03T00:00:00", - } - ], + "globalFilters": [{"type": "dateRange", "dateRange": "2020-01-01T00:00:00/2020-01-03T00:00:00"}], "metricContainer": { "metricFilters": [], - "metrics": [ - {"id": "metrics/visits", "filters": []}, - {"id": "metrics/bounces", "filters": []}, - ], + "metrics": [{"id": "metrics/visits", "filters": []}, {"id": 
"metrics/bounces", "filters": []}], }, "dimension": "variables/daterangeday", "settings": {"countRepeatInstances": "true", "limit": "5000"}, } self.assertEqual(output, expected) - @mock.patch("nck.clients.adobe_client.AdobeClient.__init__", return_value=None) + @mock.patch("ack.clients.adobe_analytics.client.AdobeAnalyticsClient.__init__", return_value=None) def test_build_report_description_multiple_dimensions(self, mock_adobe_client): temp_kwargs = self.kwargs.copy() temp_kwargs.update({"dimension": ["daterangeday", "campaign", "pagename"]}) metrics = ["visits", "bounces"] breakdown_item_ids = ["000000000", "111111111"] - output = AdobeReader_2_0(**temp_kwargs).build_report_description( - metrics, breakdown_item_ids - ) + output = AdobeAnalytics20Reader(**temp_kwargs).build_report_description(metrics, breakdown_item_ids) expected = { "rsid": "XXXXXXXXX", - "globalFilters": [ - { - "type": "dateRange", - "dateRange": "2020-01-01T00:00:00/2020-01-03T00:00:00", - } - ], + "globalFilters": [{"type": "dateRange", "dateRange": "2020-01-01T00:00:00/2020-01-03T00:00:00"}], "metricContainer": { "metricFilters": [ - { - "id": 0, - "type": "breakdown", - "dimension": "variables/daterangeday", - "itemId": "000000000", - }, - { - "id": 1, - "type": "breakdown", - "dimension": "variables/campaign", - "itemId": "111111111", - }, - { - "id": 2, - "type": "breakdown", - "dimension": "variables/daterangeday", - "itemId": "000000000", - }, - { - "id": 3, - "type": "breakdown", - "dimension": "variables/campaign", - "itemId": "111111111", - }, - ], - "metrics": [ - {"id": "metrics/visits", "filters": [0, 1]}, - {"id": "metrics/bounces", "filters": [2, 3]}, + {"id": 0, "type": "breakdown", "dimension": "variables/daterangeday", "itemId": "000000000"}, + {"id": 1, "type": "breakdown", "dimension": "variables/campaign", "itemId": "111111111"}, + {"id": 2, "type": "breakdown", "dimension": "variables/daterangeday", "itemId": "000000000"}, + {"id": 3, "type": "breakdown", "dimension": 
"variables/campaign", "itemId": "111111111"}, ], + "metrics": [{"id": "metrics/visits", "filters": [0, 1]}, {"id": "metrics/bounces", "filters": [2, 3]}], }, "dimension": "variables/pagename", "settings": {"countRepeatInstances": "true", "limit": "5000"}, } self.assertEqual(output, expected) - @mock.patch("nck.clients.adobe_client.AdobeClient.__init__", return_value=None) + @mock.patch("ack.clients.adobe_analytics.client.AdobeAnalyticsClient.__init__", return_value=None) @mock.patch( - "nck.readers.adobe_reader_2_0.AdobeReader_2_0.get_report_page", + "ack.readers.adobe_analytics_2_0.reader.AdobeAnalytics20Reader.get_report_page", side_effect=[ { "totalPages": 2, @@ -155,17 +118,11 @@ def test_build_report_description_multiple_dimensions(self, mock_adobe_client): def test_get_parsed_report(self, mock_adobe_client, mock_get_report_page): temp_kwargs = self.kwargs.copy() temp_kwargs.update( - { - "dimension": ["daterangeday"], - "start_date": datetime.date(2020, 1, 1), - "end_date": datetime.date(2020, 1, 4), - } + {"dimension": ["daterangeday"], "start_date": datetime.date(2020, 1, 1), "end_date": datetime.date(2020, 1, 4)} ) metrics = ["visits", "bounces"] - output = AdobeReader_2_0(**temp_kwargs).get_parsed_report( - {"dimension": "variables/daterangeday"}, metrics - ) + output = AdobeAnalytics20Reader(**temp_kwargs).get_parsed_report({"dimension": "variables/daterangeday"}, metrics) expected = [ {"daterangeday": "2020-01-01", "visits": 11, "bounces": 21}, {"daterangeday": "2020-01-02", "visits": 12, "bounces": 22}, @@ -175,13 +132,10 @@ def test_get_parsed_report(self, mock_adobe_client, mock_get_report_page): for output_record, expected_record in zip(output, expected): self.assertEqual(output_record, expected_record) - @mock.patch("nck.clients.adobe_client.AdobeClient.__init__", return_value=None) + @mock.patch("ack.clients.adobe_analytics.client.AdobeAnalyticsClient.__init__", return_value=None) @mock.patch( - 
"nck.readers.adobe_reader_2_0.AdobeReader_2_0.get_node_values", - return_value={ - "lasttouchchannel_1": "Paid Search", - "lasttouchchannel_2": "Natural_Search", - }, + "ack.readers.adobe_analytics_2_0.reader.AdobeAnalytics20Reader.get_node_values", + return_value={"lasttouchchannel_1": "Paid Search", "lasttouchchannel_2": "Natural_Search"}, ) def test_add_child_nodes_to_graph(self, mock_adobe_client, mock_get_node_values): graph = { @@ -192,9 +146,7 @@ def test_add_child_nodes_to_graph(self, mock_adobe_client, mock_get_node_values) node = "daterangeday_1200201" path_to_node = ["daterangeday_1200201"] - output = AdobeReader_2_0(**self.kwargs).add_child_nodes_to_graph( - graph, node, path_to_node - ) + output = AdobeAnalytics20Reader(**self.kwargs).add_child_nodes_to_graph(graph, node, path_to_node) expected = { "root": ["daterangeday_1200201", "daterangeday_1200202"], "daterangeday_1200201": ["lasttouchchannel_1", "lasttouchchannel_2"], @@ -204,23 +156,19 @@ def test_add_child_nodes_to_graph(self, mock_adobe_client, mock_get_node_values) } self.assertEqual(output, expected) - @mock.patch("nck.clients.adobe_client.AdobeClient.__init__", return_value=None) + @mock.patch("ack.clients.adobe_analytics.client.AdobeAnalyticsClient.__init__", return_value=None) @mock.patch( - "nck.readers.adobe_reader_2_0.AdobeReader_2_0.get_parsed_report", + "ack.readers.adobe_analytics_2_0.reader.AdobeAnalytics20Reader.get_parsed_report", return_value=[ {"daterangeday": "2020-01-01", "visits": 11, "bounces": 21}, {"daterangeday": "2020-01-02", "visits": 12, "bounces": 22}, ], ) - def test_read_one_dimension_reports( - self, mock_adobe_client, mock_get_parsed_report - ): + def test_read_one_dimension_reports(self, mock_adobe_client, mock_get_parsed_report): temp_kwargs = self.kwargs.copy() - temp_kwargs.update( - {"dimension": ["daterangeday"], "metric": ["visits", "bounces"]} - ) + temp_kwargs.update({"dimension": ["daterangeday"], "metric": ["visits", "bounces"]}) - output = 
next(AdobeReader_2_0(**temp_kwargs).read()) + output = next(AdobeAnalytics20Reader(**temp_kwargs).read()) expected = [ {"daterangeday": "2020-01-01", "visits": 11, "bounces": 21}, {"daterangeday": "2020-01-02", "visits": 12, "bounces": 22}, @@ -228,9 +176,9 @@ def test_read_one_dimension_reports( for output_record, expected_output in zip(output.readlines(), iter(expected)): self.assertEqual(output_record, expected_output) - @mock.patch("nck.clients.adobe_client.AdobeClient.__init__", return_value=None) + @mock.patch("ack.clients.adobe_analytics.client.AdobeAnalyticsClient.__init__", return_value=None) @mock.patch( - "nck.readers.adobe_reader_2_0.AdobeReader_2_0.add_child_nodes_to_graph", + "ack.readers.adobe_analytics_2_0.reader.AdobeAnalytics20Reader.add_child_nodes_to_graph", side_effect=[ { "root": ["daterangeday_1200201", "daterangeday_1200202"], @@ -253,7 +201,7 @@ def test_read_one_dimension_reports( ], ) @mock.patch( - "nck.readers.adobe_reader_2_0.AdobeReader_2_0.get_parsed_report", + "ack.readers.adobe_analytics_2_0.reader.AdobeAnalytics20Reader.get_parsed_report", side_effect=[ [ { @@ -289,17 +237,10 @@ def test_read_one_dimension_reports( ], ], ) - def test_read_multiple_dimension_reports( - self, mock_adobe_client, mock_add_child_nodes_to_graph, mock_get_parsed_report - ): + def test_read_multiple_dimension_reports(self, mock_adobe_client, mock_add_child_nodes_to_graph, mock_get_parsed_report): temp_kwargs = self.kwargs.copy() - temp_kwargs.update( - { - "dimension": ["daterangeday", "lastouchchannel", "campaign"], - "metric": ["visits", "bounces"], - } - ) - reader = AdobeReader_2_0(**temp_kwargs) + temp_kwargs.update({"dimension": ["daterangeday", "lastouchchannel", "campaign"], "metric": ["visits", "bounces"]}) + reader = AdobeAnalytics20Reader(**temp_kwargs) reader.node_values = { "daterangeday_1200201": "Jan 1, 2020", "daterangeday_1200202": "Jan 2, 2020", diff --git a/tests/readers/confluence/__init__.py b/tests/readers/confluence/__init__.py new 
file mode 100644 index 00000000..d46139b7 --- /dev/null +++ b/tests/readers/confluence/__init__.py @@ -0,0 +1,17 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. diff --git a/tests/readers/confluence/test_helper.py b/tests/readers/confluence/test_helper.py new file mode 100644 index 00000000..f935e5d2 --- /dev/null +++ b/tests/readers/confluence/test_helper.py @@ -0,0 +1,230 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +from unittest import TestCase + +from bs4 import BeautifulSoup +from parameterized import parameterized + +PARAGRAPH_OF_200_CHARACTERS = ( + "Lorem ipsum sit amet cursus sit amet dictum sit amet justo donec" + "enim diam vulputate ut pharetra sit amet aliquam id diam maecenas" + "ultricies mi eget mauris pharetra et ultrices neque ornare aenean felis" +) + +HTML_BODY = ( + "" + "

Case ID card

" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + "
Salesforce ID0061t000003WEJ3AAO
ConfidentialityGreenPublic
Article statusYellowIn progress
IndustryAutomotive
Client companyMichelin
ScopeFrance
Mission start date2020/01/01
Mission end date8 weeks
Amount sold45 KEUR
Mission topicPrecision Marketing
Commercial proposalClick here
One pagerClick here
ArchitectureClick here
" + "
" + f"

Key learnings

{PARAGRAPH_OF_200_CHARACTERS}

" + f"

I. Context

{PARAGRAPH_OF_200_CHARACTERS}

" + f"

II. Approach

{PARAGRAPH_OF_200_CHARACTERS}

" + f"

III. Conclusion

{PARAGRAPH_OF_200_CHARACTERS}

" +) + +CONTENT_DCT = { + "id": "00001", + "type": "page", + "title": "Making API requests with ACK", + "space": {"name": "How To Guides"}, + "metadata": {"labels": {"results": [{"name": "ack"}, {"name": "api"}]}}, + "_links": {"self": "https://your-domain.com/wiki/rest/api/content/00001", "tinyui": "/x/aBcD"}, + "body": {"storage": {"value": HTML_BODY}}, +} + +EXPECTED_CLIENT_PROPERTIES = { + "client_property_SALESFORCE ID": "0061t000003WEJ3AAO", + "client_property_CONFIDENTIALITY": "PUBLIC", + "client_property_ARTICLE STATUS": "IN PROGRESS", + "client_property_INDUSTRY": "Automotive", + "client_property_CLIENT COMPANY": "Michelin", + "client_property_SCOPE": "France", + "client_property_MISSION START DATE": "2020/01/01", + "client_property_MISSION END DATE": "8 weeks", + "client_property_AMOUNT SOLD": "45 KEUR", + "client_property_MISSION TOPIC": "Precision Marketing", + "client_property_COMMERCIAL PROPOSAL": "https://commercial_proposal.com", + "client_property_ONE PAGER": "https://one_pager.com", + "client_property_ARCHITECTURE": "https://architecture.com", +} + +EXPECTED_CLIENT_COMPLETION = { + "client_completion_KEY LEARNINGS": 1, + "client_completion_CONTEXT": 1, + "client_completion_APPROACH": 0, + "client_completion_CONCLUSION": 1, +} + + +class CustomFieldsTest(TestCase): + def test__get_tiny_link(self): + from ack.readers.confluence.helper import _get_tiny_link + + field_value = {"self": "https://your-domain.com/wiki/rest/api/content/00001", "tinyui": "/x/aBcD"} + expected = "https://your-domain.com/wiki/x/aBcD" + self.assertEqual(_get_tiny_link(field_value), expected) + + @parameterized.expand([([{"name": "ack"}, {"name": "api"}], "name", "ack|api"), ([], "name", "")]) + def test__get_key_values_from_list_of_dct(self, field_value, key, expected): + from ack.readers.confluence.helper import _get_key_values_from_list_of_dct + + self.assertEqual(_get_key_values_from_list_of_dct(field_value, key), expected) + + def test__get_client_properties(self): + from 
ack.readers.confluence.helper import _get_client_properties + + self.assertDictEqual(_get_client_properties(HTML_BODY), EXPECTED_CLIENT_PROPERTIES) + + def test__get_client_completion(self): + from ack.readers.confluence.helper import _get_client_completion + + self.assertDictEqual(_get_client_completion(HTML_BODY), EXPECTED_CLIENT_COMPLETION) + + @parameterized.expand( + [ + ( + ( + "

II. Approach

In data we trust.

" + "

Team

B. Gates, M. Zuckerberg

" + ), + "

II. Approach

In data we trust.

", + ), + ( + ( + "II. Approach

In data we trust.

" + "

Team

B. Gates, M. Zuckerberg

" + ), + "II. Approach

In data we trust.

", + ), + ] + ) + def test__get_section_by_title(self, html_body, expected): + from ack.readers.confluence.helper import _get_section_by_title + + searched_title = "APPROACH" + html_soup = BeautifulSoup(html_body, "lxml") + output = _get_section_by_title(html_soup, searched_title) + self.assertEqual(str(output), expected) + + +class DictToCleanTest(TestCase): + def test__clean(self): + from ack.readers.confluence.helper import DictToClean + + dct = {"CLIENT COMPANY": "Michelin", "SCOPE": "France", "TEAM SIZE": 10} + expected_keys = ["CLIENT COMPANY", "AMOUNT SOLD", "SCOPE"] + default_value = "" + prefix = "prefix_" + + expected = {"prefix_CLIENT COMPANY": "Michelin", "prefix_AMOUNT SOLD": "", "prefix_SCOPE": "France"} + output = DictToClean(dct, expected_keys, default_value, prefix).clean() + self.assertDictEqual(output, expected) + + +class ParseResponseTest(TestCase): + @parameterized.expand( + [("title", ["title"]), ("space.name", ["space", "name"]), ("label_names", ["metadata", "labels", "results"])] + ) + def test__get_field_path(self, field, expected): + from ack.readers.confluence.helper import _get_field_path + + self.assertListEqual(_get_field_path(field), expected) + + @parameterized.expand( + [ + (["title"], "Making API requests with ACK"), + (["space", "name"], "How To Guides"), + (["metadata", "labels"], {"results": [{"name": "ack"}, {"name": "api"}]}), + (["invalid_key"], None), + ] + ) + def test__get_field_value(self, field_path, expected): + from ack.readers.confluence.helper import _get_field_value + + self.assertEqual(_get_field_value(CONTENT_DCT, field_path), expected) + + @parameterized.expand( + [ + ("title", "Making API requests with ACK", {"title": "Making API requests with ACK"}), + ("space.name", "How To Guides", {"space.name": "How To Guides"}), + ("label_names", [{"name": "ack"}, {"name": "api"}], {"label_names": "ack|api"}), + ] + ) + def test__format_field_as_dct(self, field, field_value, expected): + from ack.readers.confluence.helper 
import _format_field_as_dct + + self.assertDictEqual(_format_field_as_dct(field, field_value), expected) + + def test__parse_response(self): + from ack.readers.confluence.helper import parse_response + + raw_response = { + "results": [ + { + "id": "00001", + "type": "page", + "title": "Making API requests with ACK", + "space": {"name": "How To Guides"}, + "metadata": {"labels": {"results": [{"name": "ack"}, {"name": "api"}]}}, + }, + { + "id": "00002", + "type": "page", + "title": "Writting a Client Case", + "space": {"name": "How To Guides"}, + "metadata": {"labels": {"results": [{"name": "confluence"}]}}, + }, + { + "id": "00003", + "type": "page", + "title": "Developping with Github", + "space": {"name": "How To Guides"}, + "metadata": {"labels": {"results": [{"name": "git"}]}}, + }, + ] + } + fields = ["title", "space.name", "label_names"] + expected = [ + {"title": "Making API requests with ACK", "space.name": "How To Guides", "label_names": "ack|api"}, + {"title": "Writting a Client Case", "space.name": "How To Guides", "label_names": "confluence"}, + {"title": "Developping with Github", "space.name": "How To Guides", "label_names": "git"}, + ] + output = parse_response(raw_response, fields) + for output_record, expected_record in zip(iter(output), iter(expected)): + self.assertDictEqual(output_record, expected_record) + + @parameterized.expand([("\u2705 Title with \ud83d\udd36 emoji \ud83d\udd34", "Title with emoji"), (0, 0)]) + def test__decode(self, raw_value, expected): + from ack.readers.confluence.helper import _decode + + self.assertEqual(_decode(raw_value), expected) diff --git a/tests/readers/confluence/test_reader.py b/tests/readers/confluence/test_reader.py new file mode 100644 index 00000000..e145573c --- /dev/null +++ b/tests/readers/confluence/test_reader.py @@ -0,0 +1,120 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute 
it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +from unittest import TestCase, mock + +from click import ClickException +from ack.readers.confluence.reader import ConfluenceReader + +KEY1_RAW_RESPONSE_PAGE0 = { + "results": [ + { + "title": "Making API requests with ACK", + "space": {"name": "How To Guides"}, + "metadata": {"labels": {"results": [{"name": "api"}]}}, + }, + { + "title": "Writting a Client Case", + "space": {"name": "How To Guides"}, + "metadata": {"labels": {"results": [{"name": "confluence"}]}}, + }, + ], + "_links": {"next": "link_to_next_request_page"}, +} + +KEY1_RAW_RESPONSE_PAGE1 = { + "results": [ + { + "title": "Developping with Github", + "space": {"name": "How To Guides"}, + "metadata": {"labels": {"results": [{"name": "git"}]}}, + } + ], + "_links": {}, +} + +KEY1_FINAL_RECORDS = [ + {"title": "Making API requests with ACK", "space.name": "How To Guides", "label_names": "api"}, + {"title": "Writting a Client Case", "space.name": "How To Guides", "label_names": "confluence"}, + {"title": "Developping with Github", "space.name": "How To Guides", "label_names": "git"}, +] + +KEY2_FINAL_RECORDS = [ + {"title": "Samsung - Precision Marketing", "space.name": "Clients", "label_names": "pm"}, + {"title": "P&G - Demand Sensing", "space.name": "Clients", "label_names": "ai"}, + {"title": "Orange - Call center automation", 
"space.name": "Clients", "label_names": "ai"}, +] + + +class ConfluenceReaderTest(TestCase): + + kwargs = { + "user_login": "firstname.name@your-domain.com", + "api_token": "aAbBcCdDeE12fFgGhHiIjJ34", + "atlassian_domain": "https://your-domain.com", + "content_type": "page", + "spacekey": [], + "field": ["title", "space.name", "label_names"], + } + + @mock.patch( + "ack.readers.confluence.reader.CUSTOM_FIELDS", + { + "custom_field_A": {"specific_to_spacekeys": ["KEY1"]}, + "custom_field_B": {"specific_to_spacekeys": ["KEY1", "KEY2"]}, + "custom_field_C": {}, + }, + ) + def test__validate_spacekeys(self): + temp_kwargs = self.kwargs.copy() + temp_kwargs.update({"field": ["custom_field_A", "custom_field_B", "custom_field_C"]}) + with self.assertRaises(ClickException): + ConfluenceReader(**temp_kwargs) + + def test__build_headers(self): + output = ConfluenceReader(**self.kwargs).headers + expected = { + "Authorization": "Basic Zmlyc3RuYW1lLm5hbWVAeW91ci1kb21haW4uY29tOmFBYkJjQ2REZUUxMmZGZ0doSGlJakozNA==", + "Content-Type": "application/json", + } + self.assertDictEqual(output, expected) + + def test__build_params(self): + output = ConfluenceReader(**self.kwargs)._build_params() + expected = {"type": "page", "expand": "title,space.name,metadata.labels.results"} + self.assertDictEqual(output, expected) + + @mock.patch.object(ConfluenceReader, "_get_raw_response", side_effect=[KEY1_RAW_RESPONSE_PAGE0, KEY1_RAW_RESPONSE_PAGE1]) + def test__get_report_generator(self, mock_get_raw_response): + temp_kwargs = self.kwargs.copy() + temp_kwargs.update({"spacekey": ["KEY1"]}) + output = ConfluenceReader(**self.kwargs)._get_report_generator() + expected = iter(KEY1_FINAL_RECORDS) + for output_record, expected_record in zip(output, expected): + self.assertEqual(output_record, expected_record) + + @mock.patch.object( + ConfluenceReader, "_get_report_generator", side_effect=[iter(KEY1_FINAL_RECORDS), iter(KEY2_FINAL_RECORDS)] + ) + def test__get_aggregated_report_generator(self, 
mock_get_report_generator): + temp_kwargs = self.kwargs.copy() + temp_kwargs.update({"spacekey": ["KEY1", "KEY2"]}) + output = ConfluenceReader(**self.kwargs)._get_aggregated_report_generator() + expected = iter(KEY1_FINAL_RECORDS + KEY2_FINAL_RECORDS) + for output_record, expected_record in zip(output, expected): + self.assertEqual(output_record, expected_record) diff --git a/tests/readers/facebook/__init__.py b/tests/readers/facebook/__init__.py new file mode 100644 index 00000000..d46139b7 --- /dev/null +++ b/tests/readers/facebook/__init__.py @@ -0,0 +1,17 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. diff --git a/tests/readers/test_facebook_reader.py b/tests/readers/facebook/test_reader.py similarity index 59% rename from tests/readers/test_facebook_reader.py rename to tests/readers/facebook/test_reader.py index 1dc1eab4..4a4baf25 100644 --- a/tests/readers/test_facebook_reader.py +++ b/tests/readers/facebook/test_reader.py @@ -17,14 +17,19 @@ # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
from unittest import TestCase, mock -from parameterized import parameterized + from click import ClickException +from facebook_business.adobjects.ad import Ad +from facebook_business.adobjects.adsinsights import AdsInsights +from facebook_business.api import FacebookAdsApi +from ack.readers.facebook.reader import FacebookReader +from parameterized import parameterized -from nck.readers.facebook_reader import FacebookReader -from facebook_business.api import FacebookAdsApi -from facebook_business.adobjects.adsinsights import AdsInsights -from facebook_business.adobjects.ad import Ad +def mock_facebook_obj(data): + mocked_facebook_obj = mock.MagicMock() + mocked_facebook_obj._data = data + return mocked_facebook_obj class FacebookReaderTest(TestCase): @@ -43,46 +48,24 @@ class FacebookReaderTest(TestCase): "action_breakdown": [], "field": [], "time_increment": None, - "start_date": None, - "end_date": None, + "start_date": "2020-01-01", + "end_date": "2020-02-02", "date_preset": None, "add_date_to_report": False, } @parameterized.expand( [ - ( - "object_type_and_level_combination", - {"object_type": "ad", "level": "account"}, - ), - ( - "ad_insights_level", - {"ad_insights": True, "object_type": "creative", "level": "creative"}, - ), - ( - "ad_insights_breakdowns", - {"ad_insights": True, "field": ["age"], "breakdown": []}, - ), + ("object_type_and_level_combination", {"object_type": "ad", "level": "account"}), + ("ad_insights_level", {"ad_insights": True, "object_type": "creative", "level": "creative"}), + ("ad_insights_breakdowns", {"ad_insights": True, "field": ["age"], "breakdown": []}), ( "ad_insights_action_breakdowns", - { - "ad_insights": True, - "field": ["actions[action_type:link_click]"], - "action_breakdown": [], - }, - ), - ( - "ad_management_inputs_breakdown_check", - {"ad_insights": False, "breakdown": ["age"]}, - ), - ( - "ad_management_inputs_action_breakdown_check", - {"ad_insights": False, "action_breakdown": ["action_type"]}, - ), - ( - 
"ad_management_inputs_time_increment_check", - {"ad_insights": False, "time_increment": "1"}, + {"ad_insights": True, "field": ["actions[action_type:link_click]"], "action_breakdown": []}, ), + ("ad_management_inputs_breakdown_check", {"ad_insights": False, "breakdown": ["age"]}), + ("ad_management_inputs_action_breakdown_check", {"ad_insights": False, "action_breakdown": ["action_type"]}), + ("ad_management_inputs_time_increment_check", {"ad_insights": False, "time_increment": "1"}), ] ) @mock.patch.object(FacebookAdsApi, "init", lambda *args: None) @@ -97,11 +80,7 @@ def test_get_api_fields(self): temp_kwargs = self.kwargs.copy() temp_kwargs.update( { - "field": [ - "impressions", - "link_url_asset[website_url]", - "actions[action_type:link_click]", - ], + "field": ["impressions", "link_url_asset[website_url]", "actions[action_type:link_click]"], "breakdown": ["link_url_asset"], "action_breakdown": ["action_type"], } @@ -115,11 +94,7 @@ def test_get_field_paths(self): temp_kwargs = self.kwargs.copy() temp_kwargs.update( { - "field": [ - "impressions", - "link_url_asset[website_url]", - "actions[action_type:link_click]", - ], + "field": ["impressions", "link_url_asset[website_url]", "actions[action_type:link_click]"], "breakdown": ["link_url_asset"], "action_breakdown": ["action_type"], } @@ -131,14 +106,12 @@ def test_get_field_paths(self): ] self.assertEqual(FacebookReader(**temp_kwargs)._field_paths, expected) - @mock.patch("nck.readers.facebook_reader.FacebookReader.query_ad_insights") + @mock.patch("ack.readers.facebook.reader.FacebookReader.query_ad_insights") @mock.patch.object(FacebookReader, "get_params", lambda *args: {}) @mock.patch.object(FacebookAdsApi, "init", lambda *args: None) def test_read_with_ad_insights_query(self, mock_query_ad_insights): temp_kwargs = self.kwargs.copy() - temp_kwargs.update( - {"ad_insights": True, "field": ["date_start", "impressions"]} - ) + temp_kwargs.update({"ad_insights": True, "field": ["date_start", "impressions"]}) 
row1, row2 = AdsInsights(), AdsInsights() row1.set_data({"date_start": "2020-01-01", "impressions": "1"}) @@ -154,7 +127,7 @@ def test_read_with_ad_insights_query(self, mock_query_ad_insights): for record, report in zip(data.readlines(), iter(expected)): self.assertEqual(record, report) - @mock.patch("nck.readers.facebook_reader.FacebookReader.query_ad_management") + @mock.patch("ack.readers.facebook.reader.FacebookReader.query_ad_management") @mock.patch.object(FacebookReader, "get_params", lambda *args: {}) @mock.patch.object(FacebookAdsApi, "init", lambda *args: None) def test_read_with_ad_management_query(self, mock_query_ad_management): @@ -191,20 +164,9 @@ def test_read_with_ad_management_query(self, mock_query_ad_management): ), ( "action_breakdown_field_without_filters", - { - "field": ["actions"], - "action_breakdown": ["action_type", "action_device"], - }, - { - "actions": [ - {"action_type": "link_click", "value": "0"}, - {"action_type": "post_engagement", "value": "1"}, - ] - }, - { - "actions[action_type:link_click]": "0", - "actions[action_type:post_engagement]": "1", - }, + {"field": ["actions"], "action_breakdown": ["action_type", "action_device"]}, + {"actions": [{"action_type": "link_click", "value": "0"}, {"action_type": "post_engagement", "value": "1"}]}, + {"actions[action_type:link_click]": "0", "actions[action_type:post_engagement]": "1"}, ), ( "action_breakdown_field_without_filters", @@ -214,26 +176,10 @@ def test_read_with_ad_management_query(self, mock_query_ad_management): }, { "actions": [ - { - "action_type": "link_click", - "action_device": "iphone", - "value": "0", - }, - { - "action_type": "post_engagement", - "action_device": "iphone", - "value": "1", - }, - { - "action_type": "link_click", - "action_device": "desktop", - "value": "2", - }, - { - "action_type": "post_engagement", - "action_device": "desktop", - "value": "3", - }, + {"action_type": "link_click", "action_device": "iphone", "value": "0"}, + {"action_type": 
"post_engagement", "action_device": "iphone", "value": "1"}, + {"action_type": "link_click", "action_device": "desktop", "value": "2"}, + {"action_type": "post_engagement", "action_device": "desktop", "value": "3"}, ] }, {"actions[action_type:link_click][action_device:iphone]": "0"}, @@ -244,12 +190,59 @@ def test_read_with_ad_management_query(self, mock_query_ad_management): {"impressions": "1"}, {"impressions": "1"}, ), + ( + "various_field_formats", + {"field": ["f_string", "f_numeric", "f_list_of_single_values", "f_python_obj", "f_facebook_obj"]}, + { + "f_string": "CAMPAIGN_PAUSED", + "f_numeric": 10.95, + "f_list_of_single_values": ["CAMPAIGN_PAUSED", 1, 10.95], + "f_python_obj": [{"event": "CLICK_THROUGH", "days": 28}, {"event": "VIEW_THROUGH", "days": 1}], + "f_facebook_obj": mock_facebook_obj({"id": "123456789", "display_name": "my_object_name"}), + }, + { + "f_string": "CAMPAIGN_PAUSED", + "f_numeric": "10.95", + "f_list_of_single_values": "CAMPAIGN_PAUSED, 1, 10.95", + "f_python_obj": "[{'event': 'CLICK_THROUGH', 'days': 28}, {'event': 'VIEW_THROUGH', 'days': 1}]", + "f_facebook_obj": "{'id': '123456789', 'display_name': 'my_object_name'}", + }, + ), ] ) @mock.patch.object(FacebookAdsApi, "init", lambda *args: None) def test_format_and_yield(self, name, parameters, record, expected): temp_kwargs = self.kwargs.copy() temp_kwargs.update(parameters) - self.assertEqual( - next(FacebookReader(**temp_kwargs).format_and_yield(record)), expected - ) + self.assertEqual(next(FacebookReader(**temp_kwargs).format_and_yield(record)), expected) + + @parameterized.expand( + [ + ("simple_list_of_dicts", [{"event": "CLICK_THROUGH", "days": 28}, {"event": "VIEW_THROUGH", "days": 1}], False), + ( + "action_breakdown_list_of_dicts", + [ + {"action_type": "link_click", "action_device": "iphone", "value": "0"}, + {"action_type": "post_engagement", "action_device": "iphone", "value": "1"}, + ], + True, + ), + ] + ) + def test_obj_follows_action_breakdown_pattern(self, name, 
obj, expected): + from ack.readers.facebook.helper import obj_follows_action_breakdown_pattern + + output = obj_follows_action_breakdown_pattern(obj) + self.assertEqual(output, expected) + + @parameterized.expand( + [ + ("list_of_dicts", [{"event": "CLICK_THROUGH", "days": 28}, {"event": "VIEW_THROUGH", "days": 1}], False), + ("list_of_single_values", ["CAMPAIGN_PAUSED", 1, 10.95], True), + ] + ) + def test_obj_is_list_of_single_values(self, name, obj, expected): + from ack.readers.facebook.helper import obj_is_list_of_single_values + + output = obj_is_list_of_single_values(obj) + self.assertEqual(output, expected) diff --git a/tests/readers/google_ads/__init__.py b/tests/readers/google_ads/__init__.py new file mode 100644 index 00000000..d46139b7 --- /dev/null +++ b/tests/readers/google_ads/__init__.py @@ -0,0 +1,17 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
diff --git a/tests/readers/test_googleads_reader.py b/tests/readers/google_ads/test_reader.py similarity index 61% rename from tests/readers/test_googleads_reader.py rename to tests/readers/google_ads/test_reader.py index 6e48e82d..9e05d32c 100644 --- a/tests/readers/test_googleads_reader.py +++ b/tests/readers/google_ads/test_reader.py @@ -15,24 +15,26 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -from unittest import TestCase, mock -from parameterized import parameterized + import datetime -from click import ClickException +from unittest import TestCase, mock -from nck.readers.googleads_reader import GoogleAdsReader, DATEFORMAT -from nck.helpers.googleads_helper import DATE_RANGE_TYPE_POSSIBLE_VALUES +from click import ClickException +from ack.readers.google_ads.reader import GoogleAdsReader +from ack.readers.google_ads.config import DATEFORMAT +from ack.utils.exceptions import InconsistentDateDefinitionException, NoDateDefinitionException +from parameterized import parameterized def mock_query(*args, **kwargs): - example_row1 = 'ad_group_example,2019-01-01,0' - example_row2 = 'ad_group_example,2019-01-01,4' + example_row1 = "ad_group_example,2019-01-01,0" + example_row2 = "ad_group_example,2019-01-01,4" return lambda x: [example_row1, example_row2] def mock_video_query(*args, **kwargs): - example_row1 = '1234567890\n' - example_row2 = '1111111111\n' + example_row1 = "1234567890\n" + example_row2 = "1111111111\n" return lambda x: [example_row1, example_row2] @@ -52,9 +54,9 @@ def mock_googleads_reader(self, **kwargs): "client_customer_ids": ["123-456-7890"], "report_name": "Custom Report", "report_type": "AD_PERFORMANCE_REPORT", - "date_range_type": "LAST_7_DAYS", - "start_date": "", - "end_date": "", + "date_range_type": "CUSTOM_DATE", + "start_date": datetime.date(2019, 1, 1), + 
"end_date": datetime.date(2019, 3, 1), "download_format": "CSV", "fields": ("CampaignId", "Date", "Impressions"), "report_filter": {}, @@ -72,46 +74,40 @@ def test_format_customer_id(self): @mock.patch.object(GoogleAdsReader, "__init__", mock_googleads_reader) def test_add_report_filter(self): - report_filter = {'field': "CampaignName", 'operator': 'IN', 'values': ['example']} - report_definition = {'selector': {}} - expected_output = {'selector': {'predicates': report_filter}} + report_filter = {"field": "CampaignName", "operator": "IN", "values": ["example"]} + report_definition = {"selector": {}} + expected_output = {"selector": {"predicates": report_filter}} temp_kwargs = self.kwargs.copy() - temp_kwargs.update({'report_filter': report_filter}) + temp_kwargs.update({"report_filter": report_filter}) GoogleAdsReader(**temp_kwargs).add_report_filter(report_definition) assert report_definition == expected_output @mock.patch.object(GoogleAdsReader, "__init__", mock_googleads_reader) def test_missing_field_report_filter(self): - missing_field = {'wrong_key': "CampaignName", 'operator': 'IN', 'values': ['example']} - report_definition = {'selector': {}} + missing_field = {"wrong_key": "CampaignName", "operator": "IN", "values": ["example"]} + report_definition = {"selector": {}} with self.assertRaises(ClickException): temp_kwargs = self.kwargs.copy() - temp_kwargs.update({'report_filter': missing_field}) + temp_kwargs.update({"report_filter": missing_field}) GoogleAdsReader(**temp_kwargs).add_report_filter(report_definition) @mock.patch.object(GoogleAdsReader, "__init__", mock_googleads_reader) def test_invalid_report_filter(self): - not_a_dict = ['field', 'operator', 'values'] - report_definition = {'selector': {}} + not_a_dict = ["field", "operator", "values"] + report_definition = {"selector": {}} with self.assertRaises(AttributeError): temp_kwargs = self.kwargs.copy() - temp_kwargs.update({'report_filter': not_a_dict}) + temp_kwargs.update({"report_filter": 
not_a_dict}) GoogleAdsReader(**temp_kwargs).add_report_filter(report_definition) - @mock.patch("nck.readers.googleads_reader.GoogleAdsReader.fetch_report_from_gads_client_customer_obj") - @mock.patch("nck.readers.googleads_reader.codecs.getreader", side_effect=mock_query) + @mock.patch("ack.readers.google_ads.reader.GoogleAdsReader.fetch_report_from_gads_client_customer_obj") + @mock.patch("ack.readers.google_ads.reader.codecs.getreader", side_effect=mock_query) @mock.patch.object(GoogleAdsReader, "__init__", mock_googleads_reader) def test_read_data(self, mock_report, mock_query): reader = GoogleAdsReader(**self.kwargs) expected = [ - { - "AdGroupName": "ad_group_example", - "Date": "2019-01-01", - "Impressions": "0"}, - { - "AdGroupName": "ad_group_example", - "Date": "2019-01-01", - "Impressions": "4"} + {"AdGroupName": "ad_group_example", "Date": "2019-01-01", "Impressions": "0"}, + {"AdGroupName": "ad_group_example", "Date": "2019-01-01", "Impressions": "4"}, ] for data in reader.read(): @@ -119,25 +115,17 @@ def test_read_data(self, mock_report, mock_query): for record, output in zip(data.readlines(), iter(expected)): assert record == output - @mock.patch("nck.readers.googleads_reader.GoogleAdsReader.fetch_report_from_gads_client_customer_obj") - @mock.patch("nck.readers.googleads_reader.codecs.getreader", side_effect=mock_query) + @mock.patch("ack.readers.google_ads.reader.GoogleAdsReader.fetch_report_from_gads_client_customer_obj") + @mock.patch("ack.readers.google_ads.reader.codecs.getreader", side_effect=mock_query) @mock.patch.object(GoogleAdsReader, "__init__", mock_googleads_reader) def test_read_data_and_include_account_id(self, mock_report, mock_query): temp_kwargs = self.kwargs.copy() - temp_kwargs.update({'include_client_customer_id': True}) + temp_kwargs.update({"include_client_customer_id": True}) reader = GoogleAdsReader(**temp_kwargs) expected = [ - { - "AdGroupName": "ad_group_example", - "AccountId": "123-456-7890", - "Date": "2019-01-01", - 
"Impressions": "0"}, - { - "AdGroupName": "ad_group_example", - "AccountId": "123-456-7890", - "Date": "2019-01-01", - "Impressions": "4"} + {"AdGroupName": "ad_group_example", "AccountId": "123-456-7890", "Date": "2019-01-01", "Impressions": "0"}, + {"AdGroupName": "ad_group_example", "AccountId": "123-456-7890", "Date": "2019-01-01", "Impressions": "4"}, ] for data in reader.read(): @@ -145,16 +133,13 @@ def test_read_data_and_include_account_id(self, mock_report, mock_query): for record, output in zip(data.readlines(), iter(expected)): assert record == output - @mock.patch("nck.readers.googleads_reader.GoogleAdsReader.fetch_report_from_gads_client_customer_obj") - @mock.patch("nck.readers.googleads_reader.codecs.getreader", side_effect=mock_video_query) + @mock.patch("ack.readers.google_ads.reader.GoogleAdsReader.fetch_report_from_gads_client_customer_obj") + @mock.patch("ack.readers.google_ads.reader.codecs.getreader", side_effect=mock_video_query) @mock.patch.object(GoogleAdsReader, "__init__", mock_googleads_reader) def test_list_video_campaign_ids(self, mock_report, mock_query): temp_kwargs = self.kwargs.copy() temp_kwargs.update({"filter_on_video_campaigns": True}) - expected = set([ - '1234567890', - '1111111111', - ]) + expected = set(["1234567890", "1111111111"]) set_ids = GoogleAdsReader(**temp_kwargs).list_video_campaign_ids() assert len(set_ids) != 0 assert set_ids == expected @@ -167,24 +152,12 @@ def test_no_campaignid_for_video_report_definition(self): with self.assertRaises(ClickException): GoogleAdsReader(**temp_kwargs).get_video_campaign_report_definition() - @mock.patch.object(GoogleAdsReader, "__init__", mock_googleads_reader) - def test_get_video_report_definition_standard_date(self): - video_report_def = GoogleAdsReader(**self.kwargs).get_video_campaign_report_definition() - expected_output_standard_date = { - "reportName": "video campaigns ids", - "dateRangeType": "LAST_7_DAYS", - "reportType": "VIDEO_PERFORMANCE_REPORT", - 
"downloadFormat": "CSV", - "selector": {"fields": "CampaignId"}, - } - assert video_report_def == expected_output_standard_date - @mock.patch.object(GoogleAdsReader, "__init__", mock_googleads_reader) def test_get_video_report_definition_custom_date(self): custom_date_param = { - 'date_range_type': "CUSTOM_DATE", - 'start_date': datetime.date(2019, 1, 1), - 'end_date': datetime.date(2019, 3, 1) + "date_range_type": "CUSTOM_DATE", + "start_date": datetime.date(2019, 1, 1), + "end_date": datetime.date(2019, 3, 1), } temp_kwargs = self.kwargs.copy() temp_kwargs.update(custom_date_param) @@ -194,14 +167,11 @@ def test_get_video_report_definition_custom_date(self): "dateRangeType": "CUSTOM_DATE", "reportType": "VIDEO_PERFORMANCE_REPORT", "downloadFormat": "CSV", - "selector": { - "fields": "CampaignId", - "dateRange": {"min": "20190101", "max": "20190301"}, - }, + "selector": {"fields": "CampaignId", "dateRange": {"min": "20190101", "max": "20190301"}}, } assert video_report_def == expected_output_custom_date - @parameterized.expand(['1231231234', '123_123_1234', 'abc-abc-abcd', '1234-123-123']) + @parameterized.expand(["1231231234", "123_123_1234", "abc-abc-abcd", "1234-123-123"]) @mock.patch.object(GoogleAdsReader, "__init__", mock_googleads_reader) def test_refuse_incorrect_id(self, invalid_input): """Test the function checking the Client Customer IDs @@ -210,7 +180,7 @@ def test_refuse_incorrect_id(self, invalid_input): expected = None assert GoogleAdsReader(**self.kwargs).valid_client_customer_id(invalid_input) == expected - @parameterized.expand(['123-123-1234']) + @parameterized.expand(["123-123-1234"]) @mock.patch.object(GoogleAdsReader, "__init__", mock_googleads_reader) def test_validate_correct_id(self, valid_input): """Test the function checking the Client Customer IDs @@ -219,28 +189,45 @@ def test_validate_correct_id(self, valid_input): cond = GoogleAdsReader(**self.kwargs).valid_client_customer_id(valid_input) assert cond - @parameterized.expand([ - 
["end_date", {'date_range_type': "CUSTOM_DATE", 'start_date': datetime.date(2019, 1, 1), 'end_date': None}], - ["start_date", {'date_range_type': "CUSTOM_DATE", 'start_date': None, 'end_date': datetime.date(2019, 1, 1)}], - ["all_dates", {'date_range_type': "CUSTOM_DATE", 'start_date': None, 'end_date': None}], - ]) + @parameterized.expand( + [ + [ + {"date_range_type": "CUSTOM_DATE", "start_date": datetime.date(2019, 1, 1), "end_date": None}, + NoDateDefinitionException, + ], + [ + {"date_range_type": "CUSTOM_DATE", "start_date": None, "end_date": datetime.date(2019, 1, 1)}, + NoDateDefinitionException, + ], + [{"date_range_type": "CUSTOM_DATE", "start_date": None, "end_date": None}, NoDateDefinitionException], + [ + { + "date_range_type": "YESTERDAY", + "start_date": datetime.date(2019, 1, 1), + "end_date": datetime.date(2019, 3, 1), + }, + InconsistentDateDefinitionException, + ], + ] + ) @mock.patch.object(GoogleAdsReader, "__init__", mock_googleads_reader) - def test_add_invalid_custom_period_to_report_definition(self, name, invalid_parameter): + def test_add_invalid_custom_period_to_report_definition(self, invalid_parameter, exception): """Test that report definition dateRangeType is replaced by default value when no start_date and end_date are provided """ - expected_range_type = DATE_RANGE_TYPE_POSSIBLE_VALUES[0] - - report_definition = self.get_report_definition(invalid_parameter['date_range_type']) + with self.assertRaises(exception): + report_definition = self.get_report_definition(invalid_parameter["date_range_type"]) + temp_kwargs = self.kwargs.copy() + temp_kwargs.update(invalid_parameter) + GoogleAdsReader(**temp_kwargs).add_period_to_report_definition(report_definition) + @mock.patch.object(GoogleAdsReader, "__init__", mock_googleads_reader) + def test_add_period_to_report_definition_works_properly(self): + consistent_report_definition = {"date_range_type": "YESTERDAY", "start_date": None, "end_date": None} + report_definition = 
self.get_report_definition(consistent_report_definition["date_range_type"]) temp_kwargs = self.kwargs.copy() - temp_kwargs.update(invalid_parameter) - GoogleAdsReader(**temp_kwargs).add_period_to_report_definition(report_definition) - - assert report_definition['dateRangeType'] == expected_range_type - with self.assertRaises(KeyError): - report_definition['start_date'] - report_definition['end_date'] + temp_kwargs.update(consistent_report_definition) + self.assertIsNone(GoogleAdsReader(**temp_kwargs).add_period_to_report_definition(report_definition)) @mock.patch.object(GoogleAdsReader, "__init__", mock_googleads_reader) def test_add_valid_custom_period_to_report_definition(self): @@ -249,44 +236,46 @@ def test_add_valid_custom_period_to_report_definition(self): """ valid_parameter = { - 'date_range_type': "CUSTOM_DATE", - 'start_date': datetime.date(2019, 1, 1), - 'end_date': datetime.date(2021, 2, 1) + "date_range_type": "CUSTOM_DATE", + "start_date": datetime.date(2019, 1, 1), + "end_date": datetime.date(2021, 2, 1), } - report_definition = self.get_report_definition(valid_parameter['date_range_type']) + report_definition = self.get_report_definition(valid_parameter["date_range_type"]) expected_date_range = { - "min": valid_parameter['start_date'].strftime(DATEFORMAT), - "max": valid_parameter['end_date'].strftime(DATEFORMAT) + "min": valid_parameter["start_date"].strftime(DATEFORMAT), + "max": valid_parameter["end_date"].strftime(DATEFORMAT), } temp_kwargs = self.kwargs.copy() temp_kwargs.update(valid_parameter) GoogleAdsReader(**temp_kwargs).add_period_to_report_definition(report_definition) - assert report_definition['dateRangeType'] == valid_parameter['date_range_type'] - assert report_definition['selector']['dateRange'] == expected_date_range + assert report_definition["dateRangeType"] == valid_parameter["date_range_type"] + assert report_definition["selector"]["dateRange"] == expected_date_range - @parameterized.expand([ - ["with_date", {'date_range_type': 
"LAST_7_DAYS", 'start_date': datetime.date(2019, 1, 1), 'end_date': None}], - ["no_date", {'date_range_type': "LAST_30_DAYS", 'start_date': None, 'end_date': None}], - ]) + @parameterized.expand( + [ + ["with_date", {"date_range_type": "LAST_7_DAYS", "start_date": datetime.date(2019, 1, 1), "end_date": None}], + ["no_date", {"date_range_type": "LAST_30_DAYS", "start_date": None, "end_date": None}], + ] + ) @mock.patch.object(GoogleAdsReader, "__init__", mock_googleads_reader) def test_add_standard_period_to_report_definition(self, name, valid_parameter): """Test that report definition with classic dateRangeType is correctly implemented whether or not the user specify a date range (not taken into account) """ - report_definition = self.get_report_definition(valid_parameter['date_range_type']) + report_definition = self.get_report_definition(valid_parameter["date_range_type"]) temp_kwargs = self.kwargs.copy() temp_kwargs.update(valid_parameter) GoogleAdsReader(**temp_kwargs).add_period_to_report_definition(report_definition) - assert report_definition['dateRangeType'] == valid_parameter['date_range_type'] + assert report_definition["dateRangeType"] == valid_parameter["date_range_type"] with self.assertRaises(KeyError): - report_definition['start_date'] + report_definition["start_date"] with self.assertRaises(KeyError): - report_definition['end_date'] + report_definition["end_date"] @staticmethod def get_report_definition(date_range_type): diff --git a/tests/readers/google_analytics/__init__.py b/tests/readers/google_analytics/__init__.py new file mode 100644 index 00000000..d46139b7 --- /dev/null +++ b/tests/readers/google_analytics/__init__.py @@ -0,0 +1,17 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# 
version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. diff --git a/tests/readers/test_ga_reader.py b/tests/readers/google_analytics/test_reader.py similarity index 68% rename from tests/readers/test_ga_reader.py rename to tests/readers/google_analytics/test_reader.py index eb0ddc63..a3481e87 100644 --- a/tests/readers/test_ga_reader.py +++ b/tests/readers/google_analytics/test_reader.py @@ -15,43 +15,37 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ from datetime import datetime from unittest import TestCase, mock -from click import ClickException -from nck.readers.ga_reader import GaReader, GaStream +from click import ClickException +from ack.readers.google_analytics.reader import GoogleAnalyticsReader -class GaReaderTest(TestCase): +class GoogleAnalyticsReaderTest(TestCase): DATEFORMAT = "%Y-%m-%d" def mock_ga_reader(self, **kwargs): for param, value in kwargs.items(): setattr(self, param, value) - def test_normalized_ga_stream(self): - rows = [{"ga:date": 0, "ga:dimension": "dim", "ga:metric": "met"}, {"ga:date-date": 0}, {"ga:date date": 0}] - expected = [{"date": 0, "dimension": "dim", "metric": "met"}, {"date_date": 0}, {"date_date": 0}] - ga_stream = GaStream("stream", iter(rows)) - for row, output in zip(ga_stream.readlines(), iter(expected)): - assert row == output - def test_format_date(self): test_date = "20190101" wrong_date = "01/01/2019" - assert GaReader.format_date(test_date) == "2019-01-01" - self.assertRaises(ValueError, GaReader.format_date, wrong_date) + assert GoogleAnalyticsReader.format_date(test_date) == "2019-01-01" + self.assertRaises(ValueError, GoogleAnalyticsReader.format_date, wrong_date) def test_get_days_delta(self): inputs = ["PREVIOUS_DAY", "LAST_7_DAYS", "LAST_30_DAYS", "LAST_90_DAYS"] expected = [1, 7, 30, 90] - output = [GaReader.get_days_delta(input) for input in inputs] + output = [GoogleAnalyticsReader.get_days_delta(input) for input in inputs] assert output == expected fail = "PVRIOUES_DAY" - self.assertRaises(ClickException, GaReader.get_days_delta, fail) + self.assertRaises(ClickException, GoogleAnalyticsReader.get_days_delta, fail) - @mock.patch("nck.readers.ga_reader.GaReader._run_query") - @mock.patch.object(GaReader, "__init__", mock_ga_reader) + @mock.patch("ack.readers.google_analytics.reader.GoogleAnalyticsReader._run_query") + @mock.patch.object(GoogleAnalyticsReader, "__init__", mock_ga_reader) def test_read(self, mock_query): kwargs = { @@ -60,11 +54,11 @@ 
def test_read(self, mock_query): "start_date": datetime(2019, 1, 1), "view_ids": ["0", "1"], "end_date": datetime(2019, 1, 1), - "add_view": False + "add_view": False, } - reader = GaReader(**kwargs) + reader = GoogleAnalyticsReader(**kwargs) kwargs["add_view"] = True - reader_with_view_id = GaReader(**kwargs) + reader_with_view_id = GoogleAnalyticsReader(**kwargs) format_data_return_value = [ { @@ -72,20 +66,14 @@ def test_read(self, mock_query): "dimensions": ["ga:date"], "metricHeader": {"metricHeaderEntries": [{"name": "ga:users", "type": "INTEGER"}]}, }, - "data": { - "rows": [{"dimensions": ["20190101"], "metrics": [{"values": ["2"]}]}], - "isDataGolden": True, - }, + "data": {"rows": [{"dimensions": ["20190101"], "metrics": [{"values": ["2"]}]}], "isDataGolden": True}, }, { "columnHeader": { "dimensions": ["ga:date"], "metricHeader": {"metricHeaderEntries": [{"name": "ga:newUsers", "type": "INTEGER"}]}, }, - "data": { - "rows": [{"dimensions": ["20190101"], "metrics": [{"values": ["1"]}]}], - "isDataGolden": True, - }, + "data": {"rows": [{"dimensions": ["20190101"], "metrics": [{"values": ["1"]}]}], "isDataGolden": True}, }, ] @@ -98,8 +86,10 @@ def test_format_data(mock_query): mock_query.return_value = format_data_return_value expected = [ - {"date": "2019-01-01", "users": "2"}, {"date": "2019-01-01", "newUsers": "1"}, - {"date": "2019-01-01", "users": "2"}, {"date": "2019-01-01", "newUsers": "1"} + {"date": "2019-01-01", "users": "2"}, + {"date": "2019-01-01", "newUsers": "1"}, + {"date": "2019-01-01", "users": "2"}, + {"date": "2019-01-01", "newUsers": "1"}, ] for data in reader.read(): @@ -113,7 +103,7 @@ def test_format_data_and_view_id(mock_query): {"viewId": "0", "date": "2019-01-01", "users": "2"}, {"viewId": "0", "date": "2019-01-01", "newUsers": "1"}, {"viewId": "1", "date": "2019-01-01", "users": "2"}, - {"viewId": "1", "date": "2019-01-01", "newUsers": "1"} + {"viewId": "1", "date": "2019-01-01", "newUsers": "1"}, ] for data in 
reader_with_view_id.read(): diff --git a/tests/readers/google_dbm/__init__.py b/tests/readers/google_dbm/__init__.py new file mode 100644 index 00000000..d46139b7 --- /dev/null +++ b/tests/readers/google_dbm/__init__.py @@ -0,0 +1,17 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. diff --git a/tests/readers/test_dbm_reader.py b/tests/readers/google_dbm/test_reader.py similarity index 88% rename from tests/readers/test_dbm_reader.py rename to tests/readers/google_dbm/test_reader.py index eb676bc9..161862d9 100644 --- a/tests/readers/test_dbm_reader.py +++ b/tests/readers/google_dbm/test_reader.py @@ -15,22 +15,23 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ import datetime import unittest from unittest import mock -from nck.readers.dbm_reader import DbmReader +from ack.readers.google_dbm.reader import GoogleDBMReader -class TestDbmReader(unittest.TestCase): +class TestGoogleDBMReader(unittest.TestCase): def mock_dbm_reader(self, **kwargs): for param, value in kwargs.items(): setattr(self, param, value) - @mock.patch.object(DbmReader, "__init__", mock_dbm_reader) + @mock.patch.object(GoogleDBMReader, "__init__", mock_dbm_reader) def test_get_query_body(self): kwargs = {} - reader = DbmReader(**kwargs) + reader = GoogleDBMReader(**kwargs) reader.kwargs = {"filter": [("FILTER_ADVERTISER", 1)]} expected_query_body = { @@ -47,10 +48,10 @@ def test_get_query_body(self): self.assertDictEqual(reader.get_query_body(), expected_query_body) - @mock.patch.object(DbmReader, "__init__", mock_dbm_reader) + @mock.patch.object(GoogleDBMReader, "__init__", mock_dbm_reader) def test_get_query_body_ms_conversion(self): kwargs = {} - reader = DbmReader(**kwargs) + reader = GoogleDBMReader(**kwargs) reader.kwargs = { "filter": [("FILTER_ADVERTISER", 1)], "start_date": datetime.datetime(2020, 1, 15, tzinfo=datetime.timezone.utc), diff --git a/tests/readers/google_dcm/__init__.py b/tests/readers/google_dcm/__init__.py new file mode 100644 index 00000000..d46139b7 --- /dev/null +++ b/tests/readers/google_dcm/__init__.py @@ -0,0 +1,17 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. diff --git a/tests/readers/test_dcm_reader.py b/tests/readers/google_dcm/test_reader.py similarity index 83% rename from tests/readers/test_dcm_reader.py rename to tests/readers/google_dcm/test_reader.py index b084fd95..94f9fc2f 100644 --- a/tests/readers/test_dcm_reader.py +++ b/tests/readers/google_dcm/test_reader.py @@ -15,31 +15,30 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -from unittest import TestCase, mock -import logging -from nck.readers.dcm_reader import DcmReader +from unittest import TestCase, mock -logger = logging.getLogger("DCM_reader_test") +from ack.config import logger +from ack.readers.google_dcm.reader import GoogleDCMReader -class DCMReaderTest(TestCase): +class GoogleDCMReaderTest(TestCase): def mock_dcm_reader(self, **kwargs): for param, value in kwargs.items(): setattr(self, param, value) kwargs = {"metrics": ["impressions", "clicks"], "dimensions": ["date"]} - @mock.patch.object(DcmReader, "__init__", mock_dcm_reader) + @mock.patch.object(GoogleDCMReader, "__init__", mock_dcm_reader) def test_empty_data(self): - reader = DcmReader(**self.kwargs) + reader = GoogleDCMReader(**self.kwargs) input_report = (row for row in [b"No", b"Consistent", b"Data"]) if len(list(reader.format_response(input_report))) > 0: assert False, "Data is not empty" - @mock.patch.object(DcmReader, "__init__", mock_dcm_reader) + @mock.patch.object(GoogleDCMReader, "__init__", mock_dcm_reader) def test_format_data(self): - reader = DcmReader(**self.kwargs) + reader = GoogleDCMReader(**self.kwargs) input_report = 
(row for row in [b"x", b"x", b"Report Fields", b"headers", b"1,2,3", b"4,5,6", b"Grand Total"]) expected = [{"date": "1", "impressions": "2", "clicks": "3"}, {"date": "4", "impressions": "5", "clicks": "6"}] input_list = list(reader.format_response(input_report)) diff --git a/tests/readers/google_dv360/__init__.py b/tests/readers/google_dv360/__init__.py new file mode 100644 index 00000000..d46139b7 --- /dev/null +++ b/tests/readers/google_dv360/__init__.py @@ -0,0 +1,17 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. diff --git a/tests/readers/google_dv360/test_reader.py b/tests/readers/google_dv360/test_reader.py new file mode 100644 index 00000000..08a9f81a --- /dev/null +++ b/tests/readers/google_dv360/test_reader.py @@ -0,0 +1,47 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +from ack.readers.google_dv360.reader import GoogleDV360Reader +from unittest import TestCase, mock + + +class TestGoogleDV360Reader(TestCase): + def mock_dv360_reader(self, **kwargs): + for param, value in kwargs.items(): + setattr(self, param, value) + + @mock.patch.object(GoogleDV360Reader, "__init__", mock_dv360_reader) + def test_get_sdf_body(self): + kwargs = {} + reader = GoogleDV360Reader(**kwargs) + reader.kwargs = { + "file_type": ["FILE_TYPE_INSERTION_ORDER", "FILE_TYPE_CAMPAIGN"], + "filter_type": "FILTER_TYPE_ADVERTISER_ID", + "advertiser_id": "4242424", + } + + expected_query_body = { + "parentEntityFilter": { + "fileType": ["FILE_TYPE_INSERTION_ORDER", "FILE_TYPE_CAMPAIGN"], + "filterType": "FILTER_TYPE_ADVERTISER_ID", + }, + "version": "SDF_VERSION_5_2", + "advertiserId": "4242424", + } + + self.assertDictEqual(reader._GoogleDV360Reader__get_sdf_body(), expected_query_body) diff --git a/tests/readers/google_search_console/__init__.py b/tests/readers/google_search_console/__init__.py new file mode 100644 index 00000000..d46139b7 --- /dev/null +++ b/tests/readers/google_search_console/__init__.py @@ -0,0 +1,17 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later 
version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. diff --git a/tests/readers/test_search_console_reader.py b/tests/readers/google_search_console/test_reader.py similarity index 89% rename from tests/readers/test_search_console_reader.py rename to tests/readers/google_search_console/test_reader.py index 1acfe716..089d6780 100644 --- a/tests/readers/test_search_console_reader.py +++ b/tests/readers/google_search_console/test_reader.py @@ -15,15 +15,17 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ from datetime import datetime -from nck.readers.search_console_reader import SearchConsoleReader from unittest import TestCase, mock +from ack.readers.google_search_console.reader import GoogleSearchConsoleReader + -class SearchConsoleReaderTest(TestCase): +class GoogleSearchConsoleReaderTest(TestCase): DATEFORMAT = "%Y-%m-%d" - @mock.patch("nck.readers.search_console_reader.SearchConsoleReader._run_query") + @mock.patch("ack.readers.google_search_console.reader.GoogleSearchConsoleReader._run_query") def test_read(self, mock_query): kwargs = { "client_id": "", @@ -36,8 +38,9 @@ def test_read(self, mock_query): "end_date": datetime(2019, 1, 1), "date_column": False, "row_limit": "", + "date_range": None, } - reader = SearchConsoleReader(**kwargs) + reader = GoogleSearchConsoleReader(**kwargs) def test_read_empty_data(mock_query): mock_query.return_value = [{"responseAgregationType": "byPage"}] @@ -62,7 +65,7 @@ def test_format_data(mock_query): def test_format_data_with_date_column(mock_query): kwargs["date_column"] = True - reader = SearchConsoleReader(**kwargs) + reader = GoogleSearchConsoleReader(**kwargs) mock_query.return_value = [ {"rows": [{"keys": ["MOBILE"], "clicks": 1, "impressions": 2}], "responseAgregationType": "byPage"}, {"rows": [{"keys": ["DESKTOP"], "clicks": 3, "impressions": 4}], "responseAgregationType": "byPage"}, diff --git a/tests/readers/object_storage/__init__.py b/tests/readers/object_storage/__init__.py new file mode 100644 index 00000000..d46139b7 --- /dev/null +++ b/tests/readers/object_storage/__init__.py @@ -0,0 +1,17 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. diff --git a/tests/readers/object_storage/test_reader.py b/tests/readers/object_storage/test_reader.py new file mode 100644 index 00000000..5fef7f81 --- /dev/null +++ b/tests/readers/object_storage/test_reader.py @@ -0,0 +1,113 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ +import csv +import io +import json +from unittest import TestCase, mock + +from ack.readers.object_storage.reader import ObjectStorageReader +from parameterized import parameterized + +mock_csv_names = ["a.csv", "a.njson", "b.csv", "b.njson"] +mock_csv_files = [ + [["a", "b", "c"], [4, 5, 6], [7, 8, 9]], + [{"a": "4", "b": "5", "c": "6"}, {"a": "7", "b": "8", "c": "9"}], + [["a", "b", "c"], [4, 5, 6], [7, 8, 9]], + [{"a": "4", "b": "5", "c": "6"}, {"a": "7", "b": "8", "c": "9"}], +] + +mock_timestamp = [ + 1614179262, + 1614179272, + 1614179277, + 16141792778, +] + + +def mock_to_object(self, _object): + return _object + + +def mock_list_objects(self, bucket, prefix): + a = list(zip(mock_csv_names, mock_timestamp, mock_csv_files)) + return [x for x in a if x[0].startswith(prefix)] + + +def mock_get_timestamp(self, _object, **kwargs): + return _object[1] + + +def write_to_file(self, _object, f, **kwargs): + + if self._format == "csv": + + text_file = io.TextIOWrapper(f, encoding="utf-8", newline="") + w = csv.writer(text_file) + w.writerows(_object[2]) + text_file.detach() + + else: + + text_file = io.TextIOWrapper(f, encoding="utf-8") + for line in _object[2]: + + json.dump(line, text_file) + text_file.write("\n") + text_file.detach() + + +def mock_get_key(self, _object, **kwargs): + return _object[0] + + +@mock.patch("ack.readers.object_storage.reader.ObjectStorageReader.create_client") +@mock.patch("ack.readers.object_storage.reader.ObjectStorageReader.create_bucket") +@mock.patch.object(ObjectStorageReader, "download_object_to_file", write_to_file) +@mock.patch.object(ObjectStorageReader, "to_object", mock_to_object) +@mock.patch.object(ObjectStorageReader, "get_timestamp", mock_get_timestamp) +@mock.patch.object(ObjectStorageReader, "list_objects", mock_list_objects) +@mock.patch.object(ObjectStorageReader, "get_key", mock_get_key) +class ObjectStorageReaderTest(TestCase): + def test_wrong_format(self, a, b): + with self.assertRaises(NotImplementedError): + 
ObjectStorageReader( + bucket="", prefix=["a"], file_format="txt", dest_key_split=-1, csv_delimiter=",", csv_fieldnames=None + ) + + @parameterized.expand([("njson", 2), ("csv", 2)]) + def test_ObjectStorageReader_filter_files(self, a, b, format, nb_files_expected): + reader = ObjectStorageReader( + bucket="", prefix=[""], file_format=format, dest_key_split=-1, csv_delimiter=",", csv_fieldnames=None + ) + nb_file = len(list(reader.read())) + self.assertEqual(nb_file, nb_files_expected) + + @parameterized.expand( + [ + ("njson", [{"a": "4", "b": "5", "c": "6"}, {"a": "7", "b": "8", "c": "9"}]), + ("csv", [{"a": "4", "b": "5", "c": "6"}, {"a": "7", "b": "8", "c": "9"}]), + ] + ) + def test_ObjectStorageReader_read_all_file(self, a, b, format, expected): + reader = ObjectStorageReader( + bucket="", prefix=["a"], file_format="csv", dest_key_split=-1, csv_delimiter=",", csv_fieldnames=None + ) + for file in reader.read(): + for expect, data in zip(expected, file.readlines()): + self.assertEqual(expect, data) diff --git a/tests/readers/radarly/__init__.py b/tests/readers/radarly/__init__.py new file mode 100644 index 00000000..d46139b7 --- /dev/null +++ b/tests/readers/radarly/__init__.py @@ -0,0 +1,17 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. diff --git a/tests/readers/test_radarly_reader.py b/tests/readers/radarly/test_reader.py similarity index 82% rename from tests/readers/test_radarly_reader.py rename to tests/readers/radarly/test_reader.py index 0651dbb9..57867333 100644 --- a/tests/readers/test_radarly_reader.py +++ b/tests/readers/radarly/test_reader.py @@ -15,34 +15,28 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -from nck.readers.radarly_reader import RadarlyReader -from unittest import TestCase, mock -from unittest.mock import MagicMock -import logging +import json from datetime import datetime, timedelta from typing import Tuple +from unittest import TestCase, mock +from unittest.mock import MagicMock + import numpy as np -import json +from ack.config import logger +from ack.readers.radarly.reader import RadarlyReader -def create_mock_payload( - start_date: datetime, end_date: datetime -) -> Tuple[datetime, datetime, int]: +def create_mock_payload(start_date: datetime, end_date: datetime) -> Tuple[datetime, datetime, int]: return (start_date, end_date, int((end_date - start_date).total_seconds() * 2)) -def create_mock_publications_iterator( - param: Tuple[datetime, datetime, int] -) -> MagicMock: +def create_mock_publications_iterator(param: Tuple[datetime, datetime, int]) -> MagicMock: start_date, end_date, total = param delta = (end_date - start_date).total_seconds() mock_publications_iterator = MagicMock() mocked_publications = iter( - [ - {"date": start_date + timedelta(x), "text": "random text"} - for x in np.linspace(start=0, stop=delta, num=total) - ] + [{"date": start_date + timedelta(x), 
"text": "random text"} for x in np.linspace(start=0, stop=delta, num=total)] ) mock_publications_iterator.__iter__.return_value = mocked_publications mock_publications_iterator.__next__ = lambda x: next(mocked_publications) @@ -52,11 +46,11 @@ def create_mock_publications_iterator( class RadarlyReaderTest(TestCase): - @mock.patch("nck.readers.radarly_reader.RadarlyApi") - @mock.patch("nck.readers.radarly_reader.Project") - @mock.patch("nck.readers.radarly_reader.RadarlyReader.get_payload") + @mock.patch("ack.readers.radarly.reader.RadarlyApi") + @mock.patch("ack.readers.radarly.reader.Project") + @mock.patch("ack.readers.radarly.reader.RadarlyReader.get_payload") def test_read(self, mock_get_payload, mock_Project, mock_RadarlyApi): - mock_RadarlyApi.init.side_effect = lambda client_id, client_secret: logging.info( + mock_RadarlyApi.init.side_effect = lambda client_id, client_secret: logger.info( "Mock RadarlyApi successfully initiated" ) mock_get_payload.side_effect = create_mock_payload diff --git a/tests/readers/test_dv360_reader.py b/tests/readers/test_dv360_reader.py deleted file mode 100644 index dc738e4b..00000000 --- a/tests/readers/test_dv360_reader.py +++ /dev/null @@ -1,30 +0,0 @@ -from nck.readers.dv360_reader import DV360Reader -from unittest import TestCase, mock - - -class TestDV360Reader(TestCase): - - def mock_dv360_reader(self, **kwargs): - for param, value in kwargs.items(): - setattr(self, param, value) - - @mock.patch.object(DV360Reader, '__init__', mock_dv360_reader) - def test_get_sdf_body(self): - kwargs = {} - reader = DV360Reader(**kwargs) - reader.kwargs = { - "file_type": ["FILE_TYPE_INSERTION_ORDER", "FILE_TYPE_CAMPAIGN"], - "filter_type": "FILTER_TYPE_ADVERTISER_ID", - "advertiser_id": "4242424" - } - - expected_query_body = { - "parentEntityFilter": { - "fileType": ["FILE_TYPE_INSERTION_ORDER", "FILE_TYPE_CAMPAIGN"], - "filterType": "FILTER_TYPE_ADVERTISER_ID" - }, - "version": "SDF_VERSION_5_2", - "advertiserId": "4242424" - } - - 
self.assertDictEqual(reader.get_sdf_body(), expected_query_body) diff --git a/tests/readers/test_yandex_statistics_reader.py b/tests/readers/test_yandex_statistics_reader.py deleted file mode 100644 index e27532a5..00000000 --- a/tests/readers/test_yandex_statistics_reader.py +++ /dev/null @@ -1,287 +0,0 @@ -# GNU Lesser General Public License v3.0 only -# Copyright (C) 2020 Artefact -# licence-information@artefact.com -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 3 of the License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, write to the Free Software Foundation, -# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-import datetime -import unittest - -import click -from parameterized import parameterized - -from nck.readers.yandex_statistics_reader import YandexStatisticsReader - - -class TestYandexStatisticsReader(unittest.TestCase): - - @parameterized.expand([ - ( - "ALL_TIME", - { - "report_language": "en", - "filters": (), - "max_rows": None, - "date_start": None, - "date_stop": None - }, - True, - { - "params": { - "SelectionCriteria": {}, - "FieldNames": ["AdFormat", "AdGroupId"], - "ReportName": "stats_report_2020-03_25", - "ReportType": "AD_PERFORMANCE_REPORT", - "DateRangeType": "ALL_TIME", - "Format": "TSV", - "IncludeVAT": "YES" - } - } - ), - ( - "ALL_TIME", - { - "report_language": "en", - "filters": (), - "max_rows": None, - "date_start": None, - "date_stop": None - }, - False, - { - "params": { - "SelectionCriteria": {}, - "FieldNames": ["AdFormat", "AdGroupId"], - "ReportName": "stats_report_2020-03_25", - "ReportType": "AD_PERFORMANCE_REPORT", - "DateRangeType": "ALL_TIME", - "Format": "TSV", - "IncludeVAT": "NO" - } - } - ), - ( - "CUSTOM_DATE", - { - "report_language": "en", - "filters": (), - "max_rows": 25, - "date_start": datetime.datetime(2020, 3, 5, 0, 0), - "date_stop": datetime.datetime(2020, 3, 25, 0, 0) - }, - False, - { - "params": { - "SelectionCriteria": { - "DateFrom": "2020-03-05", - "DateTo": "2020-03-25" - }, - "Page": { - "Limit": 25 - }, - "FieldNames": ["AdFormat", "AdGroupId"], - "ReportName": "stats_report_2020-03_25", - "ReportType": "AD_PERFORMANCE_REPORT", - "DateRangeType": "CUSTOM_DATE", - "Format": "TSV", - "IncludeVAT": "NO" - } - } - ), - ( - "CUSTOM_DATE", - { - "report_language": "en", - "filters": ( - ("AdGroupId", "EQUALS", ["1"]), - ("CampaignId", "IN", ["1", "2"]) - ), - "max_rows": 25, - "date_start": datetime.datetime(2020, 3, 5, 0, 0), - "date_stop": datetime.datetime(2020, 3, 25, 0, 0) - }, - False, - { - "params": { - "SelectionCriteria": { - "DateFrom": "2020-03-05", - "DateTo": "2020-03-25", - "Filter": [ - { - 
"Field": "AdGroupId", - "Operator": "EQUALS", - "Values": ["1"] - }, - { - "Field": "CampaignId", - "Operator": "IN", - "Values": ["1", "2"] - } - ] - }, - "Page": { - "Limit": 25 - }, - "FieldNames": ["AdFormat", "AdGroupId"], - "ReportName": "stats_report_2020-03_25", - "ReportType": "AD_PERFORMANCE_REPORT", - "DateRangeType": "CUSTOM_DATE", - "Format": "TSV", - "IncludeVAT": "NO" - } - } - ) - ]) - def test_get_query_body( - self, - date_range, - kwargs, - include_vat, - expected_query_body - ): - reader = YandexStatisticsReader( - "123", - ("AdFormat", "AdGroupId"), - "AD_PERFORMANCE_REPORT", - "stats_report_2020-03_25", - date_range, - include_vat, - report_language=kwargs["report_language"], - filters=kwargs["filters"], - max_rows=kwargs["max_rows"], - date_start=kwargs["date_start"], - date_stop=kwargs["date_stop"] - ) - self.assertDictEqual(reader._build_request_body(), expected_query_body) - - @parameterized.expand(["en", "ru", "uk"]) - def test_request_headers(self, report_language): - reader = YandexStatisticsReader( - "123", - ("AdFormat", "AdGroupId"), - "AD_PERFORMANCE_REPORT", - "stats_report_2020-03_25", - "ALL_TIME", - True, - report_language=report_language, - filters=(), - max_rows=None, - date_start=None, - date_stop=None - ) - self.assertDictEqual( - { - "skipReportSummary": "true", - "Accept-Language": report_language - }, - reader._build_request_headers() - ) - - @parameterized.expand([ - ( - "ALL_TIME", - None, - None, - {} - ), - ( - "CUSTOM_DATE", - datetime.datetime(2020, 1, 1), - datetime.datetime(2020, 1, 2), - { - "DateFrom": "2020-01-01", - "DateTo": "2020-01-02" - } - ) - ]) - def test_custom_dates_correctly_set(self, date_range, start_date, stop_date, expected): - reader = YandexStatisticsReader( - "123", - ("AdFormat", "AdGroupId"), - "AD_PERFORMANCE_REPORT", - "stats_report_2020-03_25", - date_range, - True, - date_start=start_date, - date_stop=stop_date - ) - self.assertDictEqual( - expected, - reader._add_custom_dates_if_set() - 
) - - @parameterized.expand([ - ( - "ALL_TIME", - datetime.datetime(2020, 1, 1), - datetime.datetime(2020, 1, 2), - "Wrong date range. If start and stop dates are set, should be CUSTOM_DATE." - ), - ( - "CUSTOM_DATE", - None, - None, - "Missing at least one date. Have you set start and stop dates?" - ), - ( - "CUSTOM_DATE", - datetime.datetime(2020, 1, 1), - None, - "Missing at least one date. Have you set start and stop dates?" - ), - ( - "CUSTOM_DATE", - None, - datetime.datetime(2020, 1, 1), - "Missing at least one date. Have you set start and stop dates?" - ), - ( - "ALL_TIME", - None, - datetime.datetime(2020, 1, 1), - ( - "Wrong combination of date parameters. " - "Only use date start and date stop with date range set to CUSTOM_DATE." - ) - ), - ( - "ALL_TIME", - datetime.datetime(2020, 1, 1), - None, - ( - "Wrong combination of date parameters. " - "Only use date start and date stop with date range set to CUSTOM_DATE." - ) - ), - ]) - def test_custom_dates_not_correctly_set( - self, - date_range, - start_date, - stop_date, - error_message_expected - ): - reader = YandexStatisticsReader( - "123", - ("AdFormat", "AdGroupId"), - "AD_PERFORMANCE_REPORT", - "stats_report_2020-03_25", - date_range, - True, - date_start=start_date, - date_stop=stop_date - ) - with self.assertRaises(click.ClickException) as click_exception: - reader._add_custom_dates_if_set() - self.assertEquals(click_exception.exception.message, error_message_expected) diff --git a/tests/readers/the_trade_desk/__init__.py b/tests/readers/the_trade_desk/__init__.py new file mode 100644 index 00000000..d46139b7 --- /dev/null +++ b/tests/readers/the_trade_desk/__init__.py @@ -0,0 +1,17 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 
3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. diff --git a/tests/readers/test_ttd.py b/tests/readers/the_trade_desk/test_reader.py similarity index 57% rename from tests/readers/test_ttd.py rename to tests/readers/the_trade_desk/test_reader.py index 93892ac3..eda6d28f 100644 --- a/tests/readers/test_ttd.py +++ b/tests/readers/the_trade_desk/test_reader.py @@ -16,11 +16,11 @@ # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+from datetime import datetime from unittest import TestCase, mock -from nck.readers.ttd_reader import TheTradeDeskReader -from datetime import datetime -from click import ClickException +from ack.readers.the_trade_desk.reader import TheTradeDeskReader +from ack.utils.exceptions import DateDefinitionException class TheTradeDeskReaderTest(TestCase): @@ -33,20 +33,20 @@ class TheTradeDeskReaderTest(TestCase): "report_schedule_name": "adgroup_performance_schedule", "start_date": datetime(2020, 1, 1), "end_date": datetime(2020, 3, 1), - "normalize_stream": False + "date_range": None, } - @mock.patch("nck.readers.ttd_reader.TheTradeDeskReader._build_headers", return_value={}) + @mock.patch("ack.readers.the_trade_desk.reader.TheTradeDeskReader._build_headers", return_value={}) def test_validate_dates(self, mock_build_headers): temp_kwargs = self.kwargs.copy() params = {"start_date": datetime(2020, 1, 3), "end_date": datetime(2020, 1, 1)} temp_kwargs.update(params) - with self.assertRaises(ClickException): + with self.assertRaises(DateDefinitionException): TheTradeDeskReader(**temp_kwargs) - @mock.patch("nck.readers.ttd_reader.TheTradeDeskReader._build_headers", return_value={}) + @mock.patch("ack.readers.the_trade_desk.reader.TheTradeDeskReader._build_headers", return_value={}) @mock.patch( - "nck.readers.ttd_reader.TheTradeDeskReader._make_api_call", + "ack.readers.the_trade_desk.reader.TheTradeDeskReader._make_api_call", return_value={ "Result": [ { @@ -60,16 +60,14 @@ def test_validate_dates(self, mock_build_headers): "ResultCount": 1, }, ) - def test_get_report_template_id_if_exactly_1_match( - self, mock_build_headers, mock_api_call - ): + def test_get_report_template_id_if_exactly_1_match(self, mock_build_headers, mock_api_call): reader = TheTradeDeskReader(**self.kwargs) reader._get_report_template_id() self.assertEqual(reader.report_template_id, 1234) - @mock.patch("nck.readers.ttd_reader.TheTradeDeskReader._build_headers", return_value={}) + 
@mock.patch("ack.readers.the_trade_desk.reader.TheTradeDeskReader._build_headers", return_value={}) @mock.patch( - "nck.readers.ttd_reader.TheTradeDeskReader._make_api_call", + "ack.readers.the_trade_desk.reader.TheTradeDeskReader._make_api_call", return_value={ "Result": [ { @@ -90,30 +88,22 @@ def test_get_report_template_id_if_exactly_1_match( "ResultCount": 2, }, ) - def test_get_report_template_id_if_more_than_1_match( - self, mock_build_headers, mock_api_call - ): + def test_get_report_template_id_if_more_than_1_match(self, mock_build_headers, mock_api_call): with self.assertRaises(Exception): TheTradeDeskReader(**self.kwargs)._get_report_template_id() - @mock.patch("nck.readers.ttd_reader.TheTradeDeskReader._build_headers", return_value={}) + @mock.patch("ack.readers.the_trade_desk.reader.TheTradeDeskReader._build_headers", return_value={}) @mock.patch( - "nck.readers.ttd_reader.TheTradeDeskReader._make_api_call", - return_value={"Result": [], "ResultCount": 0}, + "ack.readers.the_trade_desk.reader.TheTradeDeskReader._make_api_call", return_value={"Result": [], "ResultCount": 0}, ) - def test_get_report_template_id_if_no_match( - self, mock_build_headers, mock_api_call - ): + def test_get_report_template_id_if_no_match(self, mock_build_headers, mock_api_call): with self.assertRaises(Exception): TheTradeDeskReader(**self.kwargs)._get_report_template_id() - @mock.patch("nck.readers.ttd_reader.TheTradeDeskReader._build_headers", return_value={}) + @mock.patch("ack.readers.the_trade_desk.reader.TheTradeDeskReader._build_headers", return_value={}) @mock.patch( - "nck.readers.ttd_reader.TheTradeDeskReader._make_api_call", - return_value={ - "ReportScheduleId": 5678, - "ReportScheduleName": "adgroup_performance_schedule", - }, + "ack.readers.the_trade_desk.reader.TheTradeDeskReader._make_api_call", + return_value={"ReportScheduleId": 5678, "ReportScheduleName": "adgroup_performance_schedule"}, ) def test_create_report_schedule(self, mock_build_headers, 
mock_api_call): reader = TheTradeDeskReader(**self.kwargs) @@ -121,10 +111,10 @@ def test_create_report_schedule(self, mock_build_headers, mock_api_call): reader._create_report_schedule() self.assertEqual(reader.report_schedule_id, 5678) - @mock.patch("nck.readers.ttd_reader.TheTradeDeskReader._build_headers", return_value={}) + @mock.patch("ack.readers.the_trade_desk.reader.TheTradeDeskReader._build_headers", return_value={}) @mock.patch("tenacity.BaseRetrying.wait", side_effect=lambda *args, **kwargs: 0) @mock.patch( - "nck.readers.ttd_reader.TheTradeDeskReader._make_api_call", + "ack.readers.the_trade_desk.reader.TheTradeDeskReader._make_api_call", side_effect=[ { "Result": [ @@ -158,34 +148,22 @@ def test_wait_for_download_url(self, mock_build_headers, mock_retry, mock_api_ca reader._wait_for_download_url() self.assertEqual(reader.download_url, "https://download.url") - @mock.patch("nck.readers.ttd_reader.TheTradeDeskReader._build_headers", return_value={}) + @mock.patch("ack.readers.the_trade_desk.reader.TheTradeDeskReader._build_headers", return_value={}) @mock.patch("tenacity.BaseRetrying.wait", side_effect=lambda *args, **kwargs: 0) @mock.patch.object(TheTradeDeskReader, "_get_report_template_id", lambda *args: None) @mock.patch.object(TheTradeDeskReader, "_create_report_schedule", lambda *args: None) @mock.patch.object(TheTradeDeskReader, "_wait_for_download_url", lambda *args: None) @mock.patch( - "nck.readers.ttd_reader.TheTradeDeskReader._download_report", + "ack.readers.the_trade_desk.reader.TheTradeDeskReader._download_report", return_value=iter( [ - { - "Date": "2020-01-01T00:00:00", - "Advertiser ID": "XXXXX", - "Impressions": 10 - }, - { - "Date": "2020-02-01T00:00:00", - "Advertiser ID": "XXXXX", - "Impressions": 11 - }, - { - "Date": "2020-02-03T00:00:00", - "Advertiser ID": "XXXXX", - "Impressions": 12 - }, + {"Date": "2020-01-01T00:00:00", "Advertiser ID": "XXXXX", "Impressions": 10}, + {"Date": "2020-02-01T00:00:00", "Advertiser ID": "XXXXX", 
"Impressions": 11}, + {"Date": "2020-02-03T00:00:00", "Advertiser ID": "XXXXX", "Impressions": 12}, ] ), ) - def test_read_if_normalize_stream_is_False(self, mock_build_headers, mock_retry, mock_download_report): + def test_read(self, mock_build_headers, mock_retry, mock_download_report): reader = TheTradeDeskReader(**self.kwargs) reader.report_template_id = 1234 reader.report_schedule_id = 5678 @@ -198,46 +176,3 @@ def test_read_if_normalize_stream_is_False(self, mock_build_headers, mock_retry, ] for output_record, expected_record in zip(output.readlines(), iter(expected)): self.assertEqual(output_record, expected_record) - - @mock.patch("nck.readers.ttd_reader.TheTradeDeskReader._build_headers", return_value={}) - @mock.patch("tenacity.BaseRetrying.wait", side_effect=lambda *args, **kwargs: 0) - @mock.patch.object(TheTradeDeskReader, "_get_report_template_id", lambda *args: None) - @mock.patch.object(TheTradeDeskReader, "_create_report_schedule", lambda *args: None) - @mock.patch.object(TheTradeDeskReader, "_wait_for_download_url", lambda *args: None) - @mock.patch( - "nck.readers.ttd_reader.TheTradeDeskReader._download_report", - return_value=iter( - [ - { - "Date": "2020-01-01T00:00:00", - "Advertiser ID": "XXXXX", - "Impressions": 10, - }, - { - "Date": "2020-02-01T00:00:00", - "Advertiser ID": "XXXXX", - "Impressions": 11, - }, - { - "Date": "2020-02-03T00:00:00", - "Advertiser ID": "XXXXX", - "Impressions": 12, - }, - ] - ), - ) - def test_read_if_normalize_stream_is_True(self, mock_build_headers, mock_retry, mock_download_report): - temp_kwargs = self.kwargs.copy() - temp_kwargs.update({"normalize_stream": True}) - reader = TheTradeDeskReader(**temp_kwargs) - reader.report_template_id = 1234 - reader.report_schedule_id = 5678 - reader.download_url = "https://download.url" - output = next(reader.read()) - expected = [ - {"Date": "2020-01-01", "Advertiser_ID": "XXXXX", "Impressions": 10}, - {"Date": "2020-02-01", "Advertiser_ID": "XXXXX", "Impressions": 11}, 
- {"Date": "2020-02-03", "Advertiser_ID": "XXXXX", "Impressions": 12}, - ] - for output_record, expected_record in zip(output.readlines(), iter(expected)): - self.assertEqual(output_record, expected_record) diff --git a/tests/readers/twitter/__init__.py b/tests/readers/twitter/__init__.py new file mode 100644 index 00000000..d46139b7 --- /dev/null +++ b/tests/readers/twitter/__init__.py @@ -0,0 +1,17 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. diff --git a/tests/readers/test_twitter_reader.py b/tests/readers/twitter/test_reader.py similarity index 83% rename from tests/readers/test_twitter_reader.py rename to tests/readers/twitter/test_reader.py index d17def7a..10bbdca7 100644 --- a/tests/readers/test_twitter_reader.py +++ b/tests/readers/twitter/test_reader.py @@ -16,15 +16,15 @@ # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+from datetime import datetime from unittest import TestCase, mock + from click import ClickException from freezegun import freeze_time -from datetime import datetime - +from ack.readers.twitter.reader import TwitterReader +from ack.utils.exceptions import DateDefinitionException from twitter_ads.client import Client -from nck.readers.twitter_reader import TwitterReader - class TwitterReaderTest(TestCase): @@ -46,6 +46,7 @@ class TwitterReaderTest(TestCase): "add_request_date_to_report": None, "start_date": datetime(2020, 1, 1), "end_date": datetime(2020, 1, 3), + "date_range": None, } @mock.patch.object(Client, "__init__", lambda *args: None) @@ -54,7 +55,7 @@ def test_validate_dates(self): temp_kwargs = self.kwargs.copy() params = {"start_date": datetime(2020, 1, 3), "end_date": datetime(2020, 1, 1)} temp_kwargs.update(params) - with self.assertRaises(ClickException): + with self.assertRaises(DateDefinitionException): TwitterReader(**temp_kwargs) @mock.patch.object(Client, "__init__", lambda *args: None) @@ -158,18 +159,8 @@ def test_parse_with_total_granularity(self): raw_analytics_response = { "time_series_length": 1, "data": [ - { - "id": "XXXXX", - "id_data": [ - {"segment": None, "metrics": {"retweets": [11], "likes": [12]}} - ], - }, - { - "id": "YYYYY", - "id_data": [ - {"segment": None, "metrics": {"retweets": [21], "likes": [22]}} - ], - }, + {"id": "XXXXX", "id_data": [{"segment": None, "metrics": {"retweets": [11], "likes": [12]}}]}, + {"id": "YYYYY", "id_data": [{"segment": None, "metrics": {"retweets": [21], "likes": [22]}}]}, ], } output = TwitterReader(**temp_kwargs).parse(raw_analytics_response) @@ -194,30 +185,8 @@ def test_parse_with_day_granularity(self): raw_analytics_response = { "time_series_length": 3, "data": [ - { - "id": "XXXXX", - "id_data": [ - { - "segment": None, - "metrics": { - "retweets": [11, 12, 13], - "likes": [14, 15, 16], - }, - } - ], - }, - { - "id": "YYYYY", - "id_data": [ - { - "segment": None, - "metrics": { - 
"retweets": [21, 22, 23], - "likes": [24, 25, 26], - }, - } - ], - }, + {"id": "XXXXX", "id_data": [{"segment": None, "metrics": {"retweets": [11, 12, 13], "likes": [14, 15, 16]}}]}, + {"id": "YYYYY", "id_data": [{"segment": None, "metrics": {"retweets": [21, 22, 23], "likes": [24, 25, 26]}}]}, ], } output = TwitterReader(**temp_kwargs).parse(raw_analytics_response) @@ -244,27 +213,15 @@ def test_parse_with_segment(self): { "id": "XXXXX", "id_data": [ - { - "segment": {"segment_name": "Male"}, - "metrics": {"retweets": [11], "likes": [12]}, - }, - { - "segment": {"segment_name": "Female"}, - "metrics": {"retweets": [13], "likes": [14]}, - }, + {"segment": {"segment_name": "Male"}, "metrics": {"retweets": [11], "likes": [12]}}, + {"segment": {"segment_name": "Female"}, "metrics": {"retweets": [13], "likes": [14]}}, ], }, { "id": "YYYYY", "id_data": [ - { - "segment": {"segment_name": "Male"}, - "metrics": {"retweets": [21], "likes": [22]}, - }, - { - "segment": {"segment_name": "Female"}, - "metrics": {"retweets": [23], "likes": [24]}, - }, + {"segment": {"segment_name": "Male"}, "metrics": {"retweets": [21], "likes": [22]}}, + {"segment": {"segment_name": "Female"}, "metrics": {"retweets": [23], "likes": [24]}}, ], }, ], @@ -316,9 +273,7 @@ def mock_parse(*args): @mock.patch.object(Client, "__init__", lambda *args: None) @mock.patch.object(Client, "accounts", lambda *args: None) - @mock.patch.object( - TwitterReader, "get_active_entity_ids", lambda *args: ["XXXXX", "YYYYYY"] - ) + @mock.patch.object(TwitterReader, "get_active_entity_ids", lambda *args: ["XXXXX", "YYYYYY"]) @mock.patch.object(TwitterReader, "get_job_ids", lambda *args: ["123456789"]) @mock.patch.object(TwitterReader, "get_job_result", mock_get_job_result) @mock.patch.object(TwitterReader, "get_raw_analytics_response", lambda *args: {}) diff --git a/tests/readers/yandex_campaign/__init__.py b/tests/readers/yandex_campaign/__init__.py new file mode 100644 index 00000000..d46139b7 --- /dev/null +++ 
b/tests/readers/yandex_campaign/__init__.py @@ -0,0 +1,17 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. diff --git a/tests/readers/test_yandex_campaign_reader.py b/tests/readers/yandex_campaign/test_reader.py similarity index 52% rename from tests/readers/test_yandex_campaign_reader.py rename to tests/readers/yandex_campaign/test_reader.py index 48b4b0d2..1aa634eb 100644 --- a/tests/readers/test_yandex_campaign_reader.py +++ b/tests/readers/yandex_campaign/test_reader.py @@ -15,63 +15,50 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ import unittest +from ack.readers.yandex_campaign.reader import YandexCampaignReader from parameterized import parameterized -from nck.readers.yandex_campaign_reader import YandexCampaignReader - class TestYandexCampaignReader(unittest.TestCase): - - @parameterized.expand([ - ( - { - "campaign_states": (), - "campaign_ids": (), - "campaign_statuses": (), - "campaign_payment_statuses": () - }, - { - "method": "get", - "params": { - "SelectionCriteria": { + @parameterized.expand( + [ + ( + {"campaign_states": (), "campaign_ids": (), "campaign_statuses": (), "campaign_payment_statuses": ()}, + { + "method": "get", + "params": { + "SelectionCriteria": {}, + "FieldNames": ["Id", "Name", "TimeZone", "DailyBudget", "Currency", "EndDate", "StartDate"], }, - "FieldNames": ["Id", "Name", "TimeZone", "DailyBudget", "Currency", "EndDate", "StartDate"] - } - } - ), - ( - { - "campaign_states": ("ON",), - "campaign_ids": (), - "campaign_statuses": ("ACCEPTED",), - "campaign_payment_statuses": ("ALLOWED",) - }, - { - "method": "get", - "params": { - "SelectionCriteria": { - "States": ["ON"], - "Statuses": ["ACCEPTED"], - "StatusesPayment": ["ALLOWED"] + }, + ), + ( + { + "campaign_states": ("ON",), + "campaign_ids": (), + "campaign_statuses": ("ACCEPTED",), + "campaign_payment_statuses": ("ALLOWED",), + }, + { + "method": "get", + "params": { + "SelectionCriteria": {"States": ["ON"], "Statuses": ["ACCEPTED"], "StatusesPayment": ["ALLOWED"]}, + "FieldNames": ["Id", "Name", "TimeZone", "DailyBudget", "Currency", "EndDate", "StartDate"], }, - "FieldNames": ["Id", "Name", "TimeZone", "DailyBudget", "Currency", "EndDate", "StartDate"] - } - } - ) - ]) - def test_get_query_body( - self, - kwargs, - expected_query_body - ): + }, + ), + ] + ) + def test_get_query_body(self, kwargs, expected_query_body): reader = YandexCampaignReader( "123", ("Id", "Name", "TimeZone", "DailyBudget", "Currency", "EndDate", "StartDate"), campaign_ids=kwargs["campaign_ids"], 
campaign_states=kwargs["campaign_states"], campaign_statuses=kwargs["campaign_statuses"], - campaign_payment_statuses=kwargs["campaign_payment_statuses"] + campaign_payment_statuses=kwargs["campaign_payment_statuses"], ) self.assertDictEqual(reader._build_request_body(), expected_query_body) diff --git a/tests/readers/yandex_statistics/__init__.py b/tests/readers/yandex_statistics/__init__.py new file mode 100644 index 00000000..d46139b7 --- /dev/null +++ b/tests/readers/yandex_statistics/__init__.py @@ -0,0 +1,17 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. diff --git a/tests/readers/yandex_statistics/test_reader.py b/tests/readers/yandex_statistics/test_reader.py new file mode 100644 index 00000000..5f018527 --- /dev/null +++ b/tests/readers/yandex_statistics/test_reader.py @@ -0,0 +1,230 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +import datetime +import unittest + +import click +from ack.readers.yandex_statistics.reader import YandexStatisticsReader +from parameterized import parameterized + + +class TestYandexStatisticsReader(unittest.TestCase): + @parameterized.expand( + [ + ( + "ALL_TIME", + {"report_language": "en", "filters": (), "max_rows": None, "date_start": None, "date_stop": None}, + True, + { + "params": { + "SelectionCriteria": {}, + "FieldNames": ["AdFormat", "AdGroupId"], + "ReportName": "stats_report_2020-03_25", + "ReportType": "AD_PERFORMANCE_REPORT", + "DateRangeType": "ALL_TIME", + "Format": "TSV", + "IncludeVAT": "YES", + } + }, + ), + ( + "ALL_TIME", + {"report_language": "en", "filters": (), "max_rows": None, "date_start": None, "date_stop": None}, + False, + { + "params": { + "SelectionCriteria": {}, + "FieldNames": ["AdFormat", "AdGroupId"], + "ReportName": "stats_report_2020-03_25", + "ReportType": "AD_PERFORMANCE_REPORT", + "DateRangeType": "ALL_TIME", + "Format": "TSV", + "IncludeVAT": "NO", + } + }, + ), + ( + "CUSTOM_DATE", + { + "report_language": "en", + "filters": (), + "max_rows": 25, + "date_start": datetime.datetime(2020, 3, 5, 0, 0), + "date_stop": datetime.datetime(2020, 3, 25, 0, 0), + }, + False, + { + "params": { + "SelectionCriteria": {"DateFrom": "2020-03-05", "DateTo": "2020-03-25"}, + "Page": {"Limit": 25}, + "FieldNames": ["AdFormat", "AdGroupId"], + "ReportName": "stats_report_2020-03_25", + "ReportType": "AD_PERFORMANCE_REPORT", + "DateRangeType": "CUSTOM_DATE", + 
"Format": "TSV", + "IncludeVAT": "NO", + } + }, + ), + ( + "CUSTOM_DATE", + { + "report_language": "en", + "filters": (("AdGroupId", "EQUALS", ["1"]), ("CampaignId", "IN", ["1", "2"])), + "max_rows": 25, + "date_start": datetime.datetime(2020, 3, 5, 0, 0), + "date_stop": datetime.datetime(2020, 3, 25, 0, 0), + }, + False, + { + "params": { + "SelectionCriteria": { + "DateFrom": "2020-03-05", + "DateTo": "2020-03-25", + "Filter": [ + {"Field": "AdGroupId", "Operator": "EQUALS", "Values": ["1"]}, + {"Field": "CampaignId", "Operator": "IN", "Values": ["1", "2"]}, + ], + }, + "Page": {"Limit": 25}, + "FieldNames": ["AdFormat", "AdGroupId"], + "ReportName": "stats_report_2020-03_25", + "ReportType": "AD_PERFORMANCE_REPORT", + "DateRangeType": "CUSTOM_DATE", + "Format": "TSV", + "IncludeVAT": "NO", + } + }, + ), + ] + ) + def test_get_query_body(self, date_range, kwargs, include_vat, expected_query_body): + reader = YandexStatisticsReader( + "123", + ("AdFormat", "AdGroupId"), + "AD_PERFORMANCE_REPORT", + "stats_report_2020-03_25", + date_range, + include_vat, + report_language=kwargs["report_language"], + filters=kwargs["filters"], + max_rows=kwargs["max_rows"], + date_start=kwargs["date_start"], + date_stop=kwargs["date_stop"], + ) + self.assertDictEqual(reader._build_request_body(), expected_query_body) + + @parameterized.expand(["en", "ru", "uk"]) + def test_request_headers(self, report_language): + reader = YandexStatisticsReader( + "123", + ("AdFormat", "AdGroupId"), + "AD_PERFORMANCE_REPORT", + "stats_report_2020-03_25", + "ALL_TIME", + True, + report_language=report_language, + filters=(), + max_rows=None, + date_start=None, + date_stop=None, + ) + self.assertDictEqual( + {"skipReportSummary": "true", "Accept-Language": report_language}, reader._build_request_headers() + ) + + @parameterized.expand( + [ + ("ALL_TIME", None, None, {}), + ( + "CUSTOM_DATE", + datetime.datetime(2020, 1, 1), + datetime.datetime(2020, 1, 2), + {"DateFrom": "2020-01-01", "DateTo": 
"2020-01-02"}, + ), + ] + ) + def test_custom_dates_correctly_set(self, date_range, start_date, stop_date, expected): + reader = YandexStatisticsReader( + "123", + ("AdFormat", "AdGroupId"), + "AD_PERFORMANCE_REPORT", + "stats_report_2020-03_25", + date_range, + True, + date_start=start_date, + date_stop=stop_date, + ) + self.assertDictEqual(expected, reader._add_custom_dates_if_set()) + + @parameterized.expand( + [ + ( + "ALL_TIME", + datetime.datetime(2020, 1, 1), + datetime.datetime(2020, 1, 2), + "Wrong date range. If start and stop dates are set, should be CUSTOM_DATE.", + ), + ("CUSTOM_DATE", None, None, "Missing at least one date. Have you set start and stop dates?"), + ( + "CUSTOM_DATE", + datetime.datetime(2020, 1, 1), + None, + "Missing at least one date. Have you set start and stop dates?", + ), + ( + "CUSTOM_DATE", + None, + datetime.datetime(2020, 1, 1), + "Missing at least one date. Have you set start and stop dates?", + ), + ( + "ALL_TIME", + None, + datetime.datetime(2020, 1, 1), + ( + "Wrong combination of date parameters. " + "Only use date start and date stop with date range set to CUSTOM_DATE." + ), + ), + ( + "ALL_TIME", + datetime.datetime(2020, 1, 1), + None, + ( + "Wrong combination of date parameters. " + "Only use date start and date stop with date range set to CUSTOM_DATE." 
+ ), + ), + ] + ) + def test_custom_dates_not_correctly_set(self, date_range, start_date, stop_date, error_message_expected): + reader = YandexStatisticsReader( + "123", + ("AdFormat", "AdGroupId"), + "AD_PERFORMANCE_REPORT", + "stats_report_2020-03_25", + date_range, + True, + date_start=start_date, + date_stop=stop_date, + ) + with self.assertRaises(click.ClickException) as click_exception: + reader._add_custom_dates_if_set() + self.assertEqual(click_exception.exception.message, error_message_expected) diff --git a/tests/streams/test_base_class.py b/tests/streams/test_base_class.py index 44c2d7bd..cf94b365 100644 --- a/tests/streams/test_base_class.py +++ b/tests/streams/test_base_class.py @@ -16,7 +16,7 @@ # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. import unittest -from nck.streams.stream import Stream +from ack.streams.stream import Stream class TestStreamBaseClassMethods(unittest.TestCase): diff --git a/tests/streams/test_date_stream.py b/tests/streams/test_date_stream.py index 7cec017f..d9b4e62c 100644 --- a/tests/streams/test_date_stream.py +++ b/tests/streams/test_date_stream.py @@ -16,7 +16,7 @@ # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
import unittest -from nck.streams.format_date_stream import FormatDateStream +from ack.streams.format_date_stream import FormatDateStream import json @@ -29,7 +29,7 @@ class TestStreamBaseClassMethods(unittest.TestCase): @staticmethod def data_generator(data): - for elem in data.split('\n'): + for elem in data.split("\n"): yield json.loads(elem) def test_usage(self): diff --git a/tests/test_main_method.py b/tests/test_main_method.py deleted file mode 100644 index 41b38fc8..00000000 --- a/tests/test_main_method.py +++ /dev/null @@ -1,30 +0,0 @@ -import unittest -import nck.entrypoint -from unittest import mock -from nck.streams.json_stream import JSONStream -from nck.streams.normalized_json_stream import NormalizedJSONStream - -from nck.readers.reader import Reader -from nck.writers.writer import Writer -from click.testing import CliRunner - - -class Test_Normalize_Option(unittest.TestCase): - runner = CliRunner() - - @staticmethod - def mock_generator(): - for _ in range(3): - yield {"plop plop": "plop"} - - @staticmethod - def mock_read(): - yield JSONStream("plop", Test_Normalize_Option.mock_generator()) - - @mock.patch.object(nck.readers.reader.Reader, "read", mock_read) - @mock.patch("nck.writers.writer.Writer.write") - def test_normalize_behaviour(self, mock_write): - r = Reader - w = Writer - nck.entrypoint.run([r, w], None, None, None, True) - self.assertEqual(mock_write.call_args[0][0].__class__, NormalizedJSONStream) diff --git a/tests/utils/test_date_handler.py b/tests/utils/test_date_handler.py index 0c50f72b..da0de59f 100644 --- a/tests/utils/test_date_handler.py +++ b/tests/utils/test_date_handler.py @@ -1,60 +1,87 @@ -from datetime import date import unittest -from unittest.mock import patch +from datetime import date, datetime +from freezegun import freeze_time +from ack.utils.date_handler import ( + check_date_range_definition_conformity, + get_date_start_and_date_stop_from_date_range, + build_date_range, +) +from ack.utils.exceptions import 
DateDefinitionException from parameterized import parameterized -from nck.utils.date_handler import get_date_start_and_date_stop_from_range - class TestDateHandler(unittest.TestCase): + @parameterized.expand( + [ + ("YESTERDAY", (date(2021, 1, 12), date(2021, 1, 12))), + ("LAST_7_DAYS", (date(2021, 1, 5), date(2021, 1, 12))), + ("LAST_90_DAYS", (date(2020, 10, 14), date(2021, 1, 12))), + ] + ) + @freeze_time("2021-01-13") + def test_get_date_start_and_date_stop_from_date_range(self, date_range, expected): + self.assertTupleEqual(get_date_start_and_date_stop_from_date_range(date_range), expected) + + @freeze_time("2021-01-11") + def test_get_previous_week_dates_if_monday(self): + self.assertTupleEqual( + get_date_start_and_date_stop_from_date_range("PREVIOUS_WEEK"), (date(2021, 1, 4), date(2021, 1, 10)) + ) + + @freeze_time("2021-01-13") + def test_get_previous_week_dates_if_midweek(self): + self.assertTupleEqual( + get_date_start_and_date_stop_from_date_range("PREVIOUS_WEEK"), (date(2021, 1, 4), date(2021, 1, 10)) + ) + + @freeze_time("2021-01-17") + def test_get_previous_week_dates_if_sunday(self): + self.assertTupleEqual( + get_date_start_and_date_stop_from_date_range("PREVIOUS_WEEK"), (date(2021, 1, 4), date(2021, 1, 10)) + ) - @parameterized.expand([ - ( - date(2020, 2, 1), - (date(2020, 1, 1), date(2020, 1, 31)) - ), - ( - date(2020, 1, 1), - (date(2019, 12, 1), date(2019, 12, 31)) - ), - ( - date(2020, 2, 15), - (date(2020, 1, 1), date(2020, 1, 31)) - ), - ( - date(2019, 12, 1), - (date(2019, 11, 1), date(2019, 11, 30)) + @freeze_time("2021-01-11") + def test_get_previous_month_dates_if_first_month_of_the_year(self): + self.assertTupleEqual( + get_date_start_and_date_stop_from_date_range("PREVIOUS_MONTH"), (date(2020, 12, 1), date(2020, 12, 31)) ) - ]) - def test_get_date_start_and_date_stop_with_previous_month(self, date_of_day, expected): - input_range = "PREVIOUS_MONTH" - with patch("nck.utils.date_handler.date") as mock_date: - 
mock_date.today.return_value = date_of_day - mock_date.side_effect = lambda *args, **kw: date(*args, **kw) - self.assertTupleEqual( - expected, - get_date_start_and_date_stop_from_range(input_range), - f"Bad return when freezed date is {date_of_day}" - ) - - @parameterized.expand([ - ( - date(2020, 1, 6), - (date(2019, 12, 29), date(2020, 1, 4)) - ), - ( - date(2020, 1, 13), - (date(2020, 1, 5), date(2020, 1, 11)) + + @freeze_time("2021-02-11") + def test_get_previous_month_dates_if_random_month_of_the_year(self): + self.assertTupleEqual( + get_date_start_and_date_stop_from_date_range("PREVIOUS_MONTH"), (date(2021, 1, 1), date(2021, 1, 31)) + ) + + @parameterized.expand( + [ + (None, date(2021, 1, 12), None), + (None, date(2021, 1, 12), "YESTERDAY"), + (date(2021, 1, 12), None, None), + (date(2021, 1, 12), None, "YESTERDAY"), + ] + ) + def test_check_date_range_definition_conformity_if_missing_date(self, start_date, end_date, date_range): + with self.assertRaises(DateDefinitionException): + check_date_range_definition_conformity(start_date, end_date, date_range) + + def test_check_date_range_definition_conformity_if_no_date(self): + with self.assertRaises(DateDefinitionException): + check_date_range_definition_conformity(None, None, None) + + def test_check_date_range_definition_conformity_if_inconsistent(self): + with self.assertRaises(DateDefinitionException): + check_date_range_definition_conformity(date(2021, 1, 12), date(2021, 1, 31), "YESTERDAY") + + @parameterized.expand([(date(2021, 1, 12), date(2021, 1, 31), None), (None, None, "YESTERDAY")]) + def test_check_date_range_definition_conformity(self, start_date, end_date, date_range): + self.assertIsNone(check_date_range_definition_conformity(start_date, end_date, date_range)) + + @freeze_time("2021-02-11") + def test_build_date_range_without_dates(self): + self.assertTupleEqual(build_date_range(None, None, "PREVIOUS_MONTH"), (datetime(2021, 1, 1), datetime(2021, 1, 31))) + + def 
test_build_date_range_with_dates(self): + self.assertTupleEqual( + build_date_range(datetime(2021, 1, 1), datetime(2021, 1, 31), None), (datetime(2021, 1, 1), datetime(2021, 1, 31)) ) - ]) - def test_get_date_start_and_date_stop_with_previous_week(self, date_of_day, expected): - input_range = "PREVIOUS_WEEK" - with patch("nck.utils.date_handler.date") as mock_date: - mock_date.today.return_value = date_of_day - mock_date.side_effect = lambda *args, **kw: date(*args, **kw) - self.assertTupleEqual( - expected, - get_date_start_and_date_stop_from_range(input_range), - f"Bad return when freezed date is {date_of_day}" - ) diff --git a/tests/utils/test_text_utils.py b/tests/utils/test_text_utils.py index 6268716c..c9249cec 100644 --- a/tests/utils/test_text_utils.py +++ b/tests/utils/test_text_utils.py @@ -18,7 +18,7 @@ import logging from unittest import TestCase -from nck.utils.text import parse_decoded_line, get_report_generator_from_flat_file +from ack.utils.text import parse_decoded_line, get_report_generator_from_flat_file class TestTextUtilsMethod(TestCase): @@ -100,26 +100,12 @@ def test_get_report_generator__add_column(self): b"2020-01-01,5678,2000", ] expected = [ - { - "Date": "2020-01-01", - "AdvertiserId": "1234", - "Reach": "1000", - "Campaign": "XMas Sale", - "Country": "France", - }, - { - "Date": "2020-01-01", - "AdvertiserId": "5678", - "Reach": "2000", - "Campaign": "XMas Sale", - "Country": "France", - }, + {"Date": "2020-01-01", "AdvertiserId": "1234", "Reach": "1000", "Campaign": "XMas Sale", "Country": "France"}, + {"Date": "2020-01-01", "AdvertiserId": "5678", "Reach": "2000", "Campaign": "XMas Sale", "Country": "France"}, ] line_iterator = iter(lines) output = get_report_generator_from_flat_file( - line_iterator, - add_column=True, - column_dict={"Campaign": "XMas Sale", "Country": "France"}, + line_iterator, add_column=True, column_dict={"Campaign": "XMas Sale", "Country": "France"}, ) for output_record, expected_record in zip(output, expected): 
self.assertEqual(output_record, expected_record) @@ -128,8 +114,7 @@ def test_get_report_generator__file_with_headers_only(self): lines = [b"Just,Headers,in,this,empty,report"] line_iterator = iter(lines) self.assertFalse( - next(get_report_generator_from_flat_file(line_iterator), False), - "Data is not empty", + next(get_report_generator_from_flat_file(line_iterator), False), "Data is not empty", ) def test_get_report_generator__skip_when_no_match_with_headers_length(self): @@ -140,9 +125,7 @@ def test_get_report_generator__skip_when_no_match_with_headers_length(self): b"Copyrigth: report downloaded from Artefact.com", ] line_iterator = iter(lines) - output = get_report_generator_from_flat_file( - line_iterator, skip_n_first=0, skip_n_last=0 - ) + output = get_report_generator_from_flat_file(line_iterator, skip_n_first=0, skip_n_last=0) expected = [ {"Date": "2020-01-01", "AdvertiserId": "1234", "Impressions": "10"}, {"Date": "2020-01-01", "AdvertiserId": "5678", "Impressions": "20"}, @@ -158,9 +141,7 @@ def test_get_report_generator__skip_blank(self): b"2020-01-01,5678,20", ] line_iterator = iter(lines) - output = get_report_generator_from_flat_file( - line_iterator, skip_n_first=0, skip_n_last=0 - ) + output = get_report_generator_from_flat_file(line_iterator, skip_n_first=0, skip_n_last=0) expected = [ {"Date": "2020-01-01", "AdvertiserId": "1234", "Impressions": "10"}, {"Date": "2020-01-01", "AdvertiserId": "5678", "Impressions": "20"}, @@ -178,9 +159,7 @@ def test_get_report_generator__skip_first_and_last(self): b"(Not desired last line)", ] line_iterator = iter(lines) - output = get_report_generator_from_flat_file( - line_iterator, skip_n_first=2, skip_n_last=1 - ) + output = get_report_generator_from_flat_file(line_iterator, skip_n_first=2, skip_n_last=1) expected = [ {"Date": "2020-01-01", "AdvertiserId": "1234", "Impressions": "10"}, {"Date": "2020-01-01", "AdvertiserId": "5678", "Impressions": "20"}, @@ -197,9 +176,7 @@ def 
test_get_report_generator__skip_last_with_blank_at_end_of_file(self): b"", ] line_iterator = iter(lines) - output = get_report_generator_from_flat_file( - line_iterator, skip_n_first=0, skip_n_last=1 - ) + output = get_report_generator_from_flat_file(line_iterator, skip_n_first=0, skip_n_last=1) expected = [ {"Date": "2020-01-01", "AdvertiserId": "1234", "Impressions": "10"}, {"Date": "2020-01-01", "AdvertiserId": "5678", "Impressions": "20"}, @@ -214,9 +191,7 @@ def test_get_report_generator__skip_no_first_nor_last(self): b"2020-01-01,5678,20", ] line_iterator = iter(lines) - output = get_report_generator_from_flat_file( - line_iterator, skip_n_first=0, skip_n_last=0 - ) + output = get_report_generator_from_flat_file(line_iterator, skip_n_first=0, skip_n_last=0) expected = [ {"Date": "2020-01-01", "AdvertiserId": "1234", "Impressions": "10"}, {"Date": "2020-01-01", "AdvertiserId": "5678", "Impressions": "20"}, diff --git a/tests/writers/__init__.py b/tests/writers/__init__.py new file mode 100644 index 00000000..d46139b7 --- /dev/null +++ b/tests/writers/__init__.py @@ -0,0 +1,17 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
diff --git a/tests/writers/amazon_s3/__init__.py b/tests/writers/amazon_s3/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/writers/amazon_s3/test_writer.py b/tests/writers/amazon_s3/test_writer.py new file mode 100644 index 00000000..5266ea9c --- /dev/null +++ b/tests/writers/amazon_s3/test_writer.py @@ -0,0 +1,56 @@ +import json +from unittest import TestCase + +import boto3 +from moto import mock_s3 +from ack.streams.json_stream import JSONStream +from ack.writers.amazon_s3.writer import AmazonS3Writer +from parameterized import parameterized + +list_dict = [{"a": "4", "b": "5", "c": "6"}, {"a": "7", "b": "8", "c": "9"}] + + +def dict_generator(list_dict): + for di in list_dict: + yield di + + +def mock_stream(list_dict, name): + return JSONStream(name, dict_generator(list_dict)) + + +@mock_s3 +class AmazonS3WriterTest(TestCase): + @classmethod + @mock_s3 + def setUpClass(cls): + client1 = boto3.resource("s3", region_name="us-east-1") + client1.create_bucket(Bucket="test") + + def test_bucket_doesnt_exist(self): + with self.assertRaisesRegex(Exception, "non-existing-bucket bucket does not exist. 
available buckets are \['test'\]"): + AmazonS3Writer("non-existing-bucket", "us-east-1", "", "") + + @parameterized.expand( + [(None, "stream_name.format", "stream_name.format"), ("file_name", "stream_name.format", "file_name.format")] + ) + def test_valid_filename(self, file_name, stream_name, expected): + writer = AmazonS3Writer("test", "us-east-1", "", "", prefix=None, filename=file_name) + writer._set_valid_file_name(stream_name) + self.assertEqual(expected, writer._file_name) + + def test_Write(self): + writer = AmazonS3Writer("test", "us-east-1", "", "") + writer.write(mock_stream(list_dict, "test")) + + client = boto3.resource("s3", region_name="us-east-1") + bucket = client.Bucket("test") + + obj = list(bucket.objects.all())[0] + + bod = obj.get()["Body"].read().decode("utf-8") + lines = bod.split("\n") + + for i, line in enumerate(lines[:-1]): + json_line = json.loads(line) + self.assertEqual(json_line, list_dict[i]) diff --git a/vendor/instantclient-basic-linux.x64-12.1.0.2.0.zip b/vendor/instantclient-basic-linux.x64-12.1.0.2.0.zip deleted file mode 100644 index 5caca341..00000000 Binary files a/vendor/instantclient-basic-linux.x64-12.1.0.2.0.zip and /dev/null differ diff --git a/vendor/instantclient-sdk-linux.x64-12.1.0.2.0.zip b/vendor/instantclient-sdk-linux.x64-12.1.0.2.0.zip deleted file mode 100644 index 74e3ed1c..00000000 Binary files a/vendor/instantclient-sdk-linux.x64-12.1.0.2.0.zip and /dev/null differ