diff --git a/.gitignore b/.gitignore
index 65e9d1d..8ef116c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -129,11 +129,18 @@ dmypy.json
 .pyre/
 
 # Ignore
-.vscode/
+
 censo/bin/
 censo/include/
 censo/share/
 
 # Data
 notebooks/*.csv
-
+*.zip
+cache
+
+# Azure stuff
+.vscode
+__azurite*
+__blob*
+local.settings.json
\ No newline at end of file
diff --git a/utils/.dockerignore b/utils/.dockerignore
new file mode 100644
index 0000000..85377aa
--- /dev/null
+++ b/utils/.dockerignore
@@ -0,0 +1,3 @@
+__pycache*
+.*cache
+.venv
\ No newline at end of file
diff --git a/utils/.env.template b/utils/.env.template
new file mode 100644
index 0000000..03f43af
--- /dev/null
+++ b/utils/.env.template
@@ -0,0 +1,36 @@
+# Environment variables
+
+### IMPORTANT: when done editing this file, rename it to ".env"
+### (without the ".template" ending)
+
+## General settings
+
+### log verbosity level - choose between 'error', 'warning', 'info', 'debug'
+FETCHPORTALS_LOG_LEVEL="debug"
+
+### set the source of portal URLs and geo IDs - only 'census' is currently accepted
+FETCHPORTALS_SOURCE="census"
+
+### control whether to only ping the portals' status or to fetch their source code
+FETCHPORTALS_MODE="ping"
+
+### set the maximum number of tries and the time to wait for each request (seconds)
+FETCHPORTALS_MAX_RETRIES=3
+FETCHPORTALS_TIMEOUT=10.0
+
+### control the callback used to process and/or save the retrieved data
+FETCHPORTALS_CALLBACK="kaggle"
+
+### control what to do when the destination file already exists - must be one of
+### 'replace', 'append' or 'skip'
+FETCHPORTALS_EXISTING="replace"
+
+### set a local directory where retrieved files may persist
+FETCHPORTALS_LOCALDIR="./data"
+
+## Kaggle settings
+
+KAGGLE_USERNAME="exampleuser"
+KAGGLE_KEY="12345678abcdefgh"
+KAGGLE_DATASET="bcbernardo/censusqd2020"
+KAGGLE_FILE="portals-availability.csv"
\ No newline at end of file
diff --git a/utils/.funcignore b/utils/.funcignore
new file mode 100644
index 0000000..010071a
--- /dev/null
+++ b/utils/.funcignore
@@ -0,0 +1,5 @@
+.env
+.venv
+local.settings.json
+__azurite*
+__pycache__
\ No newline at end of file
diff --git a/utils/README.md b/utils/README.md
new file mode 100644
index 0000000..5bf9183
--- /dev/null
+++ b/utils/README.md
@@ -0,0 +1,134 @@
+# Querido Diário Census utilities
+
+This sub-repository contains routines and functions written by the community
+to process, analyze and store the results of the Querido Diário Census.
+
+Currently, the only utility available is the `fetch_portals` package, which
+contacts every web address registered in the Census to check which portals
+are online and/or to retrieve their source code.
+
+Contributions in the form of new packages and utilities for processing the
+Census data are welcome. See the project's
+[CONTRIBUTING.md](../CONTRIBUTING.md) for details on how to help with the
+Census' many tasks, as well as the
+[Adding a new utility](#adding-a-new-utility) section below for specific
+instructions on creating a new pre-processing routine.
+
+If you have questions or want an overview of the Census' next steps, feel
+free to visit the project's
+[issues](https://github.com/okfn-brasil/censo-querido-diario/issues) or reach
+out on [Discord](https://discord.gg/M6ep5VED).
+
+## Installation and usage
+
+### With Docker (recommended)
+
+The simplest way to run the utilities is with Docker. You need the Docker
+Community Edition installed; find the right version for your system
+[here][Docker CE].
+You will also need [git] installed to download the repository.
+
+To install the utilities, open a command-line terminal and run:
+
+```bash
+$ git clone https://github.com/okfn-brasil/censo-querido-diario.git
+$ cd censo-querido-diario/utils
+```
+
+In a file explorer, find the directory where you downloaded the repository
+and open the `censo-querido-diario/utils/.env.template` file. Adjust the
+settings in it according to the data you want to collect (especially the
+ones starting with `KAGGLE_*`, if you plan to export to Kaggle). Save the
+modified file and rename it to `.env` (dropping the `.template` suffix).
+
+To start checking the portals, go back to the terminal and run:
+
+```bash
+$ docker-compose up
+```
+
+[Docker CE]: https://hub.docker.com/search?offering=community&type=edition
+[git]: https://git-scm.com/
+
+### As a Python package
+
+The utilities in this sub-repository can also be installed as standalone
+Python packages. For that, you need a compatible Python version (3.7 or
+later) installed on your machine.
+
+To install from the repository, run in a command-line terminal:
+
+```bash
+$ git clone https://github.com/okfn-brasil/censo-querido-diario.git
+$ cd censo-querido-diario/utils
+$ python -m venv .venv
+$ source .venv/bin/activate  # in PowerShell: .venv\Scripts\Activate.ps1
+(.venv) $ python -m pip install .
+```
+
+For the installation to work and for the `fetch-portals` command to be
+usable from the command line, you must first export some environment
+variables that control how the program behaves.
+
+To do so, edit the `.env.template` file in the `censo-querido-diario/utils`
+directory, changing the settings as needed. **Important:** to run the
+current version of the portal fetcher you must, at a minimum, change the
+environment variables starting with `KAGGLE_*`. You need write permission on
+the dataset used to store the results.
+
+When you are done editing, save the `.env.template` file and rename it to
+just `.env`.
+
+With the utility installed as a package and the corresponding virtual
+environment activated, simply run the `fetch-portals` command. It will send
+requests to every official gazette portal mapped by the Census and save the
+results to the Kaggle dataset configured in the `.env` file.
+
+```bash
+(.venv) $ fetch-portals
+```
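The same entry point can also be invoked from Python, which is how the Azure Function added later in this changeset reuses it. A minimal sketch, assuming the `.env` file (or the equivalent environment variables) is already in place:

```python
from fetch_portals.main import main

# ping every portal mapped by the Census and print the results to stdout
# as JSON, instead of uploading them to Kaggle
main(mode="ping", callback=None)
```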
+## Adding a new utility
+
+To develop a Python package that consumes and processes Querido Diário
+Census data, [fork](https://github.com/okfn-brasil/censo-querido-diario/fork)
+the repository to your own account and add your scripts in a subdirectory of
+the `censo-querido-diario/utils/src` folder.
+
+For the directory and module names, use only lowercase letters and
+underscores (\_). Also place an empty `__init__.py` file in the new
+directory, and add the dependencies you use to the package list under the
+`install_requires` item in
+[`censo-querido-diario/utils/setup.cfg`](./setup.cfg).
+
+If you want the utility to be runnable through Docker, create a Dockerfile
+named after your utility (e.g. `fetch_portals.Dockerfile`) in
+`censo-querido-diario/utils`, containing the container build instructions
+(see the [Dockerfile reference]). Then add an entry to the
+`docker-compose.yml` file located in the same directory (see the [Docker
+Compose reference] for details).
+
+[Dockerfile reference]: https://docs.docker.com/engine/reference/builder/
+[Docker Compose reference]: https://docs.docker.com/compose/compose-file/
diff --git a/utils/azure_ping_portals/__init__.py b/utils/azure_ping_portals/__init__.py
new file mode 100644
index 0000000..4507381
--- /dev/null
+++ b/utils/azure_ping_portals/__init__.py
@@ -0,0 +1,20 @@
+# Copyright 2020 Open Knowledge Brasil
+
+# Use of this source code is governed by an MIT-style
+# license that can be found in the LICENSE file or at
+# https://opensource.org/licenses/MIT.
+
+"""Periodically check the availability of Official Gazettes portals."""
+
+import logging
+
+import azure.functions as func
+
+from fetch_portals.main import main as fetch
+
+
+def main(timer: func.TimerRequest):
+    """Ping Querido Diario Census portals to check their availability."""
+    logging.info(f"Starting function (past due: {timer.past_due})")
+    fetch(mode="ping", existing="append", callback="kaggle")
+    logging.info("Finished checking portals from Census.")
diff --git a/utils/azure_ping_portals/function.json b/utils/azure_ping_portals/function.json
new file mode 100644
index 0000000..abb1e13
--- /dev/null
+++ b/utils/azure_ping_portals/function.json
@@ -0,0 +1,11 @@
+{
+  "scriptFile": "__init__.py",
+  "bindings": [
+    {
+      "name": "timer",
+      "type": "timerTrigger",
+      "direction": "in",
+      "schedule": "0 0 */3 * * *"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/utils/docker-compose.yml b/utils/docker-compose.yml
new file mode 100644
index 0000000..a96e8e0
--- /dev/null
+++ b/utils/docker-compose.yml
@@ -0,0 +1,11 @@
+version: '3.8'
+services:
+  fetch-portals:
+    build:
+      context: .
+      dockerfile: fetch_portals.Dockerfile
+    volumes:
+      - ./cache:/usr/src/data
+    env_file: .env
+    environment:
+      - FETCHPORTALS_LOCALDIR=/usr/src/data
\ No newline at end of file
diff --git a/utils/fetch_portals.Dockerfile b/utils/fetch_portals.Dockerfile
new file mode 100644
index 0000000..ecd8683
--- /dev/null
+++ b/utils/fetch_portals.Dockerfile
@@ -0,0 +1,17 @@
+FROM python:3.8.5-slim
+
+# Setup env
+ENV LANG C.UTF-8
+ENV LC_ALL C.UTF-8
+ENV PYTHONDONTWRITEBYTECODE 1
+ENV PYTHONFAULTHANDLER 1
+
+RUN mkdir /usr/src/app
+WORKDIR /usr/src/app
+
+COPY . .
+
+RUN python -m pip install .
+
+# Run the executable
+ENTRYPOINT ["fetch-portals"]
diff --git a/utils/host.json b/utils/host.json
new file mode 100644
index 0000000..f0abc88
--- /dev/null
+++ b/utils/host.json
@@ -0,0 +1,5 @@
+{
+  "functionTimeout": "00:10:00",
+  "version": "2.0",
+  "watchDirectories": [ "src" ]
+}
\ No newline at end of file
diff --git a/utils/pyproject.toml b/utils/pyproject.toml
new file mode 100644
index 0000000..b5a3c46
--- /dev/null
+++ b/utils/pyproject.toml
@@ -0,0 +1,6 @@
+[build-system]
+requires = [
+    "setuptools>=42",
+    "wheel"
+]
+build-backend = "setuptools.build_meta"
\ No newline at end of file
diff --git a/utils/requirements-dev.txt b/utils/requirements-dev.txt
new file mode 100644
index 0000000..bf1a8b0
--- /dev/null
+++ b/utils/requirements-dev.txt
@@ -0,0 +1 @@
+.[dev]
\ No newline at end of file
diff --git a/utils/requirements.txt b/utils/requirements.txt
new file mode 100644
index 0000000..945c9b4
--- /dev/null
+++ b/utils/requirements.txt
@@ -0,0 +1 @@
+.
\ No newline at end of file
diff --git a/utils/setup.cfg b/utils/setup.cfg
new file mode 100644
index 0000000..db231a8
--- /dev/null
+++ b/utils/setup.cfg
@@ -0,0 +1,41 @@
+[metadata]
+name = censusqdutils
+version = 0.1.0
+url = https://github.com/okfn-brasil/censo-querido-diario
+author = Open Knowledge Brasil
+author_email = contato@serenata.ai
+classifiers =
+    Programming Language :: Python :: 3
+    License :: OSI Approved :: MIT License
+    Operating System :: OS Independent
+description = Utils for processing Querido Diario Census data.
+long_description = file: README.md
+long_description_content_type = text/markdown
+license = MIT
+
+[options]
+python_requires = >=3.7
+package_dir =
+    =src
+packages = find:
+install_requires =
+    aiohttp >= 3.7
+    kaggle >= 1.5
+    pandas >= 1.2
+    python-dotenv >= 0.15
+
+[options.extras_require]
+dev =
+    black == 20.8b1
+    flake8 >= 3.8.4
+    isort >= 5.7.0
+    mypy >= 0.800
+    pandas-stubs >= 1.0.4.4
+    pytest >= 6.2.2
+
+[options.packages.find]
+where = src
+
+[options.entry_points]
+console_scripts =
+    fetch-portals = fetch_portals.main:main
diff --git a/utils/src/fetch_portals/__init__.py b/utils/src/fetch_portals/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/utils/src/fetch_portals/callbacks.py b/utils/src/fetch_portals/callbacks.py
new file mode 100644
index 0000000..2ca02c2
--- /dev/null
+++ b/utils/src/fetch_portals/callbacks.py
@@ -0,0 +1,196 @@
+# Copyright 2020 Open Knowledge Brasil
+
+# Use of this source code is governed by an MIT-style
+# license that can be found in the LICENSE file or at
+# https://opensource.org/licenses/MIT.
+
+"""Callback functions to save data fetched from official gazettes portals.
+
+This module contains callback functions to process and/or save contents and
+monitor the service availability of portals containing the official gazettes
+for the 5,570 Brazilian municipalities.
+"""
+
+import json
+import logging
+import os
+from dataclasses import is_dataclass
+from pathlib import Path
+from tempfile import TemporaryDirectory
+from typing import Any, Iterable, Literal, Optional, Union
+
+import pandas as pd
+
+from .models import ExistingBehavior, PathLike, PortalCapture
+
+
+def _autogen_version_notes(
+    dest_file: str, operation: Literal["create", "append", "update"]
+) -> str:
+    """Generate a default version note message.
+
+    Parameters:
+        dest_file: Destination file being written (or appended to).
+        operation: Whether the file is being ``create``'d, ``append``'ed to
+            or completely ``update``'d (replaced).
+    """
+
+    logging.warning(
+        "Version notes not provided; a default message will be generated."
+    )
+
+    if operation == "create":
+        version_notes = "Create " + dest_file
+    elif operation == "append":
+        version_notes = "Add records to " + dest_file
+    elif operation == "update":
+        version_notes = "Update " + dest_file
+
+    return version_notes
+
+
+def to_kaggle(
+    data: Union[Iterable[Union[dict, PortalCapture]], pd.DataFrame],
+    dataset: str,
+    dest_file: str,
+    existing_behavior: ExistingBehavior = "replace",
+    version_notes: Optional[str] = None,
+    local_dir: Optional[PathLike] = None,
+    delete_old_versions: bool = False,
+) -> "DatasetNewVersionResponse":  # type: ignore # noqa: F821
+    """Write data to a destination dataset file in Kaggle.
+
+    Parameters:
+        data: Data to be uploaded to the dataset. Can be a pandas
+            `DataFrame`_ instance, or an iterable of dataclass objects or
+            dictionaries.
+        dataset: Kaggle dataset id, in the format ``owner/dataset-name``.
+        dest_file: How to name the destination file in the dataset context.
+        existing_behavior: What to do if the file already exists in the
+            dataset.
+        version_notes: A message describing what changes will be made to the
+            dataset (optional; a default message is generated if none is
+            given).
+        local_dir: A local directory where the dataset files will persist
+            (optional; defaults to None).
+        delete_old_versions: Whether to delete previous versions of the
+            dataset that exist in Kaggle.
+
+    Returns:
+        A `DatasetNewVersionResponse`_ instance with the new dataset version.
+
+    .. _DataFrame: https://pandas.pydata.org/pandas-docs/stable/reference/
+        frame.html
+    .. _DatasetNewVersionResponse: https://github.com/Kaggle/kaggle-api/blob/
+        89eb72dd811492c500839f65332f669cd839d2bc/kaggle/models/
+        kaggle_models_extended.py#L150
+    """
+
+    from kaggle.api.kaggle_api_extended import KaggleApi  # type: ignore
+    from kaggle.models.kaggle_models_extended import Metadata  # type: ignore
+
+    # check that the data object's type is supported
+    if all(is_dataclass(record) for record in data) or all(
+        isinstance(record, dict) for record in data
+    ):
+        data = pd.DataFrame(data)
+    elif isinstance(data, pd.DataFrame):
+        pass
+    else:
+        raise TypeError(
+            "`data` parameter must be a list of dataclass instances or "
+            + f"dictionaries, or a pandas DataFrame, not {type(data).__name__}."
+        )
+
+    logging.info(
+        f"Uploading {len(data.index)} records to '{dest_file}' file in "
+        + f"Kaggle's '{dataset}' dataset."
+    )
+
+    # authenticate to the Kaggle API
+    logging.debug("Authenticating to Kaggle API...")
+    api = KaggleApi()
+    api.authenticate()
+
+    # make sure the dataset exists
+    logging.debug("Searching dataset...")
+    try:
+        dataset_owner, dataset_name = dataset.split("/")
+        matching_datasets = api.dataset_list(
+            search=dataset_name, user=dataset_owner
+        )
+        assert dataset in [dataset.ref for dataset in matching_datasets]
+    except AssertionError:
+        # TODO: create dataset if it doesn't exist
+        raise ValueError("The dataset does not exist.")
+
+    # use the provided local (persistent) directory, or create a temporary one
+    if not local_dir:
+        tmpdir = TemporaryDirectory()
+        data_dir: Any = tmpdir.name
+        logging.debug(f"Created temporary directory: {data_dir}")
+    else:
+        data_dir = local_dir
+
+    # get dataset metadata
+    metafile = Path(data_dir, "datapackage.json")
+    if not os.path.isfile(metafile):
+        metadata_response = api.process_response(
+            api.metadata_get_with_http_info(dataset_owner, dataset_name)
+        )
+        metadata = Metadata(metadata_response)
+        with open(metafile, "w") as f:
+            json.dump(metadata, f, indent=2, default=lambda o: o.__dict__)
+
+    # download existing files
+    # TODO: skip downloading unchanged files if they already exist locally
+    # TODO: start downloading asynchronously while data is gathered
+    api.dataset_download_files(dataset, path=data_dir, unzip=True)
+
+    # write the data file as CSV
+    operation: Literal["create", "append", "update"]
+    if os.path.isfile(Path(data_dir, dest_file)):
+        if existing_behavior == "replace":
+            operation = "update"
+            data.to_csv(Path(data_dir, dest_file), mode="w")
+        elif existing_behavior == "append":
+            operation = "append"
+            data.to_csv(Path(data_dir, dest_file), mode="a")
+        elif existing_behavior == "skip":
+            logging.error(f"File '{dest_file}' already exists. Skipped.")
+            raise FileExistsError(
+                "File already exists and behavior is set to `skip`."
+            )
+        else:
+            raise ValueError(
+                "`existing_behavior` argument must be one of "
+                + f"`replace`, `append` or `skip` ('{existing_behavior}' "
+                + "provided)."
+            )
+    else:
+        operation = "create"
+        data.to_csv(Path(data_dir, dest_file))
+
+    # # update metadata file
+    # try:
+    #     api.dataset_metadata_update(dataset, data_dir)
+    # except KeyError:
+    #     # BUG: KaggleApi's dataset_metadata_update() method references an
+    #     # inexistent key when checking for errors. Just ignore it.
+    #     pass
+
+    # create a version notes message, if the user hasn't provided one
+    if not version_notes:
+        version_notes = _autogen_version_notes(dest_file, operation)
+
+    # upload data to Kaggle
+    new_version = api.dataset_create_version(
+        data_dir, version_notes, delete_old_versions=delete_old_versions
+    )
+
+    # clear the temporary directory
+    if not local_dir:
+        tmpdir.cleanup()
+
+    return new_version
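For reference, a minimal sketch of how `to_kaggle` is meant to be called with plain dictionary records. The dataset id and file name below are hypothetical placeholders, and an existing Kaggle dataset you can write to (plus valid `KAGGLE_USERNAME`/`KAGGLE_KEY` credentials) is assumed:

```python
from fetch_portals.callbacks import to_kaggle

records = [
    {"ibge_code": "2600807", "status": 200, "message": "OK"},
    {"ibge_code": "4200754", "status": 200, "message": "OK"},
]

# appends the records to "pings.csv" in the (hypothetical) dataset
# "myuser/example" and publishes a new dataset version
to_kaggle(
    records,
    dataset="myuser/example",
    dest_file="pings.csv",
    existing_behavior="append",
)
```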
diff --git a/utils/src/fetch_portals/fetchers.py b/utils/src/fetch_portals/fetchers.py
new file mode 100644
index 0000000..5eb9621
--- /dev/null
+++ b/utils/src/fetch_portals/fetchers.py
@@ -0,0 +1,100 @@
+# Copyright 2020 Open Knowledge Brasil
+
+# Use of this source code is governed by an MIT-style
+# license that can be found in the LICENSE file or at
+# https://opensource.org/licenses/MIT.
+
+"""Functions to fetch official gazette portals' statuses and contents.
+
+This module contains functions developed as a part of the `Censo Querido
+Diário`_ effort, in order to periodically fetch the contents and monitor the
+service availability of portals containing the official gazettes for the
+5,570 Brazilian municipalities.
+
+The Censo Querido Diário is a collaborative effort to push forward the
+disclosure of public information embodied in official publications.
+Contributions to this initiative are more than welcome. Check our
+`contribution guidelines`_ (in Portuguese) to learn the various ways you can
+support the project.
+
+.. _Censo Querido Diário:
+    https://censo.ok.org.br/sobre/
+
+.. _contribution guidelines:
+    https://github.com/okfn-brasil/censo-querido-diario/blob/main/CONTRIBUTING.MD
+"""
+
+import asyncio
+import logging
+from typing import cast, List
+
+from .models import AcceptedHttpMethod, FetchMode, PortalList
+
+
+async def _gather_responses(
+    portals: PortalList,
+    mode: FetchMode = "ping",
+    max_retries: int = 3,
+    timeout: float = 10.0,
+):
+    """Orchestrate asynchronous requests to official gazettes portals.
+
+    Parameters:
+        portals: A `PortalList` instance to be fetched.
+        mode: How to fetch the portals. ``mode="ping"`` fetches only the
+            portals' status codes and request metadata. ``mode="source"``
+            also captures the portals' source code.
+    """
+
+    logging.info("Preparing fetch tasks...")
+
+    portals = PortalList(portals)
+
+    if mode == "ping":
+        http_method = "HEAD"
+    elif mode == "source":
+        http_method = "GET"
+
+    task_list: List = list()
+
+    for subset in portals.by_domain():
+        task: asyncio.Task = asyncio.create_task(
+            subset.fetch_all(
+                method=cast(AcceptedHttpMethod, http_method),
+                max_retries=max_retries,
+                timeout=timeout,
+            )
+        )
+        task_list.append(task)
+
+    return await asyncio.gather(*task_list)
+
+
+def fetch_portals(
+    portals: PortalList,
+    mode: FetchMode = "ping",
+    max_retries: int = 3,
+    timeout: float = 10.0,
+):
+    """Orchestrate asynchronous requests to official gazettes portals.
+
+    Parameters:
+        portals: A `PortalList` instance to be fetched.
+        mode: How to fetch the portals. ``mode="ping"`` fetches only the
+            portals' status codes and request metadata. ``mode="source"``
+            also captures the portals' source code.
+    """
+
+    results = list()
+
+    task_list = asyncio.run(
+        _gather_responses(portals, mode, max_retries, timeout)
+    )
+
+    for task_results in task_list:
+        for result in task_results:
+            results.append(result)
+
+    return results
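`fetch_portals` is the synchronous wrapper most callers should use: it groups the portals by domain, fetches each group concurrently and flattens the results. A sketch with a single made-up portal (the URL is hypothetical):

```python
from yarl import URL

from fetch_portals.fetchers import fetch_portals
from fetch_portals.models import IbgeCode, Portal, PortalList

portals = PortalList(
    [Portal(ibge_code=IbgeCode(2600807), url=URL("http://example.com/gazette"))]
)

# one HEAD request per unique URL, with a single attempt and a short timeout
captures = fetch_portals(portals, mode="ping", max_retries=1, timeout=5.0)
print([capture.to_dict() for capture in captures])
```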
source.") + + # fetch them + captures: List[PortalCapture] = fetch_portals(portals=portals, mode=mode) + + # save captured data + if callback == "kaggle": # save to a Kaggle dataset file + try: + assert "KAGGLE_USERNAME" in os.environ + assert "KAGGLE_KEY" in os.environ + except AssertionError: + logging.error("Kaggle credentials not found in environment.") + raise RuntimeError + dest_dataset: str = os.environ["KAGGLE_DATASET"] + dest_file: str = os.environ["KAGGLE_FILE"] + to_kaggle( + captures, + dataset=dest_dataset, + dest_file=dest_file, + existing_behavior=existing, + local_dir=local_dir, + ) + + # print to stdout (default) + elif not callback: + results_json: str = json.dumps( + [capture.to_dict() for capture in captures], + indent=4, + sort_keys=True, + separators=(",", ": "), + ).replace("\\n", "\n") + sys.stdout.write(results_json) + + # unimplemented callback + else: + raise ValueError(f"'{callback}' is not a valid callback.") + + +if __name__ == "__main__": + main() diff --git a/utils/src/fetch_portals/models.py b/utils/src/fetch_portals/models.py new file mode 100644 index 0000000..296cf3a --- /dev/null +++ b/utils/src/fetch_portals/models.py @@ -0,0 +1,238 @@ +# Copyright 2020 Open Knowledge Brasil + +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. + +"""Representations of concepts used by other utilities. + +This module contains reusable types and classes that model both portals where +Brazilian official gazettes are published and their attributes. +""" + +import itertools +import logging +import os +from asyncio.exceptions import TimeoutError +from collections import UserList +from dataclasses import dataclass +from datetime import datetime, timedelta, timezone +from enum import Enum +from typing import Any, List, Literal, NewType, Optional, Set, Union + +import aiohttp +from aiohttp import ClientConnectorCertificateError, ClientError, ClientTimeout +from yarl import URL + +AcceptedCallback = Literal["kaggle"] +AcceptedSource = Literal["census"] +ExistingBehavior = Literal["append", "replace", "skip"] +FetchMode = Literal["ping", "source"] +AcceptedHttpMethod = Literal["GET", "HEAD"] +IbgeCode = NewType("IbgeCode", int) # TODO: make it a UserString +LogLevel = Literal["error", "warning", "info", "debug"] +PathLike = Union[str, bytes, "os.PathLike[Any]"] + + +class GovernmentBranch(Enum): + """An enumeration of government branches in Brazil.""" + + EXECUTIVE = 1 # only the Executive branch is currently supported + # LEGISLATIVE = 2 + # JUDICIAL = 3 + # ESSENTIAL_JUSTICE = 4 + + +class GovernmentLevel(Enum): + """An enumeration of government levels in Brazil.""" + + # FEDERAL = 1 + # STATE = 2 # includes Federal District + MUNICIPALITY = 3 # only Municipalities are currently supported + + +@dataclass +class Portal: + """Representation of a portal that publishes local-level official gazettes. 
+ """ + + ibge_code: IbgeCode + url: URL + branch: GovernmentBranch = GovernmentBranch.EXECUTIVE + level: GovernmentLevel = GovernmentLevel.MUNICIPALITY + + +@dataclass +class PortalCapture: + """Capture of an official gazette publication portal at a point in time.""" + + ibge_code: IbgeCode + request_time: datetime + waiting_time: timedelta + attempts: int + initial_url: URL + final_url: Optional[URL] + method: Literal["GET", "POST"] + ssl_valid: bool + status: int + message: str + level: GovernmentLevel = GovernmentLevel.MUNICIPALITY + branch: GovernmentBranch = GovernmentBranch.EXECUTIVE + + def to_dict(self): + """Converts a PortalCapture into a dictionary.""" + return { + "ibge_code": str(self.ibge_code), + "request_time": self.request_time.isoformat(), + "waiting_time": self.waiting_time.total_seconds(), + "attempts": self.attempts, + "initial_url": str(self.initial_url), + "final_url": str(self.final_url or ""), + "method": self.method, + "ssl_valid": int(self.ssl_valid), + "status": self.status, + "message": self.message, + "level": self.level, + "branch": self.branch, + } + + +class PortalList(UserList): + """A list of official portals.""" + + def by_domain(self) -> List["PortalList"]: + """Separate a list of portals by their domains. + + This function creates a list populated with sets of unique portals that + have all the same domain in their URLs. + + Parameters: + portals: An iterable of `Portal` instances + + Returns: + A list of `PortalList`s, one for each domain in the original + instance. + """ + + logging.debug("Separating portals according to their domains...") + + # collect all unique domains + domains = set(portal.url.host for portal in self.data) + + # iterate over domains and check which portals belong to them + separated = list() + for domain in domains: + portals_in_domain = PortalList( + portal for portal in self.data if portal.url.host == domain + ) + + separated.append(portals_in_domain) # add to separated list + + return separated + + async def fetch_all( + self, + method: Literal["GET", "HEAD"] = "HEAD", + timeout: float = 10.0, + max_retries: int = 3, + ) -> List[PortalCapture]: + + logging.info(f"Fetching {len(self.data)} portals ('{method}')...") + + # create an empty list of responses data and metadata + responses: List[dict] = list() + + # remove url duplicates + unique_urls: Set[URL] = set(portal.url for portal in self.data) + + client_timeout = ClientTimeout(total=timeout) + + async with aiohttp.ClientSession( + timeout=client_timeout, trust_env=True + ) as client: + + # iterate over portal URLs + for url in unique_urls: + + # configure request + ssl_valid: bool = True # start assuming so + + # try fetching page + attempt: int = 1 + while attempt <= max_retries: + try: + logging.info( + f"Sending request to <{url}> " + + f"({attempt}/{max_retries})..." 
+ ) + request_time: datetime = datetime.now(timezone.utc) + + async with client.request( + method, url=str(url), ssl=ssl_valid + ) as response: + time_elapsed: timedelta = ( + datetime.now(timezone.utc) - request_time + ) + final_url: Optional[URL] = response.url + response_status: int = response.status + if method == "GET": + # TODO: get charsets defined in tags + message: Any = str(await response.text()) + else: + message = response.reason + if not response.ok and attempt <= max_retries: + attempt += 1 + continue + + # Invalid SSL certificate; try again without verifying + except ClientConnectorCertificateError: + ssl_valid = False + if attempt < max_retries: + continue + + # some other error; try again + except ( + ClientError, + TimeoutError, + UnicodeDecodeError, + ) as err: + time_elapsed = ( + datetime.now(timezone.utc) - request_time + ) + message = repr(err) + final_url = None + response_status = 999 + if attempt < max_retries: + attempt += 1 + continue + + # record answer if it is OK or exceeded max tries + logging.info(f"<{url}>: {message} ({response_status})") + responses.append( + { + "initial_url": url, + "final_url": final_url, + "method": method, + "attempts": attempt, + "request_time": request_time, + "waiting_time": time_elapsed, + "ssl_valid": ssl_valid, + "status": response_status, + "message": message, + } + ) + break + + # associate unique urls to portals + captures: List[PortalCapture] = list() + for portal, capture in itertools.product(self.data, responses): + if portal.url == capture["initial_url"]: + captures.append( + PortalCapture( + ibge_code=portal.ibge_code, + level=portal.level.value, + branch=portal.branch.value, + **capture, + ) + ) + return captures diff --git a/utils/src/fetch_portals/sources.py b/utils/src/fetch_portals/sources.py new file mode 100644 index 0000000..ad211a0 --- /dev/null +++ b/utils/src/fetch_portals/sources.py @@ -0,0 +1,72 @@ +# Copyright 2020 Open Knowledge Brasil + +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. + +"""Functions to interact with sources of official gazettes portals. + +This module contains functions developed as a part of the `Censo Querido +Diário`_ effort, in order to periodically fetch the contents and monitor the +service availability of portals containing the official gazettes for the 5.526 +brazilian municipalities. + +The Censo Querido Diário is a collaborative effort to push forward the +disclosure of public information embodied in official publications. +Contributions to this initiative are more than welcome. Check our +`contribution guidelines`_ (in portuguese) to learn the various ways you can +support the project. + +.. _Censo Querido Diário: + https://censo.ok.org.br/sobre/ + +.. _contribution guidelines: + https://github.com/okfn-brasil/censo-querido-diario/blob/main/CONTRIBUTING.MD +""" + +import logging +from typing import List + +import numpy as np +import pandas as pd +from yarl import URL + +from .models import IbgeCode, Portal, PortalList + + +def get_portals_from_census() -> PortalList: + """Get a list of official gazettes portals from Querido Diario Census data. + + Returns: + A list of `Portal`_ objects, containing the official Id for the city + and the portal URL. 
+ """ + + logging.info("Getting census data...") + + # download census full data + url: str = "https://censo.ok.org.br/get-data/" + df_census: pd.DataFrame = pd.read_csv(url) + + # filter and process relevant data (cities geocodes and portal URLs) + logging.debug("Processing portals information...") + portals: List[Portal] = ( + pd.wide_to_long( # type: ignore + df_census, "fonte", i="IBGE7", j="fonte_num", sep="_" + ) + .reset_index() + .dropna() + .apply( + # FIXME: avoid "None" strings in url column + lambda mun: Portal( + ibge_code=IbgeCode(mun["IBGE7"]), url=URL(mun["fonte"]) + ) + if mun.fonte != "None" + else np.nan, + axis=1, + ) + .dropna() + .to_list() + ) + + return PortalList(portals) diff --git a/utils/src/fetch_portals/test/__init__.py b/utils/src/fetch_portals/test/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/utils/src/fetch_portals/test/conftest.py b/utils/src/fetch_portals/test/conftest.py new file mode 100644 index 0000000..76f2524 --- /dev/null +++ b/utils/src/fetch_portals/test/conftest.py @@ -0,0 +1,73 @@ +# Copyright 2020 Open Knowledge Brasil + +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. + +"""Reusable Pytest fixtures for testing fetch_portals package.""" + +import json +import os +from pathlib import Path +from tempfile import TemporaryDirectory +from typing import Generator + +import pytest +from dotenv import load_dotenv + + +@pytest.fixture(scope="session") +def kaggle_api() -> "KaggleApi": # type: ignore # noqa: F821 + """Initialize and authenticate connection to Kaggle API.""" + # get set kaggle credentials as environment variables + script_path = Path(os.path.abspath(__file__)) + load_dotenv(os.path.join(script_path.parents[3], ".env")) + + # initialize api + from kaggle.api.kaggle_api_extended import KaggleApi # type: ignore + + api = KaggleApi() + api.authenticate() + + return api + + +@pytest.fixture(scope="session") +def mock_kaggle_dataset(kaggle_api) -> Generator[str, None, None]: + """Creates a Kaggle dataset for testing purposes. + + Note: + There is currently no method for programatically removing a Kaggle + dataset. Therefore, the user must manually delete the created dataset, + located at ``https://kaggle.com/myuser/example`` (where ``myuser`` is + the name of the Kaggle user provided through the ``KAGGLE_USER`` + environment variable). + + Yields: + ID of the created dataset, in the format ``myuser/example``. 
+ """ + kaggle_user = os.environ["KAGGLE_USERNAME"] + mock_data = """ + "fruit_name","fruit_color","fruit_number" + apple,red,6 + banana,yellow,12 + plum,purple,5 + """ + try: + tmpdir = TemporaryDirectory() + metadata = { + "title": "Example Dataset", + "id": kaggle_user + "/example", + "licenses": [{"name": "CC0-1.0"}], + } + with open( + os.path.join(tmpdir.name, "datapackage.json"), "w" + ) as meta_file: + meta_json = json.dumps(metadata) + meta_file.write(meta_json) + with open(os.path.join(tmpdir.name, "example_fruits.csv"), "w") as f: + f.write(mock_data) + kaggle_api.dataset_create_new(tmpdir.name) + yield metadata["id"] # type: ignore + finally: + tmpdir.cleanup() diff --git a/utils/src/fetch_portals/test/test_callbacks.py b/utils/src/fetch_portals/test/test_callbacks.py new file mode 100644 index 0000000..0a3c45d --- /dev/null +++ b/utils/src/fetch_portals/test/test_callbacks.py @@ -0,0 +1,86 @@ +# Copyright 2020 Open Knowledge Brasil + +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. + +"""Tests callback functions. + +This module contains test cases for checking whether the callback functions +defined in the `callbacks.py`_ file are working as expected. +""" + +from datetime import datetime, timedelta, timezone + +import pytest + +from ..callbacks import _autogen_version_notes, to_kaggle +from ..models import PortalCapture + + +@pytest.fixture +def mock_captures(): + """Creates a list of fake records to process and/or save.""" + captures = [ + PortalCapture( + ibge_code=2600807, + initial_url=( + "http://netuse.inf.br/altinho_pm/portaltransparencia/" + + "index.php?link=6" + ), + final_url=( + "http://netuse.inf.br/altinho_pm/portaltransparencia/" + + "index.php?link=6" + ), + method="HEAD", + attempts=1, + request_time=datetime.now(timezone.utc), + waiting_time=timedelta(seconds=1.1), + ssl_valid=True, + status=200, + message="OK", + ), + PortalCapture( + ibge_code=4200754, + initial_url=( + "https://diariomunicipal.sc.gov.br/site/" + + "?r=site/index&q=cod_entidade%3A13" + ), + final_url=( + "https://diariomunicipal.sc.gov.br/site/" + + "?r=site/index&q=cod_entidade%3A13" + ), + method="HEAD", + attempts=1, + request_time=datetime.now(timezone.utc), + waiting_time=timedelta(seconds=0.92), + ssl_valid=True, + status=200, + message="OK", + ), + ] + + return captures + + +def test_autogen_version_notes(): + """Tests generating a default version note message.""" + expected_notes = { + "create": "Create example.csv", + "append": "Add records to example.csv", + "update": "Update example.csv", + } + for operation, expected_note in expected_notes.items(): + version_note = _autogen_version_notes("example.csv", operation) + assert version_note == expected_note + + +def test_to_kaggle(mock_captures, mock_kaggle_dataset, kaggle_api): + """Tests saving some records to Kaggle""" + to_kaggle( + mock_captures, + dataset=mock_kaggle_dataset, + dest_file="example.csv", + ) + file_list = kaggle_api.dataset_list_files(mock_kaggle_dataset) + assert "example.csv" in [str(datafile) for datafile in file_list.files] diff --git a/utils/src/fetch_portals/test/test_fetchers.py b/utils/src/fetch_portals/test/test_fetchers.py new file mode 100644 index 0000000..9defbc6 --- /dev/null +++ b/utils/src/fetch_portals/test/test_fetchers.py @@ -0,0 +1,127 @@ +# Copyright 2020 Open Knowledge Brasil + +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# 
diff --git a/utils/src/fetch_portals/test/test_fetchers.py b/utils/src/fetch_portals/test/test_fetchers.py
new file mode 100644
index 0000000..9defbc6
--- /dev/null
+++ b/utils/src/fetch_portals/test/test_fetchers.py
@@ -0,0 +1,127 @@
+# Copyright 2020 Open Knowledge Brasil
+
+# Use of this source code is governed by an MIT-style
+# license that can be found in the LICENSE file or at
+# https://opensource.org/licenses/MIT.
+
+import asyncio
+from typing import List, Set
+
+import pytest
+from yarl import URL
+
+from ..fetchers import fetch_portals
+from ..models import IbgeCode, Portal, PortalCapture, PortalList
+
+
+@pytest.fixture
+def example_portals() -> PortalList:
+    """Create a `PortalList`_ instance with a few official gazette portals."""
+
+    # Altinho (PE)
+    portal1: Portal = Portal(
+        ibge_code=IbgeCode(2600807),
+        url=URL(
+            "http://netuse.inf.br/altinho_pm/portaltransparencia/index.php?"
+            + "link=6"
+        ),
+    )
+    portal2: Portal = Portal(
+        ibge_code=IbgeCode(2600807),
+        url=URL("http://www.diariomunicipal.com.br/amupe/"),
+    )
+
+    # Alto Bela Vista (SC)
+    portal3: Portal = Portal(
+        ibge_code=IbgeCode(4200754),
+        url=URL(
+            "https://diariomunicipal.sc.gov.br/site/"
+            + "?r=site/index&q=cod_entidade%3A13"
+        ),
+    )
+
+    # Anchieta (SC)
+    portal4: Portal = Portal(
+        ibge_code=IbgeCode(4200804),
+        url=URL(
+            "https://diariomunicipal.sc.gov.br/site/"
+            + "?r=site/index&q=cod_entidade%3A14"
+        ),
+    )
+
+    # Angelim (PE)
+    portal5: Portal = Portal(
+        ibge_code=IbgeCode(2601003),
+        url=URL("http://www.diariomunicipal.com.br/amupe/pesquisar"),
+    )
+    portal6 = Portal(
+        ibge_code=IbgeCode(2601003),
+        url=URL(
+            "http://174.142.65.52:16444/transparencia/angelim/prefeitura/"
+            + "legislacaomunicipal.faces"
+        ),
+    )
+    portal7 = Portal(
+        ibge_code=IbgeCode(2601003),
+        url=URL(
+            "http://174.142.65.52:16444/transparencia/angelim/prefeitura/"
+            + "outrosatos.faces"
+        ),
+    )
+
+    return PortalList(
+        [portal1, portal2, portal3, portal4, portal5, portal6, portal7]
+    )
+
+
+def test_split_by_domain(example_portals) -> None:
+    """Tests splitting a `PortalList`_ into instances with a unique domain
+    each.
+    """
+    splitted: List[PortalList] = example_portals.by_domain()
+    for subset in splitted:
+        domains: Set[str] = set(portal.url.host for portal in subset)
+        assert len(domains) == 1
+
+
+def test_head_subsets(example_portals) -> None:
+    """Tests pinging subsets of a `PortalList`_ with unique domains."""
+    subsets: List[PortalList] = example_portals.by_domain()
+    for subset in subsets:
+        subset = PortalList(subset)
+        captures: List[PortalCapture] = asyncio.run(
+            subset.fetch_all(method="HEAD", timeout=30)
+        )
+        assert len(captures) == len(subset)
+        for capture in captures:
+            assert isinstance(capture, PortalCapture)
+
+
+def test_get_subsets(example_portals) -> None:
+    """Tests capturing subsets of a `PortalList`_ with unique domains."""
+    subsets: List[PortalList] = example_portals.by_domain()
+    for subset in subsets:
+        subset = PortalList(subset)
+        captures: List[PortalCapture] = asyncio.run(
+            subset.fetch_all(method="GET", timeout=30)
+        )
+        assert len(captures) == len(subset)
+        for capture in captures:
+            assert isinstance(capture, PortalCapture)
+
+
+def test_orchestrate_pinging(example_portals) -> None:
+    """Tests asynchronously pinging multiple portals."""
+    captures: List[PortalCapture] = fetch_portals(example_portals, mode="ping")
+    assert len(captures) == len(example_portals)
+    for capture in captures:
+        assert isinstance(capture, PortalCapture)
+
+
+def test_orchestrate_sourcing(example_portals) -> None:
+    """Tests asynchronously getting the source code of multiple portals."""
+    captures: List[PortalCapture] = fetch_portals(
+        example_portals, mode="source"
+    )
+    assert len(captures) == len(example_portals)
+    for capture in captures:
+        assert isinstance(capture, PortalCapture)
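Note that these test modules hit live portals and, further down, the real Kaggle API, so they can be slow and may require credentials. A sketch for running just one module from the `utils` directory, assuming the dev extras are installed:

```python
# run only the fetcher tests; requires network access but no Kaggle account
import pytest

pytest.main(["src/fetch_portals/test/test_fetchers.py", "-q"])
```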
diff --git a/utils/src/fetch_portals/test/test_main.py b/utils/src/fetch_portals/test/test_main.py
new file mode 100644
index 0000000..b3467d4
--- /dev/null
+++ b/utils/src/fetch_portals/test/test_main.py
@@ -0,0 +1,135 @@
+#!/usr/bin/env python3
+
+# Copyright 2020 Open Knowledge Brasil
+
+# Use of this source code is governed by an MIT-style
+# license that can be found in the LICENSE file or at
+# https://opensource.org/licenses/MIT.
+
+"""Tests for the main entry point.
+
+This module contains test cases for checking whether the program entry point
+defined in the `main.py`_ file is working as expected.
+"""
+
+import os
+from tempfile import TemporaryDirectory
+
+import pandas as pd
+
+from ..main import main
+
+
+def test_ping(capsys):
+    """Tests pinging all portals in the Querido Diario Census."""
+    main(mode="ping", callback=None)
+    out, err = capsys.readouterr()
+    assert '"ibge_code": "2600807"' in out
+
+
+def test_source(capsys):
+    """Tests getting source codes for all portals in the Querido Diario
+    Census."""
+    main(mode="source", callback=None)
+    out, err = capsys.readouterr()
+    assert '"ibge_code": "2600807"' in out
+    # the captured source code should contain HTML markup
+    assert "<html" in out
+
+
+def test_ping_to_kaggle(mock_kaggle_dataset, kaggle_api):
+    """Tests saving pings to all portals in the QD Census to Kaggle."""
+    # copy the original Kaggle dataset config (it should not be modified)
+    previous_kaggle_dataset = os.getenv("KAGGLE_DATASET")
+    previous_kaggle_file = os.getenv("KAGGLE_FILE")
+
+    # upload data to the mock dataset
+    try:
+        os.environ["KAGGLE_DATASET"] = mock_kaggle_dataset
+        os.environ["KAGGLE_FILE"] = "test-ping.csv"
+        main(mode="ping", callback="kaggle", existing="append")
+        with TemporaryDirectory() as tmpdir:
+            kaggle_api.dataset_download_file(
+                mock_kaggle_dataset, os.environ["KAGGLE_FILE"], tmpdir
+            )
+            df = pd.read_csv(os.path.join(tmpdir, os.environ["KAGGLE_FILE"]))
+        for col in [
+            "ibge_code",
+            "request_time",
+            "waiting_time",
+            "attempts",
+            "initial_url",
+            "final_url",
+            "method",
+            "ssl_valid",
+            "status",
+            "message",
+            "level",
+            "branch",
+        ]:
+            assert col in df.columns
+        assert len(df.index) > 3
+        assert "200" in df["status"].unique()
+        assert "OK" in df["message"].unique()
+
+    # reset the Kaggle dataset config to the original one
+    finally:
+        if previous_kaggle_dataset:
+            os.environ["KAGGLE_DATASET"] = previous_kaggle_dataset
+        else:
+            del os.environ["KAGGLE_DATASET"]
+        if previous_kaggle_file:
+            os.environ["KAGGLE_FILE"] = previous_kaggle_file
+        else:
+            del os.environ["KAGGLE_FILE"]
+
+
+def test_source_to_kaggle(mock_kaggle_dataset, kaggle_api):
+    """Tests saving source codes for all portals in the QD Census to Kaggle."""
+    # copy the original Kaggle dataset config (it should not be modified)
+    previous_kaggle_dataset = os.getenv("KAGGLE_DATASET")
+    previous_kaggle_file = os.getenv("KAGGLE_FILE")
+
+    # upload data to the mock dataset
+    try:
+        os.environ["KAGGLE_DATASET"] = mock_kaggle_dataset
+        os.environ["KAGGLE_FILE"] = "test-source.csv"
+        main(mode="source", callback="kaggle")
+        with TemporaryDirectory() as tmpdir:
+            kaggle_api.dataset_download_file(
+                mock_kaggle_dataset, os.environ["KAGGLE_FILE"], tmpdir
+            )
+            try:
+                df = pd.read_csv(
+                    os.path.join(tmpdir, os.environ["KAGGLE_FILE"])
+                )
+            except FileNotFoundError:
+                df = pd.read_csv(
+                    os.path.join(tmpdir, os.environ["KAGGLE_FILE"] + ".zip")
+                )
+        for col in [
+            "ibge_code",
+            "request_time",
+            "waiting_time",
+            "attempts",
+            "initial_url",
+            "final_url",
+            "method",
+            "ssl_valid",
+            "status",
+            "message",
+            "level",
+            "branch",
+        ]:
+            assert col in df.columns
+        assert len(df.index) > 3
+        # the captured source code should contain HTML markup
+        assert df["message"].apply(lambda msg: "<html" in str(msg)).any()
+    # reset the Kaggle dataset config to the original one
+    finally:
+        if previous_kaggle_dataset:
+            os.environ["KAGGLE_DATASET"] = previous_kaggle_dataset
+        else:
+            del os.environ["KAGGLE_DATASET"]
+        if previous_kaggle_file:
+            os.environ["KAGGLE_FILE"] = previous_kaggle_file
+        else:
+            del os.environ["KAGGLE_FILE"]
diff --git a/utils/src/fetch_portals/test/test_sources.py b/utils/src/fetch_portals/test/test_sources.py
new file mode 100644
index 0000000..1743b7b
--- /dev/null
+++ b/utils/src/fetch_portals/test/test_sources.py
@@ -0,0 +1,20 @@
+# Copyright 2020 Open Knowledge Brasil
+
+# Use of this source code is governed by an MIT-style
+# license that can be found in the LICENSE file or at
+# https://opensource.org/licenses/MIT.
+
+
+from ..models import Portal, PortalList
+from ..sources import get_portals_from_census
+
+
+def test_get_portals_from_census() -> None:
+    """Test getting a list of official gazettes portals from the QD census."""
+    portals = get_portals_from_census()
+    assert len(portals) >= 326  # there are at least 326 mapped portals
+    assert isinstance(portals, PortalList)
+    for portal in portals:
+        assert isinstance(portal, Portal)
+        # assert len(portal.ibge_code) == 7
+        assert len(str(portal.url.host or "")) > 5