Skip to content

Commit

Permalink
structure de pipeline br_camara_dados_abertos.proposicao
Browse files Browse the repository at this point in the history
  • Loading branch information
tricktx committed Jan 6, 2024
1 parent 207f14d commit 7464543
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 1 deletion.
10 changes: 9 additions & 1 deletion pipelines/datasets/br_camara_dados_abertos/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ class constants(Enum):
INPUT_PATH = "/tmp/input/"
OUTPUT_PATH = "/tmp/output/"

ANOS = [2023]
ANOS = [2024]

TABLE_LIST = {
"votacao_microdados": "votacoes",
Expand Down Expand Up @@ -53,3 +53,11 @@ class constants(Enum):
"deputado_ocupacao": "https://docs.google.com/spreadsheets/d/1Cj6WE3jk63p21IjrINeaYKoMSOGoDDf1XpY3UH8sct4/edit#gid=0",
"deputado_profissao": "https://docs.google.com/spreadsheets/d/12R2OY7eqUKxuojcpYYBsCiHyzUOLBBdObnkuv2JUMNI/edit#gid=0",
}

# ------------------------------------------------------------> PROPOSIÇÃO

TABLE_LIST_PROPOSICAO = {
"proposicao_microdados": "proposicoes",
"proposicao_autor": "proposicoesAutores",
"proposicao_tema": "proposicoesTemas",
}
12 changes: 12 additions & 0 deletions pipelines/datasets/br_camara_dados_abertos/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from pipelines.datasets.br_camara_dados_abertos.utils import (
get_data,
get_data_deputados,
get_data_proposicao_microdados,
read_and_clean_camara_dados_abertos,
read_and_clean_data_deputados,
)
Expand Down Expand Up @@ -87,3 +88,14 @@ def treat_and_save_table(table_id):
log(f"{constants_camara.OUTPUT_PATH.value}{table_id}/data.csv")

return f"{constants_camara.OUTPUT_PATH.value}{table_id}/data.csv"


# -------------------------------------------------------------------> PROPOSIÇÃO


@task
def get_date_proposicao():
    """Return the largest ``dataApresentacao`` value of the proposição microdata.

    The microdata frame comes from ``get_data_proposicao_microdados()``; the
    result is whatever ``Series.max()`` yields for that column.
    """
    proposicoes = get_data_proposicao_microdados()
    return proposicoes["dataApresentacao"].max()
42 changes: 42 additions & 0 deletions pipelines/datasets/br_camara_dados_abertos/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,3 +155,45 @@ def get_data_deputados():
)

return df


# ----------------------------------------------------------------------------------- > Proposição


def download_csvs_camara_proposicao() -> None:
    """Download the yearly proposição CSV files into the input directory.

    For every year in ``constants.ANOS`` and every file name in
    ``constants.TABLE_LIST_PROPOSICAO`` the file ``<name>-<year>.csv`` is
    requested from dadosabertos.camara.leg.br and, on HTTP 200, written to
    ``<INPUT_PATH>/<name>-<year>.csv``.

    Returns:
        None

    Raises:
        Exception: if the server answers with a 4xx or 5xx status code.
    """
    print("Downloading csvs from camara de proposição")
    input_dir = constants.INPUT_PATH.value
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(input_dir, exist_ok=True)

    for ano in constants.ANOS.value:
        # Only the remote file names (the dict values) are needed here.
        for nome_arquivo in constants.TABLE_LIST_PROPOSICAO.value.values():
            arquivo = f"{nome_arquivo}-{ano}.csv"
            url = (
                "http://dadosabertos.camara.leg.br/arquivos/"
                f"{nome_arquivo}/csv/{arquivo}"
            )

            # A timeout keeps the pipeline from hanging on a stalled connection.
            response = requests.get(url, timeout=60)

            if 400 <= response.status_code <= 599:
                raise Exception(
                    f"Erro de requisição: status code {response.status_code}"
                )

            if response.status_code == 200:
                # Write to a per-table, per-year file; opening INPUT_PATH
                # itself would try to open the directory for writing.
                with open(os.path.join(input_dir, arquivo), "wb") as f:
                    f.write(response.content)
                print("donwload complet")
            # Any other status (1xx/3xx, non-200 2xx) is skipped, as before.


def get_data_proposicao_microdados():
    """Download the proposição CSVs and load the microdata table.

    Calls ``download_csvs_camara_proposicao()`` and then reads
    ``<INPUT_PATH>/<name>-<year>.csv`` (``;``-separated) for every year in
    ``constants.ANOS``, where ``<name>`` is the ``"proposicao_microdados"``
    entry of ``constants.TABLE_LIST_PROPOSICAO``.

    Returns:
        pandas.DataFrame: the rows of all yearly files, concatenated in the
        order of ``constants.ANOS`` with a fresh integer index.

    Raises:
        ValueError: from ``pandas.concat`` if ``constants.ANOS`` is empty.
    """
    download_csvs_camara_proposicao()

    nome_arquivo = constants.TABLE_LIST_PROPOSICAO.value["proposicao_microdados"]
    # ANOS is a list, so build one path per year instead of interpolating
    # the whole list into a single file name.
    frames = [
        pd.read_csv(
            os.path.join(constants.INPUT_PATH.value, f"{nome_arquivo}-{ano}.csv"),
            sep=";",
        )
        for ano in constants.ANOS.value
    ]

    return pd.concat(frames, ignore_index=True)

0 comments on commit 7464543

Please sign in to comment.