Skip to content

Commit

Permalink
Feat/new ruff config (#8)
Browse files Browse the repository at this point in the history
New ruff configuration.
  • Loading branch information
edgBR committed Jan 16, 2024
2 parents d8f5a86 + 85bb402 commit b16a946
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 9 deletions.
15 changes: 9 additions & 6 deletions code/main.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import logging
import logging # noqa: D100
import os
import time
from io import BytesIO
Expand Down Expand Up @@ -35,8 +35,8 @@


class ETLPipeline:
"""
Mock class that simulates a modulith to process from landing to bronze, silver and gold.
"""Mock class that simulates a modulith to process from landing to bronze, silver and gold.
It assumes that we process one file at a time for simplicity (mostly because
I did not want to write asyncio calls and deal with the connections in the ADLSGenClient).
"""
Expand All @@ -48,14 +48,13 @@ def __init__(self) -> None:
self.landing_client = self.adlsgen2_client.get_directory_client(file_system=LANDING_ZONE_PATH, directory='/')

def upload_to_landing(self, uri: str):
"""_summary_
"""Uploads the data to the landing zone.
Args:
uri (str): URL of the zip archive to download.
"""

try:
response = requests.get(url=uri)
response = requests.get(url=uri, timeout=120)
response.raise_for_status()
with ZipFile(BytesIO(response.content), 'r') as zip_ref:
# Assuming there is only one file in the zip archive
Expand All @@ -81,6 +80,7 @@ def upload_to_landing(self, uri: str):
raise e

def raw_to_bronze(self):
"""Append the data from the raw landing zone to the bronze layer."""
try:
storage_options_raw = {"account_name": ACCOUNT_NAME, "anon": False}
storage_options_raw_delta = {"account_name": ACCOUNT_NAME, "use_azure_cli": "True"}
Expand All @@ -106,6 +106,7 @@ def raw_to_bronze(self):
raise e

def bronze_to_silver(self):
"""Merge the data incrementally into the silver table."""
try:
storage_options_raw_delta = {"account_name": ACCOUNT_NAME, "use_azure_cli": "True"}

Expand Down Expand Up @@ -142,9 +143,11 @@ def bronze_to_silver(self):
raise e

def silver_to_gold(self):
"""Aggregate the data in gold tables."""
return True

def _table_checker(self, container, options):
"""Internal method to check if the delta table exists."""
try:
delta_table = DeltaTable(table_uri=f"abfss://{container}/", storage_options=options)
logger_normal.info(f"Delta table version is {delta_table.version()}")
Expand Down
7 changes: 4 additions & 3 deletions code/minimal_local.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
from datetime import datetime
from datetime import datetime # noqa: D100

import polars as pl
from deltalake import DeltaTable
from polars.io.delta import _convert_pa_schema_to_delta


def execute():
"""Executes the dummy processing pipeline."""
df = pl.DataFrame(
{
"sales_order_id": ["1000", "1001", "1002", "1003"],
Expand All @@ -22,7 +23,7 @@ def execute():
)
print(df)

df.write_delta("/tmp/sales_orders", mode="append")
df.write_delta("/tmp/sales_orders", mode="append") # noqa: S108

new_data = pl.DataFrame(
{
Expand All @@ -34,7 +35,7 @@ def execute():
}
)

dt = DeltaTable("/tmp/sales_orders")
dt = DeltaTable("/tmp/sales_orders") # noqa: S108
source = new_data.to_arrow()
delta_schema = _convert_pa_schema_to_delta(source.schema)
source = source.cast(delta_schema)
Expand Down
31 changes: 31 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ line-length = 120
[tool.ruff]
# Same as Black.
line-length = 120
indent-width = 4

# Assume Python 3.10
target-version = "py310"

exclude = [
"jupyter_notebook_config.py",
Expand All @@ -61,19 +65,31 @@ exclude = [
"node_modules",
"venv"]

extend-include = ["*.ipynb"]
select = [
"E", # pycodestyle errors (settings from FastAPI, thanks, @tiangolo!)
"D", # pydocstyle errors
"W", # pycodestyle warnings
"F", # pyflakes
"I", # isort
"C", # flake8-comprehensions (C4) and mccabe complexity (C90)
"B", # flake8-bugbear
"N", # PEP8 naming
"S", # Bandit
"PL", # Pylint
]
ignore = [
"E501", # line too long, handled by black
"C901", # too complex
"D401", # not important for us
"D104", # not important for us (yet)
]

fixable = ["ALL"]
unfixable = []

dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"

[tool.ruff.lint.pydocstyle]
# Use numpy docstrings
convention = "numpy"
Expand All @@ -84,3 +100,18 @@ relative-imports-order = "closest-to-furthest"
extra-standard-library = ["typing"]
section-order = ["future", "standard-library", "third-party", "first-party", "local-folder"]
known-first-party = []

[tool.ruff.format]
# Like Black, use double quotes for strings.
quote-style = "double"
# Like Black, indent with spaces, rather than tabs.
indent-style = "space"
# Like Black, respect magic trailing commas.
skip-magic-trailing-comma = false
# Like Black, automatically detect the appropriate line ending.
line-ending = "auto"

[tool.ruff.per-file-ignores]
# Ignore `E402` (import violations) in all `__init__.py` files, and in `path/to/file.py`.
"__init__.py" = ["E402"]
"path/to/file.py" = ["E402"]

0 comments on commit b16a946

Please sign in to comment.