Skip to content

Commit

Permalink
Merge pull request #119 from dagster-io/izzy/fix_hooli_basics
Browse files (browse the repository at this point in the history)
Fix hooli_basics bug
  • Loading branch information
izzye84 committed Aug 26, 2024
2 parents 61fa9e7 + 6180855 commit fe8a6d4
Show file tree
Hide file tree
Showing 5 changed files with 18 additions and 5 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/deploy-dagster-cloud.yml
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ jobs:
if: steps.prerun.outputs.result != 'skip'
run: |
pip install pip --upgrade;
- pip install dagster-dbt dagster-cloud dbt-core dbt-duckdb dbt-snowflake --upgrade --upgrade-strategy eager;
+ pip install dagster-dbt dagster-cloud dbt-core dbt-duckdb dbt-snowflake grpcio-health-checking==1.64.3 --upgrade --upgrade-strategy eager;
make deps
dagster-dbt project prepare-and-package --file hooli_data_eng/project.py
dagster-cloud ci dagster-dbt project manage-state --file hooli_data_eng/project.py --source-deployment data-eng-prod
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -174,3 +174,5 @@ tmp*/
# dbt
dbt_project/example.duckdb*
dbt_project/logs

.DS_Store
12 changes: 10 additions & 2 deletions hooli_basics/definitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,23 @@
)
def country_stats() -> DataFrame:
df = read_html("https://tinyurl.com/mry64ebh", flavor='html5lib')[0]
- df.columns = ["country", "continent", "region", "pop_2022", "pop_2023", "pop_change"]
+ df.columns = ["country", "Population (1 July 2022)", "Population (1 July 2023)", "Change", "UN Continental Region[1]", "UN Statistical Subregion[1]"]
+ df = df.drop(columns=["Change"])
+ df = df.rename(columns={
+ "UN Continental Region[1]": "continent",
+ "UN Statistical Subregion[1]": "region",
+ "Population (1 July 2022)": "pop_2022",
+ "Population (1 July 2023)": "pop_2023",
+ }
+ )
df["pop_change"] = ((to_numeric(df["pop_2023"]) / to_numeric(df["pop_2022"])) - 1)*100
return df

@asset_check(
asset=country_stats
)
def check_country_stats(country_stats):
- return AssetCheckResult(success=True)
+ return AssetCheckResult(passed=True)

@asset(
compute_kind="Kubernetes",
Expand Down
1 change: 1 addition & 0 deletions hooli_basics/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
pandas
beautifulsoup4
html5lib
scikit-learn
dagster
Expand Down
6 changes: 4 additions & 2 deletions hooli_data_eng/assets/forecasting/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,16 @@
from scipy import optimize

from dagster import (
AssetIn,
asset,
AssetKey,
AssetIn,
MonthlyPartitionsDefinition,
Output,
Field,
Config,
AssetExecutionContext,
MaterializeResult,
SourceAsset,
)
from dagster_k8s import PipesK8sClient
from dagstermill import define_dagstermill_asset
Expand Down Expand Up @@ -283,4 +285,4 @@ def k8s_pod_asset(
namespace="data-eng-prod",
base_pod_spec=pod_spec,
extras=extras,
).get_materialize_result()
).get_materialize_result()

0 comments on commit fe8a6d4

Please sign in to comment.