From ed8e16d09cf5aadbd650efda2ed3dabbc0611e96 Mon Sep 17 00:00:00 2001 From: Jerome Dockes Date: Fri, 26 Apr 2024 13:30:01 +0200 Subject: [PATCH] add ames housing --- doc/make_doc.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/doc/make_doc.py b/doc/make_doc.py index fe36c95..f1029ea 100755 --- a/doc/make_doc.py +++ b/doc/make_doc.py @@ -5,6 +5,7 @@ from pathlib import Path import polars as pl +import pandas as pd from skrub import datasets as skrub_data from sklearn import datasets as sklearn_data from skrubview import Report @@ -18,16 +19,17 @@ reports_dir = build_dir / "reports" reports_dir.mkdir() - +AMES_HOUSING_CSV = "https://www.openml.org/data/get_csv/20649135/file2ed11cebe25.arff" +datasets = [(pd.read_csv(AMES_HOUSING_CSV), "AMES Housing")] skrub_dataset_names = [ "employee_salaries", "medical_charge", "traffic_violations", - "drug_directory" -] -datasets = [ - (getattr(skrub_data, f"fetch_{name}")().X, name) for name in skrub_dataset_names + "drug_directory", ] +datasets.extend( + [(getattr(skrub_data, f"fetch_{name}")().X, name) for name in skrub_dataset_names] +) sklearn_dataset_names = ["titanic"] datasets.extend( @@ -42,6 +44,7 @@ ] ) + def add_report(df, name): print(f"making report for {name}", end="", flush=True) df = pl.from_pandas(df)