Skip to content

Commit

Permalink
added kin8nm preprocess
Browse files Browse the repository at this point in the history
  • Loading branch information
velezbeltran committed Aug 1, 2024
1 parent 9581f86 commit adc9f4d
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 1 deletion.
2 changes: 1 addition & 1 deletion testbed/src/testbed/data/links.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,5 @@
"news": "https://archive.ics.uci.edu/static/public/332/online+news+popularity.zip",
"power": "https://archive.ics.uci.edu/static/public/294/combined+cycle+power+plant.zip",
"superconductor": "https://archive.ics.uci.edu/static/public/464/superconductivty+data.zip",
"wave": "https://archive.ics.uci.edu/static/public/494/wave+energy+converters.zip"
"wave": "https://archive.ics.uci.edu/static/public/494/wave+energy+converters.zip",
}
27 changes: 27 additions & 0 deletions testbed/src/testbed/data/uci/kin8nm/preprocess.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import argparse
from pathlib import Path
import numpy as np
import zipfile
import pandas as pd

def main(path_raw_dataset_dir: Path):
    """Unpack the raw kin8nm archive and store it as one ``data.npy`` file.

    The function extracts ``data.zip`` into the raw directory, reads the
    resulting ``data.csv``, and splits it into a target vector (column
    ``"y"``) and a feature matrix (every other column).

    Args:
        path_raw_dataset_dir: Directory holding ``data.zip``. The archive
            must contain ``data.csv`` with a ``"y"`` column.

    Side effects:
        * Every member of the archive is extracted into
          ``path_raw_dataset_dir``.
        * ``data.npy`` is written to the *parent* of
          ``path_raw_dataset_dir``.
    """
    archive_path = path_raw_dataset_dir / "data.zip"
    with zipfile.ZipFile(archive_path, "r") as archive:
        archive.extractall(path_raw_dataset_dir)

    frame = pd.read_csv(path_raw_dataset_dir / "data.csv")
    features = frame.drop(columns=["y"]).values
    targets = frame["y"].values

    # No column is flagged as categorical, so the index list stays empty.
    payload = {"x": features, "y": targets, "categorical": []}

    # np.save wraps the dict in a pickled 0-d object array; readers need
    # np.load(..., allow_pickle=True).item() to get the dict back.
    output_path = path_raw_dataset_dir.parent / "data.npy"
    np.save(output_path, payload)

if __name__ == "__main__":
    # Script entry point: the single positional argument is the raw dataset
    # directory, parsed as a Path and handed straight to main().
    cli = argparse.ArgumentParser()
    cli.add_argument("path", type=Path)
    main(cli.parse_args().path)
Binary file added testbed/src/testbed/data/uci/kin8nm/raw/data.zip
Binary file not shown.

0 comments on commit adc9f4d

Please sign in to comment.