Skip to content

Commit

Permalink
refactor(model): read training data from the local filesystem with pandas
Browse files: browse the repository at this point in the history
  • Loading branch information
ireneisdoomed committed Feb 13, 2025
1 parent 58f35d9 commit d45acea
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 4 deletions.
2 changes: 1 addition & 1 deletion src/gentropy/l2g.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ def __init__(
# Load common inputs
self.credible_set = StudyLocus.from_parquet(
session, credible_set_path, recursiveFileLookup=True
)
).filter(f.col("studyLocusId") == "2089b267ff0a27715af4b75d81abd834")
self.feature_matrix = L2GFeatureMatrix(
_df=session.load_data(feature_matrix_path),
)
Expand Down
11 changes: 8 additions & 3 deletions src/gentropy/method/l2g/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from __future__ import annotations

import json
import logging
from dataclasses import dataclass, field
from pathlib import Path
from typing import TYPE_CHECKING, Any
Expand Down Expand Up @@ -92,12 +93,16 @@ def load_from_disk(
try:
# Try loading the training data if it is in the model directory
training_data = L2GFeatureMatrix(
_df=session.load_data(
(Path(path) / "training_data.parquet").as_posix()
_df=session.spark.createDataFrame(
# Parquet is read with Pandas to easily read local files
pd.read_parquet(
(Path(path) / "training_data.parquet").as_posix()
)
),
features_list=kwargs.get("features_list"),
)
except Exception:
except Exception as e:
logging.error("Training data set to none. Error: %s", e)
training_data = None

if not loaded_model._is_fitted():
Expand Down

0 comments on commit d45acea

Please sign in to comment.