Skip to content

Commit

Permalink
updated acgan training
Browse files Browse the repository at this point in the history
  • Loading branch information
Michael Fuest committed Sep 17, 2024
1 parent a350140 commit e4d4632
Show file tree
Hide file tree
Showing 10 changed files with 135 additions and 98 deletions.
8 changes: 4 additions & 4 deletions config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,18 @@ datasets:
data_columns: ["dataid",
"local_15min",
"car1",
"car2",
#"car2",
"grid",
"solar",
"solar2",
#"solar2",
]
metadata_columns: ["dataid",
"building_type",
#"pv",
"solar",
"pv",
"car1",
"city",
"state"
"state",
]
goinerdata:
path: "home/fuest/EnData/data/goinerdata/"
Expand Down
13 changes: 7 additions & 6 deletions config/model_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,14 @@ noise_dim: 256
cond_emb_dim: 256
shuffle: True

categorical_conditioning_vars: # for each desired categorical conditioning variable, add the name and number of categories
conditioning_vars: # for each desired conditioning variable, add the name and number of categories
month: 12
day: 7
#pv: 2
#ev: 2
numerical_conditioning_vars: # for each desired numerical conditioning variable, add the name
- square_footage
weekday: 7
building_type: 3
solar: 2
car1: 2
city: 7
state: 3

diffcharge:
batch_size: 4
Expand Down
30 changes: 16 additions & 14 deletions datasets/pecanstreet.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
import yaml
from torch.utils.data import Dataset

from datasets.utils import encode_categorical_variables

warnings.filterwarnings("ignore", category=pd.errors.SettingWithCopyWarning)
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
Expand Down Expand Up @@ -77,7 +79,7 @@ def load_and_preprocess_data(
self,
) -> Tuple[pd.DataFrame, pd.DataFrame, Dict[int, bool]]:
"""
Loads and preprocesses the data, including filtering and normalization.
Loads and preprocesses the data, including filtering, normalization and handling of categorical metadata.
Returns:
Tuple[pd.DataFrame, pd.DataFrame, Dict[int, bool]]: Processed data, metadata, and user flags.
Expand All @@ -88,6 +90,8 @@ def load_and_preprocess_data(
user_flags = self._set_user_flags(metadata, data)
data = self._preprocess_data(data)
data = pd.merge(data, metadata, on="dataid", how="left")
data, mappings = encode_categorical_variables(data.fillna("no"))
self.mappings = mappings
return data, metadata, user_flags

def _load_full_data(self, path: str, columns: List[str]) -> pd.DataFrame:
Expand Down Expand Up @@ -240,6 +244,7 @@ def _preprocess_solar(self, data: pd.DataFrame) -> pd.DataFrame:
solar_data = self._apply_normalization(solar_data, "solar")

return solar_data


@staticmethod
def _merge_columns_into_timeseries(df: pd.DataFrame) -> pd.DataFrame:
Expand Down Expand Up @@ -363,6 +368,7 @@ def __len__(self) -> int:
int: The number of samples.
"""
return len(self.data)


def __getitem__(self, idx: int) -> Tuple[torch.Tensor, Dict[str, torch.Tensor], Dict[str, torch.Tensor]]:
"""
Expand All @@ -371,29 +377,25 @@ def __getitem__(self, idx: int) -> Tuple[torch.Tensor, Dict[str, torch.Tensor],
Returns:
Tuple[torch.Tensor, Dict[str, torch.Tensor], Dict[str, torch.Tensor]]:
- time_series: The time series data tensor.
- categorical_vars: Dictionary of categorical conditioning variables.
- numerical_vars: Dictionary of numerical conditioning variables.
- conditioning_vars: Dictionary of conditioning variables.
"""
sample = self.data.iloc[idx]
time_series = sample["timeseries"]

# Extract conditioning variables
categorical_vars = {
conditioning_vars = {
'month': torch.tensor(sample['month'], dtype=torch.long),
'weekday': torch.tensor(sample['weekday'], dtype=torch.long),
'building_type': torch.tensor(sample['building_type'], dtype=torch.long),
'pv': torch.tensor(sample['pv'], dtype=torch.long),
'ev': torch.tensor(sample['ev'], dtype=torch.long),
}

numerical_vars = {
'total_square_footage': torch.tensor(sample['total_square_footage'], dtype=torch.long),
'car1': torch.tensor(sample['car1'], dtype=torch.long),
'city': torch.tensor(sample['city'], dtype=torch.long),
'state': torch.tensor(sample['state'], dtype=torch.long),
'solar': torch.tensor(sample['solar'], dtype=torch.long)
}

return (
torch.tensor(time_series, dtype=torch.float32),
categorical_vars,
numerical_vars
conditioning_vars
)


Expand Down Expand Up @@ -435,15 +437,15 @@ def __init__(
self.is_pv_user = is_pv_user
self.include_generation = include_generation
self.metadata = metadata
self.include_user_metadata()
#self.include_user_metadata()

def include_user_metadata(self):
"""
Merges user metadata with the time series data.
"""
self.data = pd.merge(
left=self.data,
right=self.metadata[["dataid", "city", "pv", "car1"]],
right=self.metadata[["dataid", "city", "solar", "car1"]],
on="dataid",
)

Expand Down
39 changes: 39 additions & 0 deletions datasets/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,3 +76,42 @@ def split_dataset(dataset: Dataset, val_split: float = 0.1) -> Tuple[Dataset, Da
val_dataset = torch.utils.data.Subset(dataset, range(train_size, len(dataset)))

return train_dataset, val_dataset


def encode_categorical_variables(df):
    """
    Encodes categorical variables in a DataFrame to integer codes.

    Every column whose dtype is ``object`` or ``category`` is encoded, except
    the ``timeseries`` column, which is skipped. The input DataFrame is not
    modified; a copy is encoded and returned.

    Args:
        df (pd.DataFrame): Input DataFrame containing categorical variables.

    Returns:
        df_encoded (pd.DataFrame): Copy of ``df`` with categorical variables
            encoded as integer codes.
        mappings (dict): Dictionary keyed by encoded column name. Each value
            is a dict with two entries:
            ``'category_to_code'`` maps a category value to its integer code,
            ``'code_to_category'`` maps an integer code back to its category.
    """
    df_encoded = df.copy()
    mappings = {}

    # Select columns with object or category data types
    categorical_cols = df_encoded.select_dtypes(include=['object', 'category']).columns

    for col in categorical_cols:
        if col == "timeseries":  # skip time series col
            continue

        # Convert column to 'category' dtype if not already
        as_category = df_encoded[col].astype('category')

        # The position of a category in `.cat.categories` is its integer code,
        # so enumerate() yields the code -> category direction; invert it once
        # for the category -> code direction. Previously both directions were
        # returned as category -> code, which made decoding impossible.
        code_to_category = dict(enumerate(as_category.cat.categories))
        category_to_code = {category: code for code, category in code_to_category.items()}

        # Replace categories with codes in the DataFrame
        df_encoded[col] = as_category.cat.codes

        # Save the mapping for the current column
        mappings[col] = {
            'category_to_code': category_to_code,
            'code_to_category': code_to_category,
        }

    return df_encoded, mappings
19 changes: 18 additions & 1 deletion eval/evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ def run_eval(
writer.close()
return syn_data_array_inv[:, :, 0], real_data_array_inv[:, :, 0]

def evaluate_all_users(self):
def evaluate_all_user_models(self):
"""
Evaluate the model for all users in the dataset.
"""
Expand Down Expand Up @@ -245,6 +245,23 @@ def evaluate_all_pv_users(self):

self.run_eval(dataset, model, user_writer, None)

def evaluate_all_users(self):
    """
    Evaluate one model on the full real dataset, covering all users together.

    The real dataset's ``is_pv_user`` flag is set to ``False``, the trained
    model is fetched for ``self.model_name``, and the evaluation results are
    logged through a dedicated ``all_users`` writer placed under the main
    writer's log directory.
    """
    full_dataset = self.real_dataset
    full_dataset.is_pv_user = False
    trained_model = self.get_trained_model_for_user(self.model_name, full_dataset)

    # Separate log directory so this run does not mix with other evaluations
    all_users_writer = SummaryWriter(f"{self.writer.log_dir}/all_users")

    banner = "----------------------"
    print(banner)
    print("Starting evaluation for all users")
    print(banner)

    self.run_eval(full_dataset, trained_model, all_users_writer, None)


def evaluate_all_non_pv_users(self):
"""
Evaluate the model for all non-PV users in the dataset.
Expand Down
15 changes: 4 additions & 11 deletions generator/conditioning.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,34 +3,27 @@
import torch.nn.functional as F

class ConditioningModule(nn.Module):
def __init__(self, categorical_dims, numerical_dims, embedding_dim, device):
def __init__(self, categorical_dims, embedding_dim, device):
super(ConditioningModule, self).__init__()
self.embedding_dim = embedding_dim
self.device = device

self.category_embeddings = nn.ModuleDict({
name: nn.Embedding(num_categories + 1, embedding_dim).to(device) # +1 for 'missing' token
name: nn.Embedding(num_categories, embedding_dim).to(device)
for name, num_categories in categorical_dims.items()
})
self.numerical_projections = nn.ModuleDict({
name: nn.Linear(1, embedding_dim).to(device)
for name in numerical_dims
})
total_dim = (len(categorical_dims) + len(numerical_dims)) * embedding_dim
total_dim = len(categorical_dims) * embedding_dim
self.mlp = nn.Sequential(
nn.Linear(total_dim, 128),
nn.ReLU(),
nn.Linear(128, embedding_dim)
).to(device)

def forward(self, categorical_vars, numerical_vars):
def forward(self, categorical_vars):
embeddings = []
for name, embedding in self.category_embeddings.items():
var = categorical_vars[name].to(self.device)
embeddings.append(embedding(var))
for name, projection in self.numerical_projections.items():
var = numerical_vars[name].unsqueeze(1).to(self.device) # Ensure shape (batch_size, 1)
embeddings.append(projection(var))
conditioning_vector = torch.cat(embeddings, dim=1)
conditioning_vector = self.mlp(conditioning_vector)
return conditioning_vector
Loading

0 comments on commit e4d4632

Please sign in to comment.