Skip to content

Commit

Permalink
bug fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
Michael Fuest committed Sep 18, 2024
1 parent b5d3e25 commit 2c4a7a9
Show file tree
Hide file tree
Showing 6 changed files with 118 additions and 109 deletions.
2 changes: 1 addition & 1 deletion config/model_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ conditioning_vars: # for each desired conditioning variable, add the name and nu

diffcharge:
batch_size: 64
n_epochs: 1000
n_epochs: 10
init_lr: 3e-5
network: cnn # attention
guidance_scale: 1.2
Expand Down
2 changes: 1 addition & 1 deletion datasets/pecanstreet.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ class PecanStreetDataManager:
def __init__(
self,
geography: str = None,
config_path: str = "config/data_config.yaml",
config_path: str = "EnData/config/data_config.yaml",
normalize: bool = False,
threshold: Union[Tuple[float, float], None] = None,
include_generation: bool = False,
Expand Down
13 changes: 6 additions & 7 deletions eval/evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,7 @@ def __init__(self, real_dataset: Any, model_name: str, log_dir: str = "runs"):
"pred": [],
}

def evaluate_for_user(
self, user_id: int, differenced=False
) -> Tuple[np.ndarray, np.ndarray]:
def evaluate_for_user(self, user_id: int) -> Tuple[np.ndarray, np.ndarray]:
"""
Evaluate the model for a specific user.
Expand All @@ -75,7 +73,7 @@ def evaluate_for_user(
Returns:
Tuple[np.ndarray, np.ndarray]: Synthetic and real data for the user.
"""
user_dataset = self.real_dataset.create_user_dataset(user_id, differenced)
user_dataset = self.real_dataset.create_user_dataset(user_id)
model = self.get_trained_model_for_user(self.model_name, user_dataset)
user_log_dir = f"{self.writer.log_dir}/user_{user_id}"
user_writer = SummaryWriter(user_log_dir)
Expand Down Expand Up @@ -148,9 +146,10 @@ def evaluate_all_user_models(self):
real_data = []

for user_id in user_ids:
syn_user_data, real_user_data = self.evaluate_for_user(user_id)
syn_data.append(syn_user_data)
real_data.append(real_user_data)
if user_id == 3687:
syn_user_data, real_user_data = self.evaluate_for_user(user_id)
syn_data.append(syn_user_data)
real_data.append(real_user_data)

syn_data = np.expand_dims(np.concatenate(syn_data, axis=0), axis=-1)
real_data = np.expand_dims(np.concatenate(real_data, axis=0), axis=-1)
Expand Down
178 changes: 83 additions & 95 deletions generator/diffcharge/diffusion.py
Original file line number Diff line number Diff line change
@@ -1,65 +1,69 @@
"""
This class is adapted/taken from the DiffCharge GitHub repository:
Repository: https://github.com/LSY-Cython/DiffCharge
Authors: Siyang Li, Hui Xiong, Yize Chen (HKUST-GZ)
License: None
Modifications (if any):
- Changes to conditioning logic
- Added classifier-free-guidance sampling
Note: Please ensure compliance with the repository's license and credit the original authors when using or distributing this code.
"""

import numpy as np
import scipy.signal as sig
import torch.utils.data
from torch import nn
import torch
import torch.nn as nn
from tqdm import tqdm

from datasets.utils import prepare_dataloader
from generator.diffcharge.network import *
from generator.conditioning import ConditioningModule
from generator.diffcharge.network import CNN
from generator.diffcharge.network import Attention
from generator.diffusion_ts.gaussian_diffusion import cosine_beta_schedule


class DDPM:
def __init__(self, opt):
super().__init__()
self.opt = opt
self.device = opt.device

# Initialize the conditioning module
self.conditioning_module = ConditioningModule(
categorical_dims=opt.categorical_dims,
embedding_dim=opt.cond_emb_dim,
device=opt.device,
)

# Initialize the epsilon model
if opt.network == "attention":
self.eps_model = Attention(opt).to(opt.device)
self.eps_model = Attention(opt).to(self.device)
else:
self.eps_model = CNN(opt).to(opt.device)
self.opt = opt
self.eps_model = CNN(opt).to(self.device)

self.n_steps = opt.n_steps
if opt.schedule == "linear":
schedule = opt.schedule
beta_start = opt.beta_start
beta_end = opt.beta_end

if schedule == "linear":
self.beta = torch.linspace(
opt.beta_start, opt.beta_end, opt.n_steps, device=opt.device
beta_start, beta_end, self.n_steps, device=self.device
)
elif opt.schedule == "cosine":
self.beta = cosine_beta_schedule(opt.n_steps)
elif schedule == "cosine":
self.beta = cosine_beta_schedule(self.n_steps).to(self.device)
else:
self.beta = (
torch.linspace(
opt.beta_start**0.5,
opt.beta_end**0.5,
opt.n_steps,
device=opt.device,
beta_start**0.5,
beta_end**0.5,
self.n_steps,
device=self.device,
)
** 2
)

self.alpha = 1.0 - self.beta
self.alpha_bar = torch.cumprod(self.alpha, dim=0)
# self.sigma2 = self.beta
self.sigma2 = torch.cat(
(
torch.tensor([self.beta[0]], device=opt.device),
self.beta[1:] * (1 - self.alpha_bar[0:-1]) / (1 - self.alpha_bar[1:]),
torch.tensor([self.beta[0]], device=self.device),
self.beta[1:] * (1 - self.alpha_bar[:-1]) / (1 - self.alpha_bar[1:]),
)
)
self.optimizer = torch.optim.Adam(self.eps_model.parameters(), lr=opt.init_lr)
self.loss_func = nn.MSELoss()
p1, p2 = int(0.75 * opt.n_epochs), int(0.9 * opt.n_epochs)
n_epochs = opt.n_epochs
p1, p2 = int(0.75 * n_epochs), int(0.9 * n_epochs)
self.lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
self.optimizer, milestones=[p1, p2], gamma=0.1
)
Expand All @@ -69,110 +73,94 @@ def gather(self, const, t):

def q_xt_x0(self, x0, t):
alpha_bar = self.gather(self.alpha_bar, t)
mean = (alpha_bar**0.5) * x0
mean = (alpha_bar.sqrt()) * x0
var = 1 - alpha_bar
return mean, var

def q_sample(self, x0, t, eps):
mean, var = self.q_xt_x0(x0, t)
return mean + (var**0.5) * eps
return mean + var.sqrt() * eps

def p_sample(self, xt, c, t, guidance_scale=1.0):
eps_theta_cond = self.eps_model(xt, c, t)
eps_theta_uncond = self.eps_model(xt, torch.zeros_like(c, device=c.device), t)
eps_theta_uncond = self.eps_model(xt, torch.zeros_like(c), t)
eps_theta = eps_theta_uncond + guidance_scale * (
eps_theta_cond - eps_theta_uncond
)
alpha_bar = self.gather(self.alpha_bar, t)
alpha = self.gather(self.alpha, t)
eps_coef = (1 - alpha) / (1 - alpha_bar) ** 0.5
mean = (xt - eps_coef * eps_theta) / (alpha**0.5)
eps_coef = (1 - alpha) / (1 - alpha_bar).sqrt()
mean = (xt - eps_coef * eps_theta) / alpha.sqrt()
var = self.gather(self.sigma2, t)
if (t == 0).all():
z = torch.zeros(xt.shape, device=xt.device)
z = torch.zeros_like(xt)
else:
z = torch.randn(xt.shape, device=xt.device)
return mean + (var**0.5) * z
z = torch.randn_like(xt)
return mean + var.sqrt() * z

def cal_loss(self, x0, c, drop_prob=0.15): # (B, L, 1)
def cal_loss(self, x0, c, drop_prob=0.15):
batch_size = x0.shape[0]
t = torch.randint(
0, self.n_steps, (batch_size,), device=x0.device, dtype=torch.long
)
t = torch.randint(0, self.n_steps, (batch_size,), device=self.device)
noise = torch.randn_like(x0)
xt = self.q_sample(x0, t, eps=noise)

if torch.rand(1).item() < drop_prob: # randomly drop conditioning
c = torch.zeros_like(c, device=c.device)
# Randomly drop conditioning
if torch.rand(1).item() < drop_prob:
c = torch.zeros_like(c)

eps_theta = self.eps_model(xt, c, t)
return self.loss_func(noise, eps_theta)

def sample(self, n_samples, condition, smooth=True, guidance_scale=1.0):
c = condition.to(self.opt.device)

with torch.no_grad():
self.eps_model.eval()
x = torch.randn([n_samples, self.opt.seq_len, self.opt.input_dim]).to(
self.opt.device
)
for j in tqdm(
range(0, self.n_steps, 1), desc=f"Sampling steps of {self.n_steps}"
):
t = torch.ones(n_samples, dtype=torch.long).to(self.opt.device) * (
self.n_steps - j - 1
)
x = self.p_sample(x, c, t, guidance_scale=guidance_scale)

if smooth:
for i in range(n_samples):
filtered_x = sig.medfilt(x[i].cpu().numpy(), kernel_size=(5, 1))
x[i] = torch.tensor(filtered_x, dtype=torch.float32).to(
self.opt.device
)

return x

def train_model(self, dataset):
batch_size = self.opt.batch_size
epoch_loss = []
train_loader = prepare_dataloader(dataset, batch_size)

for epoch in range(self.opt.n_epochs):
batch_loss = []
for i, (time_series_batch, month_label_batch, day_label_batch) in enumerate(
for i, (time_series_batch, categorical_vars) in enumerate(
tqdm(train_loader, desc=f"Epoch {epoch + 1}")
):
x0 = time_series_batch
c = torch.cat(
[day_label_batch.unsqueeze(1), month_label_batch.unsqueeze(1)],
dim=1,
).to(self.opt.device)
x0 = time_series_batch.to(self.device)
# Get conditioning vector
c = self.conditioning_module(categorical_vars)
self.optimizer.zero_grad()
loss = self.cal_loss(x0, c, drop_prob=0.1)
loss.backward()
self.optimizer.step()
batch_loss.append(loss.item())
epoch_loss.append(np.mean(batch_loss))
print(f"epoch={epoch}/{self.opt.n_epochs}, loss={epoch_loss[-1]}")
print(f"epoch={epoch + 1}/{self.opt.n_epochs}, loss={epoch_loss[-1]}")
self.lr_scheduler.step()

def generate(self, day_labels, month_labels):
num_samples = day_labels.shape[0]
shape = (num_samples, self.opt.seq_len, self.opt.input_dim)
return self._generate(
shape, [day_labels, month_labels], guidance_scale=self.opt.guidance_scale
)

def _generate(self, shape, labels, guidance_scale=1.0):
def sample(self, n_samples, categorical_vars, smooth=True, guidance_scale=1.0):
c = self.conditioning_module(categorical_vars).to(self.device)
with torch.no_grad():
c = torch.cat([label.unsqueeze(1) for label in labels], dim=1).to(
self.opt.device
)
samples = self.sample(
n_samples=shape[0],
condition=c,
smooth=True,
guidance_scale=guidance_scale,
self.eps_model.eval()
x = torch.randn(n_samples, self.opt.seq_len, self.opt.input_dim).to(
self.device
)
return samples
for j in tqdm(
range(self.n_steps), desc=f"Sampling steps of {self.n_steps}"
):
t = torch.full(
(n_samples,),
self.n_steps - j - 1,
dtype=torch.long,
device=self.device,
)
x = self.p_sample(x, c, t, guidance_scale=guidance_scale)
if smooth:
for i in range(n_samples):
filtered_x = sig.medfilt(x[i].cpu().numpy(), kernel_size=(5, 1))
x[i] = torch.tensor(filtered_x, dtype=torch.float32).to(self.device)
return x

def generate(self, categorical_vars):
    """Generate one synthetic sample per row of the conditioning labels.

    Args:
        categorical_vars: Mapping from conditioning-variable name to a batch
            of labels. The batch size is read from ``shape[0]`` of the first
            entry; all entries are presumably the same length (not checked
            here -- verify against the caller).

    Returns:
        Whatever ``self.sample`` returns for that batch size, with smoothing
        enabled and the guidance scale taken from ``self.opt.guidance_scale``.

    Raises:
        ValueError: If ``categorical_vars`` is empty, since the number of
            samples cannot be inferred.
    """
    if not categorical_vars:
        # Previously surfaced as a bare StopIteration from next(iter(...)).
        raise ValueError(
            "categorical_vars must contain at least one conditioning variable"
        )

    # Any entry works for the batch size; take the first without a second
    # key lookup.
    first_labels = next(iter(categorical_vars.values()))
    return self.sample(
        n_samples=first_labels.shape[0],
        categorical_vars=categorical_vars,
        smooth=True,
        guidance_scale=self.opt.guidance_scale,
    )
4 changes: 2 additions & 2 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ def evaluate_single_dataset_model(

def main():
# evaluate_individual_user_models("gpt", include_generation=False)
# evaluate_individual_user_models("acgan", include_generation=False)
evaluate_single_dataset_model("diffusion_ts", include_generation=False)
evaluate_individual_user_models("diffusion_ts", include_generation=False)
# evaluate_single_dataset_model("diffusion_ts", include_generation=False)


if __name__ == "__main__":
Expand Down
28 changes: 25 additions & 3 deletions tests/test_endata.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,37 @@
from typing import List

import numpy as np
import torch
from sklearn.metrics import mean_squared_error

from datasets.pecanstreet import PecanStreetDataManager
from generator.gan import ACGAN
from generator.options import Options

TEST_CONFIG_PATH = os.path.join(
os.path.dirname(os.path.abspath(__file__)), "test_data_config.yaml"
)


class TestGenerator(unittest.TestCase):
"""Test ACGAN Generator."""
"""Test Generators."""

def test_generator_output_shape(self):
pass
def test_acgan_output_shape(self):
# Builds an ACGAN on CPU, feeds it a batch of random noise, and checks that
# the output has shape (batch_size, opt.seq_len, opt.input_dim).

opt = Options(model_name="acgan")
opt.device = "cpu" # Use CPU for testing purposes
generator = ACGAN(opt)

batch_size = 16
noise_dim = opt.noise_dim
noise = torch.randn(batch_size, noise_dim).to(opt.device)

generator.eval()
with torch.no_grad():
# NOTE(review): `labels` is never defined in this test, so the call below
# raises NameError as written. Define the conditioning labels in whatever
# format ACGAN expects before calling the model -- TODO confirm the format.
samples = generator(noise, labels)

expected_shape = (batch_size, opt.seq_len, opt.input_dim)
self.assertEqual(samples.shape, expected_shape)


class TestDataset(unittest.TestCase):
Expand All @@ -31,6 +48,11 @@ def test_data_manager(self):
data_manager = PecanStreetDataManager(
config_path=TEST_CONFIG_PATH, include_generation=False
)

data_manager = PecanStreetDataManager(
config_path=TEST_CONFIG_PATH, include_generation=True
)

assert data_manager.data.shape[0] > 0, "Dataframe not loaded correctly"
assert "timeseries" in data_manager.data.columns

Expand Down

0 comments on commit 2c4a7a9

Please sign in to comment.