
Merge pull request #318 from singh96aman/bugs_and_doc
Issue #315 - Dense Network to work for non-Square images and Blocking Bugs
exook authored Oct 23, 2023
2 parents 76c91c5 + 11b41d8 commit c08a950
Showing 5 changed files with 38 additions and 23 deletions.
16 changes: 12 additions & 4 deletions baler/baler.py
@@ -103,25 +103,30 @@ def perform_training(output_path, config, verbose: bool):
config.test_size,
config.apply_normalization,
config.convert_to_blocks if hasattr(config, "convert_to_blocks") else None,
verbose,
)

if verbose:
print("Training and testing sets normalized")

try:
n_features = 0
if config.data_dimension == 1:
number_of_columns = train_set_norm.shape[1]
config.latent_space_size = ceil(
number_of_columns / config.compression_ratio
)
config.number_of_columns = number_of_columns
n_features = number_of_columns
elif config.data_dimension == 2:
if config.model_type == "dense":
number_of_rows = train_set_norm.shape[1]
number_of_columns = train_set_norm.shape[2]
n_features = number_of_columns * number_of_rows
else:
number_of_rows = original_shape[1]
number_of_columns = original_shape[2]
n_features = number_of_columns
config.latent_space_size = ceil(
(number_of_rows * number_of_columns) / config.compression_ratio
)
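The core fix for #315: for a dense model on 2D data, the feature count is now the flattened rows * columns of the training set, so non-square images work. A minimal sketch of the arithmetic, using a made-up non-square shape:

```python
import numpy as np
from math import ceil

# Toy stand-in for train_set_norm, shaped (events, rows, cols); non-square.
train_set_norm = np.zeros((100, 28, 42))
compression_ratio = 10

number_of_rows = train_set_norm.shape[1]         # 28
number_of_columns = train_set_norm.shape[2]      # 42
n_features = number_of_rows * number_of_columns  # 1176, the model input width

latent_space_size = ceil((number_of_rows * number_of_columns) / compression_ratio)
print(n_features, latent_space_size)             # 1176 118
```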
@@ -139,14 +144,16 @@ def perform_training(output_path, config, verbose: bool):
assert number_of_columns == config.number_of_columns

if verbose:
print(f"Intitalizing Model with Latent Size - {config.latent_space_size}")
print(
f"Intitalizing Model with Latent Size - {config.latent_space_size} and Features - {n_features}"
)

device = helper.get_device()
if verbose:
print(f"Device used for training: {device}")

model_object = helper.model_init(config.model_name)
model = model_object(n_features=number_of_columns, z_dim=config.latent_space_size)
model = model_object(n_features=n_features, z_dim=config.latent_space_size)
model.to(device)

if config.model_name == "Conv_AE_3D" and hasattr(
@@ -322,6 +329,7 @@ def perform_decompression(output_path, config, verbose: bool):

start = time.time()
model_name = config.model_name
data_before = np.load(config.input_path)["data"]
decompressed, names, normalization_features = helper.decompress(
model_path=os.path.join(output_path, "compressed_output", "model.pt"),
input_path=os.path.join(output_path, "compressed_output", "compressed.npz"),
@@ -334,12 +342,12 @@ def perform_decompression(output_path, config, verbose: bool):
model_name=model_name,
config=config,
output_path=output_path,
original_shape=data_before.shape,
)
if verbose:
print(f"Model used: {model_name}")

if config.convert_to_blocks:
data_before = np.load(config.input_path)["data"]
if hasattr(config, "convert_to_blocks") and config.convert_to_blocks:
print(
"Converting Blocked Data into Standard Format. Old Shape - ",
decompressed.shape,
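Both here and in helper.compress below, the bare config.convert_to_blocks access is replaced by a hasattr guard, so configs written before the blocking option existed no longer raise AttributeError. A minimal sketch of the pattern, with a hypothetical config object:

```python
class Config:  # hypothetical minimal stand-in for baler's config object
    pass

config = Config()  # convert_to_blocks was never set on this config

# Old form: config.convert_to_blocks raises AttributeError on legacy configs.
# New form: a missing attribute simply means blocking is disabled.
if hasattr(config, "convert_to_blocks") and config.convert_to_blocks:
    print("converting to blocks")
else:
    print("blocking disabled or not configured")
```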
4 changes: 3 additions & 1 deletion baler/modules/data_processing.py
@@ -24,7 +24,9 @@


def convert_to_blocks_util(blocks, data):
print("Converted Dataset to Blocks of Size - ", blocks)
print(
"Converted Dataset to Blocks of Size - ", blocks, " from original ", data.shape
)
blocks = np.array(blocks)
original_shape = np.array(data.shape)
total_size = np.prod(original_shape)
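For context, blocking tiles each sample into fixed-size pieces before training. The sketch below shows the idea for shapes that tile exactly; it illustrates the concept only and is not the repo's convert_to_blocks_util, which also records the original shape for later reconstruction:

```python
import numpy as np

def to_blocks(data, blocks):
    """Illustrative only: tile (n, rows, cols) data into (m, br, bc) blocks."""
    _, br, bc = blocks                        # e.g. [1, 50, 50]
    n, rows, cols = data.shape
    assert rows % br == 0 and cols % bc == 0  # sketch assumes exact tiling
    return (
        data.reshape(n, rows // br, br, cols // bc, bc)
        .transpose(0, 1, 3, 2, 4)
        .reshape(-1, br, bc)
    )

data = np.zeros((2, 100, 150))
print(to_blocks(data, [1, 50, 50]).shape)  # (12, 50, 50)
```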
36 changes: 21 additions & 15 deletions baler/modules/helper.py
@@ -265,7 +265,14 @@ def normalize(data, custom_norm):
return data


def process(input_path, custom_norm, test_size, apply_normalization, convert_to_blocks):
def process(
input_path,
custom_norm,
test_size,
apply_normalization,
convert_to_blocks,
verbose,
):
"""Loads the input data into a ndarray, splits it into train/test splits and normalizes if chosen.
Args:
@@ -279,6 +286,10 @@ def process(input_path, custom_norm, test_size, apply_normalization, convert_to_
"""
loaded = np.load(input_path)
data = loaded["data"]

if verbose:
print("Original Dataset Shape - ", data.shape)

original_shape = data.shape

if convert_to_blocks:
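A hedged usage sketch of the widened process signature. The import path, file name, and the unpacking of four return values (train/test splits, normalization features, and the recorded pre-blocking shape) are assumptions read off the surrounding diff, not verified against the full source:

```python
from baler.modules import helper  # assumed import path

# Placeholder arguments, not repo defaults.
train_set_norm, test_set_norm, normalization_features, original_shape = helper.process(
    input_path="data/example.npz",
    custom_norm=False,
    test_size=0.2,
    apply_normalization=True,
    convert_to_blocks=None,   # or a block shape such as [1, 50, 50]
    verbose=True,             # new argument: prints the original dataset shape
)
```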
@@ -455,7 +466,7 @@ def compress(model_path, config):
data_before = loaded["data"]
original_shape = data_before.shape

if config.convert_to_blocks:
if hasattr(config, "convert_to_blocks") and config.convert_to_blocks:
data_before = data_processing.convert_to_blocks_util(
config.convert_to_blocks, data_before
)
@@ -467,21 +478,24 @@ def compress(model_path, config):
data = data_before
number_of_columns = 0
try:
print("compression ratio:", config.compression_ratio)
n_features = 0
if config.data_dimension == 1:
column_names = np.load(config.input_path)["names"]
number_of_columns = len(column_names)
config.latent_space_size = ceil(
number_of_columns / config.compression_ratio
)
config.number_of_columns = number_of_columns
n_features = number_of_columns
elif config.data_dimension == 2:
if config.model_type == "dense":
number_of_rows = data.shape[1]
config.number_of_columns = data.shape[2]
n_features = number_of_rows * config.number_of_columns
else:
number_of_rows = original_shape[1]
config.number_of_columns = original_shape[2]
n_features = config.number_of_columns
config.latent_space_size = ceil(
(number_of_rows * config.number_of_columns) / config.compression_ratio
)
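Note the asymmetry this hunk encodes: the dense path sizes its features from the data actually being compressed (which may already be blocked), while the convolutional path reads the pre-blocking original_shape. A toy sketch of the branch:

```python
from math import ceil

original_shape = (1000, 28, 42)  # toy (events, rows, cols), before blocking
data_shape = (1000, 28, 42)      # shape after any blocking; identical here
compression_ratio, model_type = 10, "dense"

if model_type == "dense":
    number_of_rows, number_of_columns = data_shape[1], data_shape[2]
    n_features = number_of_rows * number_of_columns  # flattened sample width
else:
    number_of_rows, number_of_columns = original_shape[1], original_shape[2]
    n_features = number_of_columns                   # per-row feature count

latent_space_size = ceil((number_of_rows * number_of_columns) / compression_ratio)
print(n_features, latent_space_size)  # 1176 118
```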
@@ -503,20 +517,11 @@ def compress(model_path, config):
model = data_processing.load_model(
model_object,
model_path=model_path,
n_features=config.number_of_columns,
n_features=n_features,
z_dim=config.latent_space_size,
)
model.eval()

# Give the encoding function the correct input as tensor
# if config.data_dimension == 2:
# data_tensor = (
# torch.from_numpy(data.astype("float32", casting="same_kind"))
# .to(device)
# .view(data.shape[0], 1, data.shape[1], data.shape[2])
# )
# elif config.data_dimension == 1:
# data_tensor = torch.from_numpy(data).to(device)
if config.data_dimension == 2:
if config.model_type == "convolutional" and config.model_name == "Conv_AE_3D":
data_tensor = torch.tensor(data, dtype=torch.float32).view(
@@ -593,6 +598,7 @@ def decompress(
model_name,
config,
output_path,
original_shape,
):
"""Function which performs the decompression of the compressed file. In order to decompress, you must have a
compressed file, whose path is determined by `input_path`, a model from path `model_path` and a model_name. The
@@ -637,7 +643,7 @@ def decompress(
bs = config.batch_size
model_dict = torch.load(str(model_path), map_location=get_device())
if config.data_dimension == 2 and config.model_type == "dense":
number_of_columns = int(np.sqrt(len(model_dict[list(model_dict.keys())[-1]])))
number_of_columns = int((len(model_dict[list(model_dict.keys())[-1]])))
else:
number_of_columns = len(model_dict[list(model_dict.keys())[-1]])
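Why the sqrt had to go: the last entry in the saved state dict is the decoder's final layer, whose length is already the flattened feature count; taking its square root silently assumed square images. A runnable sketch of the lookup with a toy model:

```python
import torch.nn as nn

# Toy decoder whose final layer restores 28 * 42 = 1176 features.
model = nn.Sequential(nn.Linear(118, 200), nn.Linear(200, 28 * 42))
model_dict = model.state_dict()

last_key = list(model_dict.keys())[-1]         # "1.bias", the final layer
number_of_columns = len(model_dict[last_key])  # 1176, now used directly
print(last_key, number_of_columns)             # 1.bias 1176
```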

@@ -695,7 +701,7 @@

if config.data_dimension == 2 and config.model_type == "dense":
decompressed = decompressed.reshape(
(len(decompressed), number_of_columns, number_of_columns)
(len(decompressed), original_shape[1], original_shape[2])
)

return decompressed, names, normalization_features
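With original_shape threaded through from the uncompressed input file, the flattened dense output can be restored to non-square samples; the old (columns, columns) reshape only worked when rows equaled columns. A toy check:

```python
import numpy as np

decompressed = np.zeros((5, 28 * 42))  # toy stand-in for the model output
original_shape = (5, 28, 42)           # recorded from the input file

decompressed = decompressed.reshape(
    (len(decompressed), original_shape[1], original_shape[2])
)
print(decompressed.shape)  # (5, 28, 42)
```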
2 changes: 0 additions & 2 deletions baler/modules/models.py
@@ -194,8 +194,6 @@ def __init__(self, n_features, z_dim, *args, **kwargs):

self.activations = {}

n_features = n_features * n_features

# encoder
self.en1 = nn.Linear(n_features, 200, dtype=torch.float)
self.en2 = nn.Linear(200, 100, dtype=torch.float)
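This is the model-side counterpart to the caller changes above: the dense model used to square n_features internally, which assumed square inputs; now the caller passes the already-flattened rows * cols and the constructor uses it as-is. A sketch with a hypothetical class name:

```python
import torch
import torch.nn as nn

class DenseAE(nn.Module):  # hypothetical stand-in, not the repo's class
    def __init__(self, n_features, z_dim):
        super().__init__()
        # n_features arrives pre-flattened (rows * cols); the removed line
        # n_features = n_features * n_features would square it a second time
        self.en1 = nn.Linear(n_features, 200, dtype=torch.float)
        self.en2 = nn.Linear(200, 100, dtype=torch.float)
        self.out = nn.Linear(100, z_dim, dtype=torch.float)

model = DenseAE(n_features=28 * 42, z_dim=118)
print(model.en1)  # Linear(in_features=1176, out_features=200, bias=True)
```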
@@ -28,7 +28,8 @@ def set_config(c):
c.compress_to_latent_space = False
c.save_error_bounded_deltas = False
c.error_bounded_requirement = 1
c.convert_to_blocks = [1, 50, 50]
c.convert_to_blocks = False
# c.custom_loss_function = "loss_function_swae"


# def set_config(c):
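The final changed file (a workspace config module whose path is truncated in this view) flips blocking from on-by-default to opt-in, matching the hasattr guards above: an absent or False value means no blocking. Re-enabling it is a one-line config change:

```python
def set_config(c):
    c.convert_to_blocks = False          # new default: no blocking
    # c.convert_to_blocks = [1, 50, 50]  # opt back in with a block shape
```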
