Experiments/kyle models (#3)
* add mobilenet_v3small and vgg16 models

* ignore tfrecords

* my ymls, plus model_fit to CLI

* vgg16 try1 yml

* fix batch, shuffle data once on load

* add forest/non forest count to cm title

---------

Co-authored-by: John Dilger <[email protected]>
kyle-woodward and jdilger authored Apr 9, 2024
1 parent 1302953 commit 219f5e2
Showing 11 changed files with 274 additions and 117 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -5,6 +5,7 @@ logs/
 /**/*.png
 *.tif
 *.tiff
+*.tfrecord.gz
 
 # Byte-compiled / optimized / DLL files
 __pycache__/
27 changes: 17 additions & 10 deletions fao_models/dataloader.py
@@ -1,33 +1,38 @@
 import tensorflow as tf
 import os
 
+
 def _parse_function(proto):
     # Define the parsing schema
     feature_description = {
-        'image': tf.io.FixedLenFeature([], tf.string),
-        'label': tf.io.FixedLenFeature([], tf.string),
+        "image": tf.io.FixedLenFeature([], tf.string),
+        "label": tf.io.FixedLenFeature([], tf.string),
     }
     # Parse the input `tf.train.Example` proto using the schema
     example = tf.io.parse_single_example(proto, feature_description)
-    image = tf.io.parse_tensor(example['image'], out_type=tf.float32)
-    label = tf.io.parse_tensor(example['label'], out_type=tf.int64)
+    image = tf.io.parse_tensor(example["image"], out_type=tf.float32)
+    label = tf.io.parse_tensor(example["label"], out_type=tf.int64)
     image.set_shape([32, 32, 4])  # Set the shape explicitly if not already defined
     label.set_shape([])  # For scalar labels
     return image, label
 
+
 def load_dataset_from_tfrecords(tfrecord_dir, batch_size=32):
 
     pattern = tfrecord_dir + "/*.tfrecord.gz"
     files = tf.data.Dataset.list_files(pattern)
     dataset = files.interleave(
         lambda x: tf.data.TFRecordDataset(x, compression_type="GZIP"),
         cycle_length=tf.data.AUTOTUNE,
-        block_length=1
+        block_length=1,
     )
+    dataset = dataset.map(_parse_function, num_parallel_calls=tf.data.AUTOTUNE).batch(
+        batch_size, drop_remainder=True
+    )
-    dataset = dataset.map(_parse_function, num_parallel_calls=tf.data.AUTOTUNE)
-    dataset = dataset.shuffle(buffer_size=1000)
+    dataset = dataset.shuffle(buffer_size=100_000, seed=42)
     return dataset
 
+
 def split_dataset(dataset, total_examples, test_split=0.2, batch_size=32):
     test_size = int(total_examples * test_split)
     train_size = total_examples - test_size
@@ -36,7 +41,9 @@ def split_dataset(dataset, total_examples, test_split=0.2, batch_size=32):
     train_batches = train_size // batch_size
     test_batches = test_size // batch_size
 
-    train_dataset = dataset.take(train_batches).batch(batch_size).prefetch(tf.data.AUTOTUNE)
-    test_dataset = dataset.skip(train_batches).take(test_batches).batch(batch_size).prefetch(tf.data.AUTOTUNE)
+    train_dataset = dataset.take(train_batches).prefetch(tf.data.AUTOTUNE)
+    test_dataset = (
+        dataset.skip(train_batches).take(test_batches).prefetch(tf.data.AUTOTUNE)
+    )
 
-    return train_dataset, test_dataset
+    return train_dataset, test_dataset
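
With this change, batching and the seeded shuffle happen once inside load_dataset_from_tfrecords, and split_dataset only takes and skips whole batches without batching a second time. A minimal usage sketch (not part of the commit; the import path, directory, and example count are placeholders):

import fao_models.dataloader as dl  # assumed import path for the module above

data_dir = "data/tfrecords"  # placeholder: folder containing *.tfrecord.gz shards
total_examples = 10_000      # placeholder: should match the number of exported examples
batch_size = 32

# The loader now yields batches directly: images of shape (32, 32, 32, 4), labels of shape (32,).
dataset = dl.load_dataset_from_tfrecords(data_dir, batch_size=batch_size)

# split_dataset counts in whole batches and no longer applies .batch() again.
train_ds, test_ds = dl.split_dataset(
    dataset, total_examples, test_split=0.2, batch_size=batch_size
)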
17 changes: 17 additions & 0 deletions fao_models/graveyard/model_graveyard.py
@@ -1,3 +1,20 @@
+def model1(optimizer, loss_fn, metrics=[]):
+
+    model = models.Sequential(
+        [
+            layers.Input(shape=(32, 32, 4)),
+            layers.Flatten(),
+            layers.Dense(64, activation="relu"),
+            layers.Dense(1, activation="softmax"),
+        ]
+    )
+
+    model.compile(optimizer=optimizer, loss=loss_fn, metrics=metrics)
+
+    return model
+
+
+
 # Define your TensorFlow models here
 def cnn_v1_softmax_onehot(optimizer,loss_fn,metrics=['accuracy']):
     def conv_block(input_tensor, num_filters):
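
A note on model1 above: softmax over a single output unit is degenerate, since it always returns 1.0, so a binary forest/non-forest head would normally use a sigmoid activation. A self-contained sketch of that variant, with the imports the snippet implies; the sigmoid swap and the name model1_sigmoid are illustrative only, not part of the commit:

from tensorflow.keras import layers, models


def model1_sigmoid(optimizer, loss_fn, metrics=[]):
    model = models.Sequential(
        [
            layers.Input(shape=(32, 32, 4)),
            layers.Flatten(),
            layers.Dense(64, activation="relu"),
            # sigmoid (not softmax) so a single unit can output a class probability
            layers.Dense(1, activation="sigmoid"),
        ]
    )
    model.compile(optimizer=optimizer, loss=loss_fn, metrics=metrics)
    return model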
190 changes: 106 additions & 84 deletions fao_models/model_fit.py
@@ -9,9 +9,10 @@
 import yaml
 from pprint import pformat
 from functools import partial
+import argparse
 
-# TODO: make this single CLI arg input
-config_file = r"runc3.yml"
+# # TODO: make this single CLI arg input
+# config_file = r"runc3.yml"
 
 # setup logging
 logging.basicConfig(
@@ -26,94 +26,115 @@
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)
 
-with open(config_file, "r") as file:
-    config_data = yaml.safe_load(file)
-
-# retrieve parameters
-experiment_name = config_data["experiment_name"]
-model_name = config_data["model_name"]
-total_examples = config_data["total_examples"]
-data_dir = config_data["data_dir"]
-data_split = config_data["data_split"]
-epochs = config_data["epochs"]
-learning_rate = config_data["learning_rate"]
-batch_size = config_data["batch_size"]
-buffer_size = config_data["buffer_size"]
-optimizer = config_data["optimizer"]
-optimizer_use_lr_schedular = config_data["optimizer_use_lr_schedular"]
-loss_function = config_data["loss_function"]
-early_stopping_patience = config_data["early_stopping_patience"]
-
-# hyperbolically decrease the learning rate to 1/2 of the base rate at 1,000 epochs, 1/3 at 2,000 epochs, and so on.
-if optimizer == "adam":
-    if optimizer_use_lr_schedular:
-        steps_per_epoch = total_examples * data_split // batch_size
-        lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
-            initial_learning_rate=learning_rate,
-            decay_steps=steps_per_epoch * epochs,
-            decay_rate=1,
-            staircase=False,
-        )
-        logger.info(
-            f"Using a learning rate schedule of InverseTimeDecay, decay_steps={steps_per_epoch*epochs}"
-        )
-        optimizer = tf.keras.optimizers.Adam(lr_schedule)
-    else:
-        optimizer = tf.keras.optimizers.Adam()
-
-# pull model from config
-model = get_model(model_name, optimizer=optimizer, loss_fn=loss_function)
-print(model.summary())
-
-logger.info("Config file: %s", config_file)
-logger.info("Parameters:")
-logger.info(pformat(config_data))
+def main():
 
-# Load the dataset without batching
-dataset = dl.load_dataset_from_tfrecords(data_dir)
+    # initalize new cli parser
+    parser = argparse.ArgumentParser(description="Train a model with a .yml file.")
 
-# Split the dataset into training and testing
-train_dataset, test_dataset = dl.split_dataset(
-    dataset, total_examples, test_split=data_split, batch_size=batch_size
-)
-train_dataset = train_dataset.shuffle(buffer_size, reshuffle_each_iteration=True)
+    parser.add_argument(
+        "-c",
+        "--config",
+        type=str,
+        help="path to .yml file",
+    )
+
+    args = parser.parse_args()
+
+    config_file = args.config
+
+    with open(config_file, "r") as file:
+        config_data = yaml.safe_load(file)
+
+    # retrieve parameters
+    experiment_name = config_data["experiment_name"]
+    model_name = config_data["model_name"]
+    total_examples = config_data["total_examples"]
+    data_dir = config_data["data_dir"]
+    data_split = config_data["data_split"]
+    epochs = config_data["epochs"]
+    learning_rate = config_data["learning_rate"]
+    batch_size = config_data["batch_size"]
+    buffer_size = config_data["buffer_size"]
+    optimizer = config_data["optimizer"]
+    optimizer_use_lr_schedular = config_data["optimizer_use_lr_schedular"]
+    loss_function = config_data["loss_function"]
+    early_stopping_patience = config_data["early_stopping_patience"]
+
+    # hyperbolically decrease the learning rate to 1/2 of the base rate at 1,000 epochs, 1/3 at 2,000 epochs, and so on.
+    if optimizer == "adam":
+        if optimizer_use_lr_schedular:
+            steps_per_epoch = total_examples * data_split // batch_size
+            lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
+                initial_learning_rate=learning_rate,
+                decay_steps=steps_per_epoch * epochs,
+                decay_rate=1,
+                staircase=False,
+            )
+            logger.info(
+                f"Using a learning rate schedule of InverseTimeDecay, decay_steps={steps_per_epoch*epochs}"
+            )
+            optimizer = tf.keras.optimizers.Adam(lr_schedule)
+        else:
+            optimizer = tf.keras.optimizers.Adam()
+
+    # pull model from config
+    model = get_model(model_name, optimizer=optimizer, loss_fn=loss_function)
+    print(model.summary())
+
+    logger.info("Config file: %s", config_file)
+    logger.info("Parameters:")
+    logger.info(pformat(config_data))
+
+    # Load the dataset without batching
+    dataset = dl.load_dataset_from_tfrecords(data_dir, batch_size=batch_size)
+
+    # Split the dataset into training and testing
+    train_dataset, test_dataset = dl.split_dataset(
+        dataset, total_examples, test_split=data_split, batch_size=batch_size
+    )
+    train_dataset = train_dataset.shuffle(buffer_size, reshuffle_each_iteration=True)
 
logger.info("Starting model training...")
LOGS_DIR = os.path.join(
os.path.dirname(os.path.dirname(__file__)), "logs", experiment_name
)
if not os.path.exists(LOGS_DIR):
os.makedirs(LOGS_DIR)

# setup for confusion matrix callback
tb_samples = train_dataset.take(1)
x = list(map(lambda x: x[0], tb_samples))[0]
y = list(map(lambda x: x[1], tb_samples))[0]
class_names = ["nonforest", "forest"]

# initialize and add tb callbacks
callbacks = []
file_writer = tf.summary.create_file_writer(LOGS_DIR)
cm_callback = CmCallback(y, x, class_names, file_writer)

if early_stopping_patience is not None:
logger.info(f"Using early stopping. Patience: {early_stopping_patience}")
early_stop = tf.keras.callbacks.EarlyStopping(
monitor="val_loss", patience=early_stopping_patience, restore_best_weights=True
logger.info("Starting model training...")
LOGS_DIR = os.path.join(
os.path.dirname(os.path.dirname(__file__)), "logs", experiment_name
)
if not os.path.exists(LOGS_DIR):
os.makedirs(LOGS_DIR)

# setup for confusion matrix callback
tb_samples = train_dataset.take(1)
x = list(map(lambda x: x[0], tb_samples))[0]
y = list(map(lambda x: x[1], tb_samples))[0]
class_names = ["nonforest", "forest"]

# initialize and add tb callbacks
callbacks = []
file_writer = tf.summary.create_file_writer(LOGS_DIR)
cm_callback = CmCallback(y, x, class_names, file_writer)

if early_stopping_patience is not None:
logger.info(f"Using early stopping. Patience: {early_stopping_patience}")
early_stop = tf.keras.callbacks.EarlyStopping(
monitor="val_loss",
patience=early_stopping_patience,
restore_best_weights=True,
)
callbacks.append(early_stop)
callbacks.append(cm_callback)
callbacks.append(tf.keras.callbacks.TensorBoard(LOGS_DIR))

history = model.fit(
train_dataset,
epochs=epochs,
validation_data=test_dataset,
callbacks=callbacks,
)
callbacks.append(early_stop)
callbacks.append(cm_callback)
callbacks.append(tf.keras.callbacks.TensorBoard(LOGS_DIR))

logger.info("Model training complete")
logger.info("Training history:")
logger.info(pformat(history.history))

history = model.fit(
train_dataset,
epochs=epochs,
validation_data=test_dataset,
callbacks=callbacks,
)

logger.info("Model training complete")
logger.info("Training history:")
logger.info(pformat(history.history))
if __name__ == "__main__":
main()
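
model_fit.main() now reads every hyperparameter from the .yml file passed via -c/--config. A sketch of a run config with exactly the keys the script reads; the values and file names are placeholders, not taken from the commit, written out here with PyYAML:

import yaml

config = {
    "experiment_name": "vgg16_try1",        # placeholder experiment name
    "model_name": "vgg16",                  # must be a name get_model() recognizes
    "total_examples": 10_000,               # placeholder: number of exported examples
    "data_dir": "data/tfrecords",           # placeholder: folder of *.tfrecord.gz shards
    "data_split": 0.2,
    "epochs": 50,
    "learning_rate": 1e-3,
    "batch_size": 32,
    "buffer_size": 1000,
    "optimizer": "adam",
    "optimizer_use_lr_schedular": True,
    "loss_function": "binary_crossentropy",  # placeholder: any loss the models accept
    "early_stopping_patience": 10,
}

with open("my_run.yml", "w") as f:
    yaml.safe_dump(config, f)

# then train with the new CLI:
#   python fao_models/model_fit.py -c my_run.yml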