From 10e8581d28e9503b7796f97a63a62baef964a07b Mon Sep 17 00:00:00 2001 From: kyle-woodward Date: Tue, 11 Jun 2024 09:58:28 -0400 Subject: [PATCH] all changes on old laptop [clean up later] --- fao_models/beam_pipeline.py | 179 ++++++ .../beam_pipelines/johns_beam_ssl4eo.py | 213 +++++++ fao_models/beam_utils.py | 137 +++++ fao_models/common.py | 18 +- fao_models/freezing_weights.py | 105 ++++ .../freezing_weights_test_real-models.py | 90 +++ fao_models/graveyard/batch_splits_inquiry.py | 61 ++ fao_models/model_predict_batch.py | 106 ++++ fao_models/plotting/learning_rates.py | 2 +- .../test_inference_pipeline_steps.ipynb | 526 ++---------------- fao_models/validate_eval_metrics.py | 6 +- ...mall-epochs10-batch32-lr01_testPredict.yml | 21 + ...all-epochs30-batch64-lr001_testPredict.yml | 21 + ...h64-lr001-seed5-lrdecay5-tfrecords-all.yml | 20 + runc-resnet-epochs10-batch64-lr001-seed5.yml | 19 + runc-resnet-epochs100-batch64-lr001.yml | 18 + ...h64-lr001-seed5-lrdecay5-tfrecords-all.yml | 20 + ...pochs30-batch64-lr001-seed5-lrdecay2_5.yml | 20 + ...-epochs30-batch64-lr001-seed5-lrdecay5.yml | 20 + runc-resnet-epochs5-batch64-lr001-seed42.yml | 19 + ...seed5-lrdecay5-rerun-afterdatadownload.yml | 20 + ...h64-lr001-seed5-lrdecay5-tfrecords-all.yml | 20 + ...batch64-lr001-seed5-reshuffledEachIter.yml | 19 + ...et-epochs5-batch64-lr001_testingBugFix.yml | 19 + 24 files changed, 1208 insertions(+), 491 deletions(-) create mode 100644 fao_models/beam_pipeline.py create mode 100644 fao_models/beam_pipelines/johns_beam_ssl4eo.py create mode 100644 fao_models/beam_utils.py create mode 100644 fao_models/freezing_weights.py create mode 100644 fao_models/freezing_weights_test_real-models.py create mode 100644 fao_models/graveyard/batch_splits_inquiry.py create mode 100644 fao_models/model_predict_batch.py create mode 100644 runc-mobilenetv3small-epochs10-batch32-lr01_testPredict.yml create mode 100644 runc-mobilenetv3small-epochs30-batch64-lr001_testPredict.yml create mode 100644 runc-resnet-epochs10-batch64-lr001-seed5-lrdecay5-tfrecords-all.yml create mode 100644 runc-resnet-epochs10-batch64-lr001-seed5.yml create mode 100644 runc-resnet-epochs100-batch64-lr001.yml create mode 100644 runc-resnet-epochs20-batch64-lr001-seed5-lrdecay5-tfrecords-all.yml create mode 100644 runc-resnet-epochs30-batch64-lr001-seed5-lrdecay2_5.yml create mode 100644 runc-resnet-epochs30-batch64-lr001-seed5-lrdecay5.yml create mode 100644 runc-resnet-epochs5-batch64-lr001-seed42.yml create mode 100644 runc-resnet-epochs5-batch64-lr001-seed5-lrdecay5-rerun-afterdatadownload.yml create mode 100644 runc-resnet-epochs5-batch64-lr001-seed5-lrdecay5-tfrecords-all.yml create mode 100644 runc-resnet-epochs5-batch64-lr001-seed5-reshuffledEachIter.yml create mode 100644 runc-resnet-epochs5-batch64-lr001_testingBugFix.yml diff --git a/fao_models/beam_pipeline.py b/fao_models/beam_pipeline.py new file mode 100644 index 0000000..b673de8 --- /dev/null +++ b/fao_models/beam_pipeline.py @@ -0,0 +1,179 @@ +import collections +import typing +import argparse +from types import SimpleNamespace +import csv +import io +import logging + +import apache_beam as beam +from apache_beam.options.pipeline_options import PipelineOptions +from apache_beam.io import ReadFromCsv, WriteToText + +from beam_utils import parse_shp_to_latlon +from common import load_yml + + +# want my pipeline to have these general steps + +# 1. Read in data from SHP (hexagons were provided as SHP and CSV but CSV has no geom column, centroids only came as SHP file) +# 2. 
parse data into row-wise elements of (global id, [lon,lat]) - rest of pipeline passes these elements through +# 3. download imagery for each element and convert to a tensor +# 4. load model and run inference on tensor to return prediction value +# 5. write prediction value to new CSV file with (global id, lat, long, prediction value) + +class GetPatch(beam.DoFn): + def __init__(self): + super().__init__() + + def setup(self): + import ee + import google.auth + + credentials, _ = google.auth.default() + ee.Initialize( + credentials, + project="pc530-fao-fra-rss", + opt_url="https://earthengine-highvolume.googleapis.com", + ) + return super().setup() + + def process(self, element): + from beam_utils import get_ee_img, get_patch_numpy, to_tensor + + # element is a tuple of (global_id, [lon,lat]) + global_id = element[0] + coords = element[1] + + image = get_ee_img(coords) + patch = get_patch_numpy(coords, image) + patch_tensor = to_tensor(patch) + + yield { + "id": global_id, + "long": coords[0], + "lat": coords[1], + "patch": patch_tensor + } + +class Predict(beam.DoFn): + def __init__(self, config_path): + from common import load_yml + # from _types import Config # Config was a dataclass subclass in Johns repo that type casts the yml file loaded.. + + self._config = load_yml(config_path) + super().__init__() + + def setup(self): + # load the model + from models import get_model, freeze + self.model = get_model(model_name=self._config.model_name, + optimizer = self._config.optimizer, + loss_fn = self._config.loss_fn, + training_mode = True) + self.model.load_weights(self._config.checkpoint) + freeze(self.model) + + return super().setup() + + def process(self, element): + + model = self.model + patch = element["patch"] + prob = round(float(model(patch).numpy()),2) + prediction = "Forest" if prob > 0.5 else "Non-Forest" + + yield { + "id": element["id"], + "long": element["long"], + "lat": element["lat"], + "prob_label": prob, + "pred_label": prediction + } + +# https://github.com/kubeflow/examples/blob/master/LICENSE +class DictToCSVString(beam.DoFn): + """Convert incoming dict to a CSV string. + + This DoFn converts a Python dict into + a CSV string. + + Args: + fieldnames: A list of strings representing keys of a dict. + """ + + def __init__(self, fieldnames): + super(DictToCSVString, self).__init__() + + self.fieldnames = fieldnames + + def process(self, element, *_args, **_kwargs): + """Convert a Python dict instance into CSV string. + + This routine uses the Python CSV DictReader to + robustly convert an input dict to a comma-separated + CSV string. This also handles appropriate escaping of + characters like the delimiter ",". The dict values + must be serializable into a string. + + Args: + element: A dict mapping string keys to string values. + { + "key1": "STRING", + "key2": "STRING" + } + + Yields: + A string representing the row in CSV format. 
+ """ + fieldnames = self.fieldnames + filtered_element = { + key: value for (key, value) in element.items() if key in fieldnames + } + with io.StringIO() as stream: + writer = csv.DictWriter(stream, fieldnames) + writer.writerow(filtered_element) + csv_string = stream.getvalue().strip("\r\n") + + yield csv_string + + +def pipeline(beam_options, + dotargs: SimpleNamespace): + if beam_options is not None: + beam_options = PipelineOptions(**load_yml(beam_options)) + + pColl = parse_shp_to_latlon(dotargs.input) + cols = ["id", "long", "lat", "prob_label", "pred_label"] + with beam.Pipeline() as p: + ( + p + | "Construct PCollection" >> beam.Create(pColl) + | "Get Patch" >> beam.ParDo(GetPatch()) + | "Predict" + >> beam.ParDo(Predict(config_path=dotargs.model_config)) + | "Dict To CSV String" >> beam.ParDo(DictToCSVString(cols)) + | "Write String To CSV" >> WriteToText(dotargs.output, header=",".join(cols)) + ) + +#test file +# file = 'C:\\Users\\kyle\\Downloads\\FRA_hex_shp_5records.shp' +def run(): + argparse.FileType() + + parser = argparse.ArgumentParser() + parser.add_argument("--input", "-i", type=str, required=True) + parser.add_argument("--output", "-o", type=str, required=True) + parser.add_argument("--model-config", "-mc", type=str, required=True) + group = parser.add_argument_group("pipeline-options") + group.add_argument("--beam-config", "-bc", type=str) + args = parser.parse_args() + + pipeline( + beam_options=args.beam_config, + dotargs=args) + + +if __name__ == "__main__": + logging.getLogger().setLevel(logging.INFO) + run() \ No newline at end of file diff --git a/fao_models/beam_pipelines/johns_beam_ssl4eo.py b/fao_models/beam_pipelines/johns_beam_ssl4eo.py new file mode 100644 index 0000000..770eebf --- /dev/null +++ b/fao_models/beam_pipelines/johns_beam_ssl4eo.py @@ -0,0 +1,213 @@ +import collections +import argparse +from types import SimpleNamespace +import csv +import io + +import apache_beam as beam +from apache_beam.options.pipeline_options import PipelineOptions +from apache_beam.io import ReadFromCsv, WriteToText + +from common import load_yml + + +TMP = "/Users/johndilger/Documents/projects/SSL4EO-S12/fao_models/TMP" +BANDS = [ + "B1", + "B2", + "B3", + "B4", + "B5", + "B6", + "B7", + "B8", + "B8A", + "B9", + "B10", + "B11", + "B12", +] +CROPS = [44, 264, 264, 264, 132, 132, 132, 264, 132, 44, 44, 132, 132] +PROJECT = "pc530-fao-fra-rss" + + +# https://github.com/kubeflow/examples/blob/master/LICENSE +class DictToCSVString(beam.DoFn): + """Convert incoming dict to a CSV string. + + This DoFn converts a Python dict into + a CSV string. + + Args: + fieldnames: A list of strings representing keys of a dict. + """ + + def __init__(self, fieldnames): + super(DictToCSVString, self).__init__() + + self.fieldnames = fieldnames + + def process(self, element, *_args, **_kwargs) -> collections.abc.Iterator[str]: + """Convert a Python dict instance into CSV string. + + This routine uses the Python CSV DictReader to + robustly convert an input dict to a comma-separated + CSV string. This also handles appropriate escaping of + characters like the delimiter ",". The dict values + must be serializable into a string. + + Args: + element: A dict mapping string keys to string values. + { + "key1": "STRING", + "key2": "STRING" + } + + Yields: + A string representing the row in CSV format. 
+ """ + fieldnames = self.fieldnames + filtered_element = { + key: value for (key, value) in element.items() if key in fieldnames + } + with io.StringIO() as stream: + writer = csv.DictWriter(stream, fieldnames) + writer.writerow(filtered_element) + csv_string = stream.getvalue().strip("\r\n") + + yield csv_string + + +class ComputeWordLengthFn(beam.DoFn): + def process(self, element): + return [len(element)] + + +class Predict(beam.DoFn): + def __init__(self, config_path): + from common import load_yml + from _types import Config + + self._config = Config(**load_yml(config_path)) + super().__init__() + + def setup(self): + self.load_model() + return super().setup() + + def load_model(self): + """load model""" + from models._models import get_model + from models.dino.utils import restart_from_checkpoint + import os + + c = self._config + self.model, self.linear_classifier = get_model(**c.__dict__) + restart_from_checkpoint( + os.path.join(c.model_head_root), + state_dict=self.linear_classifier, + ) + + def process(self, element): + import torch + from datasets.ssl4eo_dataset import SSL4EO + + dataset = SSL4EO( + root=element["img_root"].parent, + mode="s2c", + normalize=False, # todo add normalized to self._config. + ) + + image = dataset[0] + image = torch.unsqueeze(torch.tensor(image), 0).type(torch.float32) + + self.linear_classifier.eval() + with torch.no_grad(): + intermediate_output = self.model.get_intermediate_layers( + image, self._config.n_last_blocks + ) + output = torch.cat([x[:, 0] for x in intermediate_output], dim=-1) + + output = self.linear_classifier(output) + element["prob_label"] = output.detach().cpu().item() + element["pred_label"] = round(element["prob_label"]) + yield element + + +class GetImagery(beam.DoFn): + def __init__(self, dst): + self.dst = dst + super().__init__() + + def setup(self): + import ee + import google.auth + + credentials, _ = google.auth.default() + ee.Initialize( + credentials, + project=PROJECT, + opt_url="https://earthengine-highvolume.googleapis.com", + ) + return super().setup() + + def process(self, element): + """download imagery""" + from download_data.download_wraper import single_patch + from pathlib import Path + + sample = element + coords = (sample.long, sample.lat) + local_root = Path(self.dst) + img_root = single_patch( + coords, + id=sample.id, + dst=local_root / "imgs", + year=2019, + bands=BANDS, + crop_dimensions=CROPS, + ) + yield { + "img_root": img_root, + "long": sample.long, + "lat": sample.lat, + "id": sample.id, + } + + +def pipeline(beam_options, dotargs: SimpleNamespace): + if beam_options is not None: + beam_options = PipelineOptions(**load_yml(beam_options)) + + cols = ["id", "long", "lat", "prob_label", "pred_label"] + with beam.Pipeline() as p: + bdf = ( + p + | "read input data" >> ReadFromCsv(dotargs.input) + | "download imagery" + >> beam.ParDo(GetImagery(dst=TMP)).with_output_types(dict) + | "predict" + >> beam.ParDo(Predict(config_path=dotargs.model_config)).with_output_types( + dict + ) + | "to csv str" >> beam.ParDo(DictToCSVString(cols)) + | "write to csv" >> WriteToText(dotargs.output, header=",".join(cols)) + ) + + +def run(): + argparse.FileType() + + parser = argparse.ArgumentParser() + parser.add_argument("--input", "-i", type=str, required=True) + parser.add_argument("--output", "-o", type=str, required=True) + parser.add_argument("--model-config", "-mc", type=str, required=True) + group = parser.add_argument_group("pipeline-options") + group.add_argument("--beam-config", "-bc", type=str) + args = 
parser.parse_args() + + pipeline(beam_options=args.beam_config, dotargs=args) + + +if __name__ == "__main__": + run() \ No newline at end of file diff --git a/fao_models/beam_utils.py b/fao_models/beam_utils.py new file mode 100644 index 0000000..af041dc --- /dev/null +++ b/fao_models/beam_utils.py @@ -0,0 +1,137 @@ +import geopandas as gpd +import ee +import google.auth +import io +from google.api_core import retry +import numpy as np +from models import get_model, freeze + +def parse_shp_to_latlon(file): + gdf = gpd.read_file(file) + gdf.loc[:,'centroid'] = gdf.geometry.centroid + gdf.loc[:,'lonlat'] = gdf.centroid.apply(lambda x: [x.x, x.y]) + return gdf[['global_id', 'lonlat']].values.tolist() + +def get_ee_img(coords): + """retrieve s2 image composite from ee at given coordinates. coords is a tuple of (lon, lat) in degrees.""" + ## MAKE S2 COMPOSITE IN HEXAGONS ########################################## + # Using Cloud Score + for cloud/cloud-shadow masking + # Harmonized Sentinel-2 Level 2A collection. + s2 = ee.ImageCollection("COPERNICUS/S2_SR_HARMONIZED") + + # Cloud Score+ image collection. Note Cloud Score+ is produced from Sentinel-2 + # Level 1C data and can be applied to either L1C or L2A collections. + csPlus = ee.ImageCollection("GOOGLE/CLOUD_SCORE_PLUS/V1/S2_HARMONIZED") + + # Use 'cs' or 'cs_cdf', depending on your use case; see docs for guidance. + QA_BAND = "cs_cdf" + + # The threshold for masking; values between 0.50 and 0.65 generally work well. + # Higher values will remove thin clouds, haze & cirrus shadows. + CLEAR_THRESHOLD = 0.50 + + # Make a clear median composite. + sampleImage = ( + s2.filterDate("2023-01-01", "2023-12-31") + .filterBounds(ee.Geometry.Point(coords[0], coords[1]).buffer(64*10)) # only images touching 64 pixel centroid buffer + .linkCollection(csPlus, [QA_BAND]) + .map(lambda img: img.updateMask(img.select(QA_BAND).gte(CLEAR_THRESHOLD))) + .median() + .select(["B4", "B3", "B2", "B8"], ["R", "G", "B", "N"]) + ) + return sampleImage + +@retry.Retry() +def get_patch_numpy(coords, image, format="NPY"): + """Uses ee.data.ComputePixels() to get a 32x32 patch centered on the coordinates, as a numpy array.""" + + # Output resolution in meters. + SCALE = 10 + + # Pre-compute a geographic coordinate system. + proj = ee.Projection("EPSG:4326").atScale(SCALE).getInfo() + + # Get scales in degrees out of the transform. + SCALE_X = proj["transform"][0] + SCALE_Y = -proj["transform"][4] + + # Patch size in pixels. + PATCH_SIZE = 32 + + # Offset to the upper left corner. 
+ OFFSET_X = -SCALE_X * PATCH_SIZE / 2 + OFFSET_Y = -SCALE_Y * PATCH_SIZE / 2 + + REQUEST = { + "fileFormat": "NPY", + "grid": { + "dimensions": {"width": PATCH_SIZE, "height": PATCH_SIZE}, + "affineTransform": { + "scaleX": SCALE_X, + "shearX": 0, + "shearY": 0, + "scaleY": SCALE_Y, + }, + "crsCode": proj["crs"], + }, + } + + request = dict(REQUEST) + request["fileFormat"] = format + request["expression"] = image + request["grid"]["affineTransform"]["translateX"] = coords[0] + OFFSET_X + request["grid"]["affineTransform"]["translateY"] = coords[1] + OFFSET_Y + return np.load(io.BytesIO(ee.data.computePixels(request))) + +def to_tensor(patch): + """ + Converts a numpy array to a tf tensor + """ + from numpy.lib.recfunctions import structured_to_unstructured + + unstruct = structured_to_unstructured(patch) # converts to CHW shape + rescaled = unstruct.astype(np.float64) / 10000 # scale it + reshaped = np.reshape(rescaled, (1, 32, 32, 4)) # batch it + return reshaped + +def make_inference(tensor): + """Loads model for inference and returns prediction on the provided tensor""" + import numpy as np + # 20-epoch resnet trained on full tfrecord set (tfrecords/all) + model_name = "resnet" + optimizer = "adam" + loss_function = "binary_crossentropy" + checkpoint = "C:\\fao-models\\saved_models\\resnet-epochs20-batch64-lr001-seed5-lrdecay5-tfrecords-all\\best_model.h5" + model = get_model(model_name, optimizer=optimizer, loss_fn=loss_function, training_mode=True) + model.load_weights(checkpoint) + freeze(model) + + prob = round(float(model(tensor).numpy()),2) + prediction = "Forest" if prob > 0.5 else "Non-Forest" + return prob, prediction + +# testing + +# PROJECT = "pc530-fao-fra-rss" # change to your cloud project name + +# ## INIT WITH HIGH VOLUME ENDPOINT +# credentials, _ = google.auth.default() +# ee.Initialize( +# credentials, +# project=PROJECT, +# opt_url="https://earthengine-highvolume.googleapis.com",) + +# pColl = parse_shp_to_latlon('C:\\Users\\kyle\\Downloads\\FRA_hex_shp_5records.shp') +# coords = [] +# preds = [] +# for nested_l in pColl: +# coord = nested_l[1] +# img = get_ee_img(coord) +# patch = get_patch_numpy(coord, img) +# tensor = to_tensor(patch) +# prediction = make_inference(tensor) +# coords.append(coord) +# preds.append(prediction) +# print(coords) +# print(preds) + diff --git a/fao_models/common.py b/fao_models/common.py index 6967be7..17c5592 100644 --- a/fao_models/common.py +++ b/fao_models/common.py @@ -1,7 +1,17 @@ """The common module contains common functions and classes used by the other modules. """ +import yaml +from pathlib import Path + + +def load_yml(_input: str): + with open(_input, "r") as f: + args = yaml.safe_load(f) + + # #tests for later maybe + # assert a1 == a2, "PAth and str are not same" + return args + +# test = load_yml("C:\\fao-models\\runc-resnet-epochs20-batch64-lr001-seed5-lrdecay5-tfrecords-all.yml") +# print(test) -def hello_world(): - """Prints "Hello World!" to the console. 
- """ - print("Hello World!") \ No newline at end of file diff --git a/fao_models/freezing_weights.py b/fao_models/freezing_weights.py new file mode 100644 index 0000000..0ee6761 --- /dev/null +++ b/fao_models/freezing_weights.py @@ -0,0 +1,105 @@ +#%% +import keras +from models import get_model +import dataloader as dl +import numpy as np + +#%% +dir_path = "C:\\fao-models\\tfrecords\\all" +dataset = dl.load_dataset_from_tfrecords(tfrecord_dir=dir_path, batch_size=32, buffer_size=1000, seed=5) +train_dataset, test_dataset, val_dataset = dl.split_dataset(dataset, total_examples=2000, test_split=0.2, batch_size=32, val_split=0.1) + +# y_true = np.concatenate([y for x, y in dataset], axis=0) +# print('y_true count: ',len(y_true)) +# vals, counts = np.unique(y_true, return_counts=True) +# print('vals, counts: ',[vals, counts]) + +# %% +# load model +model = get_model(model_name="resnet", optimizer="adam", loss_fn="binary_crossentropy") +# model.summary() +# #%% +# # train the model +# history = model.fit( +# train_dataset, +# epochs=1, +# validation_data=test_dataset, +# # callbacks=callbacks, +# ) +#%% + +def freeze(model): + """Freeze model weights in every layer.""" + for layer in model.layers: + layer.trainable = False + + if isinstance(layer, keras.models.Model): + freeze(layer) + return model + +def unfreeze(model): + """Unfreeze model weights in every layer.""" + for layer in model.layers: + layer.trainable = True + + if isinstance(layer, keras.models.Model): + unfreeze(layer) + return model + +normal_weights_file = "C:\\fao-models\\saved_models\\test-freezing-weights\\model.h5" +frozen_weights_file = "C:\\fao-models\\saved_models\\test-freezing-weights\\frozen_model.h5" +#%% +# save model as-is, no freezing +# model.save_weights(normal_weights_file) + +# freeze model then save +# frozen = freeze(model) +# frozen.save_weights(frozen_weights_file) + +# load model checkpoint from normal weights and from frozen see if any differneces +# loading non-frozen weights into non-frozen model ok +model1 = model2 = model +model1.load_weights(normal_weights_file) + +# if you don't freeze model before loading weights of a frozen model, get axes don't match array error +freeze(model2) +model2.load_weights(frozen_weights_file) + +#%% +model1.evaluate(val_dataset) +model2.evaluate(val_dataset) +#%% +preds_model1 = model1.predict(val_dataset)[0:20] +preds_model2 = model2.predict(val_dataset)[0:20] +assert np.array_equal(preds_model1, preds_model2), "Predictions are not equal" +#%% +# load weights from trainable model +model3 = get_model(model_name="resnet", optimizer="adam", loss_fn="binary_crossentropy") +model3.load_weights(normal_weights_file) + +# then freeze the model for inference +freeze(model3) +model3.evaluate(val_dataset) +preds_model3_frozen = model3.predict(val_dataset)[0:20] + +# try unfreezing and training it again +unfreeze(model3) +model3.evaluate(val_dataset) +preds_model3_unfrozen = model3.predict(val_dataset)[0:20] + +assert np.array_equal(preds_model3_frozen, preds_model3_unfrozen), "Predictions are not equal" +print(preds_model3_frozen) +print(preds_model3_unfrozen) + +#%% + + +# # in another script or workflow... 
+# new_model_unfrozen = get_model(model_name="resnet", optimizer="adam", loss_fn="binary_crossentropy") +# new_model_unfrozen.load_weights(normal_weights_file) # try to load weights + +# new_model_frozen = get_model(model_name="resnet", optimizer="adam", loss_fn="binary_crossentropy") +# new_model_frozen.load_weights(frozen_weights_file) # try to load weights +# %% +# print(new_model_unfrozen.predict(val_dataset.take(1))) +# %% diff --git a/fao_models/freezing_weights_test_real-models.py b/fao_models/freezing_weights_test_real-models.py new file mode 100644 index 0000000..134fb0a --- /dev/null +++ b/fao_models/freezing_weights_test_real-models.py @@ -0,0 +1,90 @@ +#%% +import keras +from models import get_model +import dataloader as dl +import numpy as np + +def freeze(model): + """Freeze model weights in every layer.""" + for layer in model.layers: + layer.trainable = False + + if isinstance(layer, keras.models.Model): + freeze(layer) + return model + +def unfreeze(model): + """Unfreeze model weights in every layer.""" + for layer in model.layers: + layer.trainable = True + + if isinstance(layer, keras.models.Model): + unfreeze(layer) + return model + +#%% +dir_path = "C:\\fao-models\\tfrecords\\all" +dataset = dl.load_dataset_from_tfrecords(tfrecord_dir=dir_path, batch_size=32, buffer_size=140000, seed=5) +train_dataset, test_dataset, val_dataset = dl.split_dataset(dataset, total_examples=2000, test_split=0.2, batch_size=32, val_split=0.1) + +y_true = np.concatenate([y for x, y in val_dataset], axis=0) +print('y_true count: ',len(y_true)) +vals, counts = np.unique(y_true, return_counts=True) +print('vals, counts: ',[vals, counts]) +# %% +# load model +weights_file = "C:\\fao-models\\saved_models\\resnet-epochs10-batch64-lr001-seed5-lrdecay5-tfrecords-all\\best_model.h5" + +# this is how we've had it before, base_model(training=True) when building top-layers on top of the resent +model_trainable = get_model(model_name="resnet", optimizer="adam", loss_fn="binary_crossentropy", training_mode=True) +# this would be used if you don't want to train the base model as well (we do) +model_nontrainable = get_model(model_name="resnet", optimizer="adam", loss_fn="binary_crossentropy", training_mode=False) + +model_trainable.load_weights(weights_file) +model_nontrainable.load_weights(weights_file) + +print(model_trainable.summary()) +print(model_nontrainable.summary()) + +#%% +# check if trainable weights, non-trainable weights, and model predictions differ +model_trainable.evaluate(val_dataset) +model_trainable_preds = np.round(model_trainable.predict(val_dataset),1)[0:20] + +model_nontrainable.evaluate(val_dataset) +model_nontrainable_preds = np.round(model_nontrainable.predict(val_dataset),1)[0:20] + +# the trainable/non-trainable weights count are the same between them but the actual weights (np arrays) +# are different so this is not the same thing as freezing weights (model.trainable=False) +assert np.array_equal(len(model_trainable.trainable_weights),len(model_nontrainable.trainable_weights)), "Trainable weights not equal" +assert np.array_equal(len(model_trainable.non_trainable_weights),len(model_nontrainable.non_trainable_weights)), "Non-Trainable weights not equal" +# but predictions are not equal which we expected +assert np.array_equal(model_trainable_preds,model_nontrainable_preds), "Predictions not equal" + +#%% +# load model and freeze all layer weights (layer.trainable=False) +model_frozen = get_model(model_name="resnet", optimizer="adam", loss_fn="binary_crossentropy") 
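+# Load the trained checkpoint into this fresh (still trainable) model, then freeze it:
+# freeze() recursively sets layer.trainable = False on every layer, including the nested
+# resnet50 sub-model, so the evaluate/predict calls below run with all weights locked.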
+model_frozen.load_weights(weights_file) +freeze(model_frozen) +model_frozen.evaluate(val_dataset) +model_frozen_preds = np.round(model_frozen.predict(val_dataset),1)[0:20] + +# don't freeeze weights, just load +model_unfrozen = get_model(model_name="resnet", optimizer="adam", loss_fn="binary_crossentropy") +model_unfrozen.load_weights(weights_file) +model_unfrozen.evaluate(val_dataset) +model_unfrozen_preds = np.round(model_unfrozen.predict(val_dataset),1)[0:20] + +assert np.array_equal(len(model_frozen.trainable_weights),len(model_unfrozen.trainable_weights)), "Trainable weights not equal" +assert np.array_equal(len(model_frozen.non_trainable_weights),len(model_unfrozen.non_trainable_weights)), "Non-Trainable weights not equal" +# predictions are not equal +assert np.array_equal(model_frozen_preds,model_unfrozen_preds), "Predictions not equal" +# %% +# interestingly.. you can still train the frozen model. not sure whats actually happening if all weights are frozen +model_frozen.fit( + train_dataset, + epochs=1, + validation_data=test_dataset, + # callbacks=callbacks, + ) +# %% diff --git a/fao_models/graveyard/batch_splits_inquiry.py b/fao_models/graveyard/batch_splits_inquiry.py new file mode 100644 index 0000000..3ba7967 --- /dev/null +++ b/fao_models/graveyard/batch_splits_inquiry.py @@ -0,0 +1,61 @@ +import dataloader as dl +import numpy as np +data_dir = "C:\\fao-models\\tfrecords\\all" +batch_size=64 +buffer_size=76992 +total_examples=buffer_size +test_split=0.2 +val_split=0.1 + +# Load the dataset without batching +dataset = dl.load_dataset_from_tfrecords(data_dir, batch_size=batch_size, buffer_size=buffer_size, seed=5) + +# Split the dataset 2 ways or 3 ways +if val_split is not None: + train_dataset, test_dataset, val_dataset = dl.split_dataset( + dataset, + total_examples, + test_split=test_split, + batch_size=batch_size, + val_split=val_split, + ) + +else: + train_dataset, test_dataset = dl.split_dataset( + dataset, total_examples, test_split=test_split, batch_size=batch_size + ) + +# # checking data splits for class balance +# print('Reporting class balance for each data split...') + +# print('All Data') +# y_true = np.concatenate([y for x, y in dataset], axis=0) +# print('y_true count: ',len(y_true)) +# vals, counts = np.unique(y_true, return_counts=True) +# print('vals, counts: ',[vals, counts]) + +# print('Train Data') +# y_true_train = np.concatenate([y for x, y in train_dataset], axis=0) +# print('y_true count: ',len(y_true_train)) +# vals, counts = np.unique(y_true_train, return_counts=True) +# print('vals, counts: ',[vals, counts]) + +# print('Test Data') +# y_true_test = np.concatenate([y for x, y in test_dataset], axis=0) +# print('y_true count: ',len(y_true_test)) +# vals, counts = np.unique(y_true_test, return_counts=True) +# print('vals, counts: ',[vals, counts]) + +# print('Val Data') +# y_true_val = np.concatenate([y for x, y in val_dataset], axis=0) +# print('y_true count: ',len(y_true_val)) +# vals, counts = np.unique(y_true_val, return_counts=True) +# print('vals, counts: ',[vals, counts]) + +# train_dataset = train_dataset.shuffle( +# buffer_size, reshuffle_each_iteration=True) + +# inspect each batch to ensure it is balanced +for batch_images, batch_labels in val_dataset: + vals, counts = np.unique(batch_labels, return_counts=True) + print('vals, counts: ', [vals, counts]) diff --git a/fao_models/model_predict_batch.py b/fao_models/model_predict_batch.py new file mode 100644 index 0000000..cb20750 --- /dev/null +++ b/fao_models/model_predict_batch.py @@ 
-0,0 +1,106 @@ +import numpy as np +import datetime +import logging +from models import get_model, freeze +import os +import tensorflow as tf +import rasterio as rio +import yaml +import argparse +import dataloader as dl + + +logging.basicConfig( + format="%(asctime)s %(message)s", + datefmt="%Y-%m-%d %I:%M:%S %p", + level=logging.WARNING, + filename=os.path.join( + os.path.dirname(os.path.dirname(__file__)), + f'trainlog_{datetime.datetime.now().strftime("%Y-%m-%d")}.log', + ), # add _%H-%M-%S if needbe +) +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +def cli(): + # initalize new cli parser + parser = argparse.ArgumentParser(description="Train a model with a .yml file.") + + parser.add_argument( + "-c", + "--config", + type=str, + help="path to .yml file", + ) + parser.add_argument( + "-t", + "--test", + type=bool, + default=False, + help="Run as a test. limits total examples to 5*batch_size and adds a test prefix to experiment name", + ) + args = parser.parse_args() + + config_file = args.config + main(config_file) + + +def load_predict_model(model_name, optimizer, loss_function, weights): + model = get_model(model_name, optimizer=optimizer, loss_fn=loss_function) + model.load_weights(weights) + freeze(model) # freeze model layers before loading weights + + return model + + +def main(config: str | dict): + # load model + if isinstance(config, str): + with open(config, "r") as file: + config = yaml.safe_load(file) + model_name = config["model_name"] + weights = config["checkpoint"] + optimizer = config["optimizer"] + loss_function = config["loss_function"] + data_dir = config["data_dir"] + batch_size = config["batch_size"] + buffer_size = config["buffer_size"] + seed = config["seed"] + total_examples = config["total_examples"] + test_split = config["test_split"] + val_split = config["val_split"] + model = load_predict_model(model_name, optimizer, loss_function, weights) + + # Load the dataset without batching + dataset = dl.load_dataset_from_tfrecords(data_dir, batch_size=batch_size, buffer_size=buffer_size, seed=seed) + + # Split the dataset 2 ways or 3 ways + if val_split is not None: + train_dataset, test_dataset, val_dataset = dl.split_dataset( + dataset, + total_examples, + test_split=test_split, + batch_size=batch_size, + val_split=val_split, + ) + + else: + train_dataset, test_dataset = dl.split_dataset( + dataset, total_examples, test_split=test_split, batch_size=batch_size + ) + + model.evaluate(val_dataset) + + y_pred = model.predict(val_dataset).flatten() + print(len(y_pred)) + y_true = np.array([y for x, y in val_dataset.unbatch()]) + print(len(y_true)) + + print(list(zip(y_true,y_pred))[0:50]) + + + + +# main("dev-predict-runc-resnet-jjd.yml") +cli() diff --git a/fao_models/plotting/learning_rates.py b/fao_models/plotting/learning_rates.py index 2fc96b2..0a24304 100644 --- a/fao_models/plotting/learning_rates.py +++ b/fao_models/plotting/learning_rates.py @@ -102,7 +102,7 @@ def __init__(self, lr, lr_decay, lr_decay_step, step=0, decay_fn='inverse_time_d # for ITD decay rate needs to be more aggressive the more epochs we have # ((steps_per_epoch * epochs) ** 1/5) * 2 lr = 0.001 -lr_decay = 1.5 +lr_decay = 5 for epochs in [5,10,15,30,50,100]: decay_steps = ((steps_per_epoch * epochs) ** 1/5)*2 lr_decay = lr_decay ** 1/5 if lr_decay > 1 else lr_decay diff --git a/fao_models/test_inference_pipeline_steps.ipynb b/fao_models/test_inference_pipeline_steps.ipynb index 7af9a57..db29af6 100644 --- a/fao_models/test_inference_pipeline_steps.ipynb +++ 
b/fao_models/test_inference_pipeline_steps.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -27,7 +27,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -105,42 +105,42 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[102.56, -1.18]\n" + "(32, 32)\n" ] } ], "source": [ - "id, latlon = 1233804841, [102.56,-1.18]#[102.19,-1.54]#[-60.25204,3.86655]#[-172.3490007781034,-13.523357265222518] #[-257.82, -1.54]#\n", - "print(latlon)\n", + "id, latlon = 1233804841, [-172.3490007781034,-13.523357265222518]#[102.41,-1.19]#[102.4, -1.19]#[102.56,-1.18]#[102.19,-1.54]#[-60.25204,3.86655]#[-172.3490007781034,-13.523357265222518] #[-257.82, -1.54]#\n", "image = get_ee_img(latlon)\n", "patch = get_patch(latlon, image)\n", - "# print(patch)" + "print(patch.shape) # (32,32)\n", + "# print(patch) # needs to be of shape (4,32,32)\n" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "fa44f35c37dd4c38bb83444edb945a32", + "model_id": "a9ddf2f08d57411dbc34562bd819e913", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "Map(center=[-1.18, 102.56000000000002], controls=(WidgetControl(options=['position', 'transparent_bg'], widget…" + "Map(center=[-13.523357265222518, -172.3490007781034], controls=(WidgetControl(options=['position', 'transparen…" ] }, - "execution_count": 4, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -154,338 +154,6 @@ "Map" ] }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "import tensorflow as tf\n", - "from numpy.lib.recfunctions import structured_to_unstructured\n", - "unstruct = structured_to_unstructured(patch)\n", - "# print(unstruct)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "rescaled = unstruct.astype(np.float64) / 10000\n", - "# print(rescaled)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "transposed = np.transpose(rescaled, (1, 2, 0))\n", - "# print(transposed)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# how to get numpy array of 4 bands into correct shape for model prediction\n", - "reshaped = np.reshape(transposed, (1, 32, 32, 4))\n", - "# print(reshaped)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": 
"display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Model found: resnet\n", - "Model: \"model\"\n", - "_________________________________________________________________\n", - " Layer (type) Output Shape Param # \n", - "=================================================================\n", - " input_2 (InputLayer) [(None, 32, 32, 4)] 0 \n", - " \n", - " resnet50 (Functional) (None, 1, 1, 2048) 23590848 \n", - " \n", - " flatten (Flatten) (None, 2048) 0 \n", - " \n", - " dense (Dense) (None, 256) 524544 \n", - " \n", - " dense_1 (Dense) (None, 1) 257 \n", - " \n", - "=================================================================\n", - "Total params: 24,115,649\n", - "Trainable params: 0\n", - "Non-trainable params: 24,115,649\n", - "_________________________________________________________________\n", - "None\n", - "tf.Tensor([[0.01512885]], shape=(1, 1), dtype=float32)\n" - ] - } - ], - "source": [ - "# 5-epoch resnet trained on full tfrecord set (tfrecords/all)\n", - "model_name = \"resnet\"\n", - "optimizer = \"adam\"\n", - "loss_function = \"binary_crossentropy\"\n", - "checkpoint = \"C:\\\\fao-models\\\\saved_models\\\\resnet-epochs5-batch64-lr001-seed5-lrdecay5-tfrecords-all\\\\best_model.h5\"\n", - "# load several model versions into memory..\n", - "model = get_model(model_name, optimizer=optimizer, loss_fn=loss_function)\n", - "model.load_weights(checkpoint)\n", - "freeze(model)\n", - "\n", - "# print(model.summary())\n", - "prediction = model(reshaped)\n", - "print(prediction)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Model found: resnet\n", - "tf.Tensor([[0.00071617]], shape=(1, 1), dtype=float32)\n" - ] - } - ], - "source": [ - "# 30-epoch resnet with 86% binary accuracy\n", - "model_name = \"resnet\"\n", - "optimizer = \"adam\"\n", - "loss_function = \"binary_crossentropy\"\n", - "checkpoint = \"C:\\\\fao-models\\\\saved_models\\\\resnet-epochs30-batch64-lr001\\\\best_model.h5\"\n", - "\n", - "# load several model versions into memory..\n", - "model = get_model(model_name, optimizer=optimizer, loss_fn=loss_function)\n", - "model.load_weights(checkpoint)\n", - "freeze(model)\n", - "# print(model.summary())\n", - "# apply sigmoid fn to from logits to prob\n", - "prediction = model(reshaped)\n", - "print(prediction)" - ] - }, { "cell_type": "code", "execution_count": 11, @@ -529,154 +197,44 @@ "name": "stdout", "output_type": "stream", "text": [ - "Model found: mobilenet_v3small\n", - "tf.Tensor([[0.00288773]], shape=(1, 1), dtype=float32)\n" - ] - } - ], - "source": [ - "# 10 epoch mobilenetv3small with 82% acc\n", - "model_name = \"mobilenet_v3small\"\n", - "optimizer = \"adam\"\n", - "loss_function = \"binary_crossentropy\"\n", - "checkpoint = \"C:\\\\fao-models\\\\saved_models\\\\mobilenetv3small-epochs10-batch32-lr01\\\\best_model.h5\"\n", - "\n", - "# load several model versions into memory..\n", - "model = get_model(model_name, optimizer=optimizer, loss_fn=loss_function)\n", - "model.load_weights(checkpoint)\n", - "freeze(model)\n", - "\n", - "# print(model.summary())\n", - "prediction = model(reshaped)\n", - "print(prediction)" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " " - 
], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "(1, 32, 32, 4)\n", "Model found: resnet\n", - "tf.Tensor([[0.03912623]], shape=(1, 1), dtype=float32)\n" - ] - } - ], - "source": [ - "# 15 epoch resnet with 98% binary accuracy \n", - "model_name = \"resnet\"\n", - "optimizer = \"adam\"\n", - "loss_function = \"binary_crossentropy\"\n", - "checkpoint = \"C:\\\\fao-models\\\\saved_models\\\\resnet-epochs5-batch64-lr001-seed5-lrdecay5\\\\best_model.h5\"\n", - "# load several model versions into memory..\n", - "model = get_model(model_name, optimizer=optimizer, loss_fn=loss_function)\n", - "model.load_weights(checkpoint)\n", - "freeze(model)\n", - "# print(model.summary())\n", - "prediction = model(reshaped)\n", - "print(prediction)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Model found: mobilenet_v3small\n", - "tf.Tensor([[0.9805746]], shape=(1, 1), dtype=float32)\n" + "(0.95, 'Forest')\n" ] } ], "source": [ - "# very early model.. didn't even save its training params\n", - "model_name = \"mobilenet_v3small\"\n", - "optimizer = \"adam\"\n", - "loss_function = \"binary_crossentropy\"\n", + "def to_tensor(patch):\n", + " \"\"\"\n", + " Converts a numpy array to a tf tensor\n", + " \"\"\"\n", + " from numpy.lib.recfunctions import structured_to_unstructured\n", + " \n", + " unstruct = structured_to_unstructured(patch) # converts to CHW shape\n", + " rescaled = unstruct.astype(np.float64) / 10000 # scale it \n", + " reshaped = np.reshape(rescaled, (1, 32, 32, 4)) # batch it\n", + " return reshaped\n", + "\n", + "def make_inference(tensor):\n", + " \"\"\"Loads model for inference and returns prediction on the provided tensor\"\"\"\n", + " import numpy as np\n", + " # 20-epoch resnet trained on full tfrecord set (tfrecords/all)\n", + " model_name = \"resnet\"\n", + " optimizer = \"adam\"\n", + " loss_function = \"binary_crossentropy\"\n", + " checkpoint = \"C:\\\\fao-models\\\\saved_models\\\\resnet-epochs20-batch64-lr001-seed5-lrdecay5-tfrecords-all\\\\best_model.h5\"\n", + " model = get_model(model_name, optimizer=optimizer, loss_fn=loss_function, training_mode=True)\n", + " model.load_weights(checkpoint)\n", + " freeze(model)\n", "\n", - "checkpoint = \"C:\\\\fao-models\\\\saved_models\\\\mobilenet_v3small_batch255\\\\best_model.h5\"\n", + " prob = round(float(model(tensor).numpy()),2)\n", + " prediction = \"Forest\" if prob > 0.5 else \"Non-Forest\"\n", + " return prob,prediction\n", "\n", - "# load several model versions into memory..\n", - "model = get_model(model_name, optimizer=optimizer, loss_fn=loss_function)\n", - "model.load_weights(checkpoint)\n", - "freeze(model)\n", - "# print(model.summary())\n", - "prediction = model(reshaped)\n", - "print(prediction)" + "tensor = to_tensor(patch)\n", + "print(tensor.shape)\n", + "out = make_inference(tensor)\n", + "print(out)" ] } ], diff --git a/fao_models/validate_eval_metrics.py b/fao_models/validate_eval_metrics.py index b23fc85..dffe828 100644 --- a/fao_models/validate_eval_metrics.py +++ b/fao_models/validate_eval_metrics.py @@ -4,7 +4,7 @@ import dataloader as dl import numpy as np -data_dir = "tfrecords/all" +data_dir = "C:\\fao-models\\tfrecords\\all" batch_size = 64 
buffer_size = 135232 total_examples = buffer_size @@ -55,7 +55,7 @@ from pprint import pformat import numpy as np -checkpoint = "expriments/resnet-epochs20-batch64-lr001-seed5-lrdecay5-tfrecords-all/best_model.h5" +checkpoint = "C:\\fao-models\\saved_models\\resnet-epochs20-batch64-lr001-seed5-lrdecay5-tfrecords-all\\best_model.h5" # checkpoint = "C:\\fao-models\\saved_models\\mobilenet_v3small_batch255\\best_model.h5" model = get_model( model_name="resnet", @@ -168,3 +168,5 @@ def recall(y_true, y_pred): # print('y_true, y_pred') # for i in list(zip(y_true_val[:10],np.round(y_pred_val)[:10])): # print(i) + +# %% diff --git a/runc-mobilenetv3small-epochs10-batch32-lr01_testPredict.yml b/runc-mobilenetv3small-epochs10-batch32-lr01_testPredict.yml new file mode 100644 index 0000000..912ba11 --- /dev/null +++ b/runc-mobilenetv3small-epochs10-batch32-lr01_testPredict.yml @@ -0,0 +1,21 @@ +experiment_name: mobilenetv3small-epochs10-batch32-lr01 +model_name: mobilenet_v3small +data_dir: tfrecords/all +checkpoint: saved_models/mobilenetv3small-epochs10-batch32-lr01/best_model.h5 +total_examples: 70000 # number of geotiffs not tfrecords +test_split: 0.2 # float or null +test_split: 0.2 # float or null +val_split: 0.1 +seed: 5 +decay_rate: 1 + +optimizer: adam +optimizer_use_lr_schedular: true +loss_function: binary_crossentropy + +epochs: 10 +learning_rate: 0.01 +batch_size: 32 +buffer_size: 70000 + +early_stopping_patience: 5 # null or int diff --git a/runc-mobilenetv3small-epochs30-batch64-lr001_testPredict.yml b/runc-mobilenetv3small-epochs30-batch64-lr001_testPredict.yml new file mode 100644 index 0000000..9730273 --- /dev/null +++ b/runc-mobilenetv3small-epochs30-batch64-lr001_testPredict.yml @@ -0,0 +1,21 @@ +experiment_name: mobilenetv3small-epochs30-batch64-lr001 +model_name: mobilenet_v3small +data_dir: tfrecords/train_test +val_data_dir: tfrecords/val +checkpoint: saved_models/mobilenetv3small-epochs30-batch64-lr001/best_model.h5 +total_examples: 70000 # number of geotiffs not tfrecords +test_split: 0.2 # float or null +val_split: 0.1 +seed: 5 +decay_rate: 1 + +optimizer: adam +optimizer_use_lr_schedular: true +loss_function: binary_crossentropy + +epochs: 30 +learning_rate: 0.001 +batch_size: 64 +buffer_size: 76992 + +early_stopping_patience: 5 # null or int diff --git a/runc-resnet-epochs10-batch64-lr001-seed5-lrdecay5-tfrecords-all.yml b/runc-resnet-epochs10-batch64-lr001-seed5-lrdecay5-tfrecords-all.yml new file mode 100644 index 0000000..690abb2 --- /dev/null +++ b/runc-resnet-epochs10-batch64-lr001-seed5-lrdecay5-tfrecords-all.yml @@ -0,0 +1,20 @@ +experiment_name: resnet-epochs10-batch64-lr001-seed5-lrdecay5-tfrecords-all +model_name: resnet +data_dir: tfrecords/all +checkpoint: saved_models/resnet-epochs10-batch64-lr001-seed5-lrdecay5-tfrecords-all/best_model.h5 +total_examples: 135232 # number of geotiffs not tfrecords +test_split: 0.2 # float or null +val_split: 0.1 +seed: 5 +decay_rate: 5 + +optimizer: adam +optimizer_use_lr_schedular: true +loss_function: binary_crossentropy + +epochs: 10 +learning_rate: 0.001 +batch_size: 64 +buffer_size: 135232 + +early_stopping_patience: null # null or int diff --git a/runc-resnet-epochs10-batch64-lr001-seed5.yml b/runc-resnet-epochs10-batch64-lr001-seed5.yml new file mode 100644 index 0000000..3f3c6a4 --- /dev/null +++ b/runc-resnet-epochs10-batch64-lr001-seed5.yml @@ -0,0 +1,19 @@ +experiment_name: resnet-epochs10-batch64-lr001-seed5 +model_name: resnet +data_dir: tfrecords/all +checkpoint: 
saved_models/resnet-epochs10-batch64-lr001-seed5/best_model.h5 +total_examples: 76992 # number of geotiffs not tfrecords +test_split: 0.2 # float or null +val_split: 0.1 +seed: 5 + +optimizer: adam +optimizer_use_lr_schedular: true +loss_function: binary_crossentropy + +epochs: 10 +learning_rate: 0.001 +batch_size: 64 +buffer_size: 76992 + +early_stopping_patience: null # null or int diff --git a/runc-resnet-epochs100-batch64-lr001.yml b/runc-resnet-epochs100-batch64-lr001.yml new file mode 100644 index 0000000..f8a8ed8 --- /dev/null +++ b/runc-resnet-epochs100-batch64-lr001.yml @@ -0,0 +1,18 @@ +experiment_name: resnet-epochs100-batch64-lr001 +model_name: resnet +data_dir: tfrecords/all +checkpoint: saved_models/resnet-epochs100-batch64-lr001/best_model.h5 +total_examples: 76992 # number of geotiffs not tfrecords +test_split: 0.2 # float or null +val_split: 0.1 + +optimizer: adam +optimizer_use_lr_schedular: true +loss_function: binary_crossentropy + +epochs: 100 +learning_rate: 0.001 +batch_size: 64 +buffer_size: 76992 + +early_stopping_patience: null # null or int diff --git a/runc-resnet-epochs20-batch64-lr001-seed5-lrdecay5-tfrecords-all.yml b/runc-resnet-epochs20-batch64-lr001-seed5-lrdecay5-tfrecords-all.yml new file mode 100644 index 0000000..63edc1a --- /dev/null +++ b/runc-resnet-epochs20-batch64-lr001-seed5-lrdecay5-tfrecords-all.yml @@ -0,0 +1,20 @@ +experiment_name: resnet-epochs20-batch64-lr001-seed5-lrdecay5-tfrecords-all +model_name: resnet +data_dir: tfrecords/all +checkpoint: saved_models/resnet-epochs20-batch64-lr001-seed5-lrdecay5-tfrecords-all/best_model.h5 +total_examples: 135232 # number of geotiffs not tfrecords +test_split: 0.2 # float or null +val_split: 0.1 +seed: 5 +decay_rate: 5 + +optimizer: adam +optimizer_use_lr_schedular: true +loss_function: binary_crossentropy + +epochs: 20 +learning_rate: 0.001 +batch_size: 64 +buffer_size: 135232 + +early_stopping_patience: null # null or int diff --git a/runc-resnet-epochs30-batch64-lr001-seed5-lrdecay2_5.yml b/runc-resnet-epochs30-batch64-lr001-seed5-lrdecay2_5.yml new file mode 100644 index 0000000..773be59 --- /dev/null +++ b/runc-resnet-epochs30-batch64-lr001-seed5-lrdecay2_5.yml @@ -0,0 +1,20 @@ +experiment_name: resnet-epochs5-batch64-lr001-seed5-lrdecay5-check-splits +model_name: resnet +data_dir: tfrecords/all +checkpoint: saved_models/resnet-epochs5-batch64-lr001-seed5-lrdecay5-check-splits/best_model.h5 +total_examples: 76992 # number of geotiffs not tfrecords +test_split: 0.2 # float or null +val_split: 0.1 +seed: 5 +decay_rate: 5 + +optimizer: adam +optimizer_use_lr_schedular: true +loss_function: binary_crossentropy + +epochs: 30 +learning_rate: 0.001 +batch_size: 64 +buffer_size: 76992 + +early_stopping_patience: null # null or int diff --git a/runc-resnet-epochs30-batch64-lr001-seed5-lrdecay5.yml b/runc-resnet-epochs30-batch64-lr001-seed5-lrdecay5.yml new file mode 100644 index 0000000..338c5e7 --- /dev/null +++ b/runc-resnet-epochs30-batch64-lr001-seed5-lrdecay5.yml @@ -0,0 +1,20 @@ +experiment_name: resnet-epochs30-batch64-lr001-seed5-lrdecay5 +model_name: resnet +data_dir: tfrecords/all +checkpoint: saved_models/resnet-epochs30-batch64-lr001-seed5-lrdecay5/best_model.h5 +total_examples: 76992 # number of geotiffs not tfrecords +test_split: 0.2 # float or null +val_split: 0.1 +seed: 5 +decay_rate: 5 + +optimizer: adam +optimizer_use_lr_schedular: true +loss_function: binary_crossentropy + +epochs: 30 +learning_rate: 0.001 +batch_size: 64 +buffer_size: 76992 + +early_stopping_patience: null # null or 
int diff --git a/runc-resnet-epochs5-batch64-lr001-seed42.yml b/runc-resnet-epochs5-batch64-lr001-seed42.yml new file mode 100644 index 0000000..6e3beff --- /dev/null +++ b/runc-resnet-epochs5-batch64-lr001-seed42.yml @@ -0,0 +1,19 @@ +experiment_name: resnet-epochs5-batch64-lr001-seed42 +model_name: resnet +data_dir: tfrecords/all +checkpoint: saved_models/resnet-epochs5-batch64-lr001-seed42/best_model.h5 +total_examples: 76992 # number of geotiffs not tfrecords +test_split: 0.2 # float or null +val_split: 0.1 +seed: 42 + +optimizer: adam +optimizer_use_lr_schedular: true +loss_function: binary_crossentropy + +epochs: 5 +learning_rate: 0.001 +batch_size: 64 +buffer_size: 76992 + +early_stopping_patience: null # null or int diff --git a/runc-resnet-epochs5-batch64-lr001-seed5-lrdecay5-rerun-afterdatadownload.yml b/runc-resnet-epochs5-batch64-lr001-seed5-lrdecay5-rerun-afterdatadownload.yml new file mode 100644 index 0000000..8af5be2 --- /dev/null +++ b/runc-resnet-epochs5-batch64-lr001-seed5-lrdecay5-rerun-afterdatadownload.yml @@ -0,0 +1,20 @@ +experiment_name: resnet-epochs5-batch64-lr001-seed5-lrdecay5-rerun-afterdatadownload +model_name: resnet +data_dir: tfrecords/all +checkpoint: saved_models/resnet-epochs5-batch64-lr001-seed5-lrdecay5-rerun-afterdatadownload/best_model.h5 +total_examples: 76992 # number of geotiffs not tfrecords +test_split: 0.2 # float or null +val_split: 0.1 +seed: 5 +decay_rate: 5 + +optimizer: adam +optimizer_use_lr_schedular: true +loss_function: binary_crossentropy + +epochs: 1 +learning_rate: 0.001 +batch_size: 64 +buffer_size: 76992 + +early_stopping_patience: null # null or int diff --git a/runc-resnet-epochs5-batch64-lr001-seed5-lrdecay5-tfrecords-all.yml b/runc-resnet-epochs5-batch64-lr001-seed5-lrdecay5-tfrecords-all.yml new file mode 100644 index 0000000..6904248 --- /dev/null +++ b/runc-resnet-epochs5-batch64-lr001-seed5-lrdecay5-tfrecords-all.yml @@ -0,0 +1,20 @@ +experiment_name: resnet-epochs5-batch64-lr001-seed5-lrdecay5-tfrecords-all +model_name: resnet +data_dir: tfrecords/all +checkpoint: saved_models/resnet-epochs5-batch64-lr001-seed5-lrdecay5-tfrecords-all/best_model.h5 +total_examples: 135232 # number of geotiffs not tfrecords +test_split: 0.2 # float or null +val_split: 0.1 +seed: 5 +decay_rate: 5 + +optimizer: adam +optimizer_use_lr_schedular: true +loss_function: binary_crossentropy + +epochs: 5 +learning_rate: 0.001 +batch_size: 64 +buffer_size: 135232 + +early_stopping_patience: null # null or int diff --git a/runc-resnet-epochs5-batch64-lr001-seed5-reshuffledEachIter.yml b/runc-resnet-epochs5-batch64-lr001-seed5-reshuffledEachIter.yml new file mode 100644 index 0000000..1d4b142 --- /dev/null +++ b/runc-resnet-epochs5-batch64-lr001-seed5-reshuffledEachIter.yml @@ -0,0 +1,19 @@ +experiment_name: resnet-epochs5-batch64-lr001-seed5-reshuffledEachIter +model_name: resnet +data_dir: tfrecords/all +checkpoint: saved_models/resnet-epochs5-batch64-lr001-seed5-reshuffledEachIter/best_model.h5 +total_examples: 76992 # number of geotiffs not tfrecords +test_split: 0.2 # float or null +val_split: 0.1 +seed: 5 + +optimizer: adam +optimizer_use_lr_schedular: true +loss_function: binary_crossentropy + +epochs: 5 +learning_rate: 0.001 +batch_size: 64 +buffer_size: 76992 + +early_stopping_patience: null # null or int diff --git a/runc-resnet-epochs5-batch64-lr001_testingBugFix.yml b/runc-resnet-epochs5-batch64-lr001_testingBugFix.yml new file mode 100644 index 0000000..412c1cd --- /dev/null +++ b/runc-resnet-epochs5-batch64-lr001_testingBugFix.yml @@ 
-0,0 +1,19 @@ +experiment_name: resnet-epochs5-batch64-lr001-testBugFix +model_name: resnet +data_dir: tfrecords/all +checkpoint: saved_models/resnet-epochs5-batch64-lr001-testBugFix/best_model.h5 +total_examples: 76992 # number of geotiffs not tfrecords +test_split: 0.2 # float or null +val_split: 0.1 +seed: 5 + +optimizer: adam +optimizer_use_lr_schedular: true +loss_function: binary_crossentropy + +epochs: 5 +learning_rate: 0.001 +batch_size: 64 +buffer_size: 76992 + +early_stopping_patience: null # null or int
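Usage note: a minimal sketch of how the inference pipeline added in fao_models/beam_pipeline.py could be invoked. The file names below are hypothetical placeholders; a real run also assumes Earth Engine credentials for the pc530-fao-fra-rss project and a run config that exposes model_name, optimizer, loss_fn and checkpoint in the form Predict.setup() reads them (attribute access on the object returned by load_yml).

# From inside fao_models/ (the new modules use flat sibling imports):
#   python beam_pipeline.py -i FRA_hexagons.shp -o hex_predictions -mc run_config.yml
# or, equivalently, in Python:
from types import SimpleNamespace
from beam_pipeline import pipeline

args = SimpleNamespace(
    input="FRA_hexagons.shp",       # SHP with a 'global_id' column; one centroid is taken per feature
    output="hex_predictions",       # WriteToText prefix; header is "id,long,lat,prob_label,pred_label"
    model_config="run_config.yml",  # model_name / optimizer / loss_fn / checkpoint for Predict
)
pipeline(beam_options=None, dotargs=args)  # beam.Pipeline() is currently constructed with default options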