Skip to content

Commit

Permalink
These examples were added on request. This is a quick fix; I need to further improve them.
Browse files Browse the repository at this point in the history
  • Loading branch information
sztoor committed Nov 25, 2024
1 parent aa0ce51 commit 5496566
Show file tree
Hide file tree
Showing 21 changed files with 1,003 additions and 0 deletions.
58 changes: 58 additions & 0 deletions examples/lung-cancer-image-data/client/data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import os
from math import floor
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split


# Number of target classes. The dataset has three classes
# (benign, Malignant, Normal) and the model ends in Dense(3),
# so the original value of 2 was incorrect.
NUM_CLASSES = 3


def load_data(data_path, is_train=True):
    """Index the IQ-OTH/NCCD lung-cancer images and split them.

    Walks the three class directories under ``data_path``, builds a
    DataFrame with columns ``"filepaths"`` and ``"labels"``, and splits
    it into train/validation/test partitions.

    :param data_path: Dataset root directory. When None, falls back to
        the FEDN_DATA_PATH environment variable, then a hard-coded path.
    :type data_path: str or None
    :param is_train: If True return ``(train_set, val_set)``; otherwise
        return the held-out test DataFrame.
    :type is_train: bool
    :return: Two DataFrames (train, validation) or one test DataFrame.
    """
    if data_path is None:
        data_path = os.environ.get("FEDN_DATA_PATH", "/home/salman/LungCancer/image-data/IQ-OTHNCCD/")

    print('data_path:', data_path)

    # NOTE: "Bengin" is the (misspelled) directory name shipped with the
    # IQ-OTH/NCCD dataset, so the string must stay spelled this way.
    class_dirs = {
        'benign': data_path + r'/Bengin cases',
        'Malignant': data_path + r'/Malignant cases',
        'Normal': data_path + r'/Normal cases',
    }

    filepaths = []
    labels = []
    for label, class_dir in class_dirs.items():
        for fname in os.listdir(class_dir):
            filepaths.append(os.path.join(class_dir, fname))
            labels.append(label)

    lung_df = pd.DataFrame({"filepaths": filepaths, "labels": labels})

    # Split the held-out test set first, then carve a validation set out
    # of the remaining TRAIN portion. The original code split the full
    # frame twice, which leaked test images into the train/val sets.
    train_images, test_images = train_test_split(lung_df, test_size=0.3, random_state=42)
    train_set, val_set = train_test_split(train_images, test_size=0.2, random_state=42)

    if is_train:
        return train_set, val_set
    return test_images

if __name__ == "__main__":
    # Executed by the "startup" entry point in fedn.yaml: a dry run
    # that verifies the data directory is readable before training.
    load_data(data_path = None)
12 changes: 12 additions & 0 deletions examples/lung-cancer-image-data/client/fedn.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# FEDn compute-package configuration for the lung-cancer image example.
# Maps FEDn entry points to the client-side scripts in this directory.
python_env: python_env.yaml
entry_points:
  # Build and store the initial (seed) model.
  build:
    command: python model.py
  # Runs when the client starts; here it sanity-checks data loading.
  startup:
    command: python data.py
  # One round of local training on the client's data partition.
  train:
    command: python train.py
  # Local validation of the current global model.
  validate:
    command: python validate.py
  # Batch inference on the local test split.
  predict:
    command: python predict.py
112 changes: 112 additions & 0 deletions examples/lung-cancer-image-data/client/model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.applications.vgg19 import VGG19
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import TensorBoard,EarlyStopping

import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K

from fedn.utils.helpers.helpers import get_helper


HELPER_MODULE = "numpyhelper"
helper = get_helper(HELPER_MODULE)


def compile_model():
    """Build and compile the CNN classifier for (224, 224, 3) RGB images.

    The network is a stack of Conv2D+BatchNormalization groups with
    interleaved max-pooling, followed by two Dense(1024) layers with
    dropout and a 3-way softmax output. Compiled with SGD and
    categorical cross-entropy (expects one-hot labels).

    :return: A compiled Keras Sequential model.
    :rtype: keras.models.Sequential
    """
    def _conv_bn(filters, kernel, **kwargs):
        # Convenience: every conv in this net is ReLU-activated and
        # immediately followed by batch normalization.
        return [
            keras.layers.Conv2D(filters=filters, kernel_size=kernel, activation='relu', **kwargs),
            keras.layers.BatchNormalization(),
        ]

    layers = []
    layers += _conv_bn(128, (8, 8), strides=(3, 3), input_shape=(224, 224, 3))
    layers += _conv_bn(256, (5, 5), strides=(1, 1), padding="same")
    layers.append(keras.layers.MaxPool2D(pool_size=(3, 3)))

    layers += _conv_bn(256, (3, 3), strides=(1, 1), padding="same")
    layers += _conv_bn(256, (1, 1), strides=(1, 1), padding="same")
    layers += _conv_bn(256, (1, 1), strides=(1, 1), padding="same")

    layers += _conv_bn(512, (3, 3), padding="same")
    layers.append(keras.layers.MaxPool2D(pool_size=(2, 2)))

    layers += _conv_bn(512, (3, 3), padding="same")
    layers += _conv_bn(512, (3, 3), padding="same")
    layers.append(keras.layers.MaxPool2D(pool_size=(2, 2)))

    layers += _conv_bn(512, (3, 3), padding="same")
    layers.append(keras.layers.MaxPool2D(pool_size=(2, 2)))

    layers += [
        keras.layers.Flatten(),
        keras.layers.Dense(1024, activation='relu'),
        keras.layers.Dropout(0.5),
        keras.layers.Dense(1024, activation='relu'),
        keras.layers.Dropout(0.5),
        keras.layers.Dense(3, activation='softmax'),
    ]

    model = keras.models.Sequential(layers)
    model.compile(
        loss='categorical_crossentropy',
        optimizer=tf.optimizers.SGD(learning_rate=0.001),
        metrics=['accuracy'],
    )
    return model


def save_parameters(model, out_path):
    """Serialize the model's weight list to file via the FEDn helper.

    :param model: The model to serialize.
    :type model: keras.model.Sequential
    :param out_path: The path to save the model to.
    :type out_path: str
    """
    helper.save(model.get_weights(), out_path)


def load_parameters(model_path):
    """Build a fresh model and populate it with weights read from file.

    :param model_path: The path to load from.
    :type model_path: str
    :return: The loaded model.
    :rtype: keras.model.Sequential
    """
    loaded_weights = helper.load(model_path)
    fresh_model = compile_model()
    fresh_model.set_weights(loaded_weights)
    return fresh_model


def init_seed(out_path="../seed.npz"):
    """Initialize a seed model (untrained weights) and save it to file.

    :param out_path: The path to save the seed model to.
    :type out_path: str
    """
    save_parameters(compile_model(), out_path)


if __name__ == "__main__":
    # "build" entry point (fedn.yaml): create and store the seed model.
    init_seed("../seed.npz")
30 changes: 30 additions & 0 deletions examples/lung-cancer-image-data/client/predict.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import json
import os
import sys

import numpy as np
from data import load_data
from model import load_parameters

# Make sibling modules (data.py, model.py) importable regardless of the
# working directory the client process is launched from.
dir_path = os.path.dirname(os.path.realpath(__file__))
sys.path.append(os.path.abspath(dir_path))


def predict(in_model_path, out_json_path, data_path=None):
    """Run inference on the local test split and write predictions as JSON.

    :param in_model_path: Path to the model weights to load.
    :param out_json_path: Path of the JSON file to write predictions to.
    :param data_path: Dataset root; resolved by load_data when None.
    """
    # Local import: keeps predict.py importable without TensorFlow until
    # prediction is actually requested.
    import tensorflow as tf
    from tensorflow.keras.preprocessing.image import ImageDataGenerator

    # load_data(..., is_train=False) returns a single DataFrame. The
    # original code unpacked it as `x_test, _ = ...`, which bound the two
    # COLUMN NAMES (strings) instead of the data. Using test data here,
    # but another dataset could be loaded.
    test_images = load_data(data_path, is_train=False)

    # Raw filepaths cannot be fed to model.predict; build the same kind
    # of generator used by train.py / validate.py.
    image_gen = ImageDataGenerator(preprocessing_function=tf.keras.applications.mobilenet_v2.preprocess_input)
    test = image_gen.flow_from_dataframe(
        dataframe=test_images,
        x_col="filepaths",
        y_col="labels",
        target_size=(224, 224),  # must match the model's input_shape
        color_mode="rgb",
        class_mode="categorical",
        batch_size=4,
        shuffle=False,  # keep row order aligned with test_images
    )

    # Load model
    model = load_parameters(in_model_path)

    # Predict class indices.
    y_pred = model.predict(test)
    y_pred = np.argmax(y_pred, axis=1)

    # Save JSON
    with open(out_json_path, "w") as fh:
        fh.write(json.dumps({"predictions": y_pred.tolist()}))


if __name__ == "__main__":
    # CLI ("predict" entry point): predict.py <in_model_path> <out_json_path>
    predict(sys.argv[1], sys.argv[2])
17 changes: 17 additions & 0 deletions examples/lung-cancer-image-data/client/python_env.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Python environment for the lung-cancer image example clients.
# FEDn builds this virtualenv before running the entry points.
name: lung-cancer-image-keras
build_dependencies:
  - pip
  - setuptools
  - wheel
dependencies:
  - keras==3.6.0
  - tensorboard==2.18.0
  - tensorboard-data-server==0.7.2
  - tensorflow==2.18.0
  - tensorflow-io-gcs-filesystem==0.37.1
  - fire==0.3.1
  - pillow==11.0.0
  - scikit-learn==1.5.2
  - pandas==2.2.3
  - numpy==2.0.2
  # NOTE(review): fedn is unpinned; consider pinning for reproducibility.
  - fedn
63 changes: 63 additions & 0 deletions examples/lung-cancer-image-data/client/train.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import os
import sys

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from data import load_data
from model import load_parameters, save_parameters

from fedn.utils.helpers.helpers import save_metadata

def train(in_model_path, out_model_path, data_path=None, batch_size=40, epochs=1):
    """Train the global model on the local lung-cancer image partition.

    :param in_model_path: Path to the incoming global model weights.
    :param out_model_path: Path to write the updated weights and metadata.
    :param data_path: Dataset root; resolved by load_data when None.
    :param batch_size: Generator batch size.
    :param epochs: Number of local training epochs.
    """
    # Load data
    train_set, val_set = load_data(data_path)

    image_gen = ImageDataGenerator(preprocessing_function=tf.keras.applications.mobilenet_v2.preprocess_input)
    # target_size must match the model's input_shape of (224, 224, 3);
    # the original (244, 244) breaks the forward pass at fit time.
    train = image_gen.flow_from_dataframe(
        dataframe=train_set,
        x_col="filepaths",
        y_col="labels",
        target_size=(224, 224),
        color_mode="rgb",
        class_mode="categorical",  # one-hot labels for categorical_crossentropy
        batch_size=batch_size,
        shuffle=False,  # deterministic order; consider True for better training
    )
    val = image_gen.flow_from_dataframe(
        dataframe=val_set,
        x_col="filepaths",
        y_col="labels",
        target_size=(224, 224),
        color_mode="rgb",
        class_mode="categorical",
        batch_size=batch_size,
        shuffle=False,
    )

    # Load model
    model = load_parameters(in_model_path)

    # Train
    model.fit(train, epochs=epochs, validation_data=val, verbose=1)

    # Metadata needed for aggregation server side
    metadata = {
        # num_examples weights this update during aggregation: use the
        # sample count (train.n), not len(train), which is the number of
        # BATCHES and would skew federated averaging.
        "num_examples": train.n,
        "batch_size": batch_size,
        "epochs": epochs,
    }

    # Save JSON metadata file (mandatory)
    save_metadata(metadata, out_model_path)

    # Save model update (mandatory)
    save_parameters(model, out_model_path)


if __name__ == "__main__":
    # CLI ("train" entry point): train.py <in_model_path> <out_model_path>
    train(sys.argv[1], sys.argv[2])
63 changes: 63 additions & 0 deletions examples/lung-cancer-image-data/client/validate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import os
import sys

import numpy as np
from data import load_data
from model import load_parameters
import tensorflow as tf
from fedn.utils.helpers.helpers import save_metrics

from tensorflow.keras.preprocessing.image import ImageDataGenerator


def validate(in_model_path, out_json_path, data_path=None):
    """Evaluate the model on the local validation and test splits.

    Writes a FEDn metrics JSON with loss/accuracy for both splits.

    :param in_model_path: Path to the model weights to evaluate.
    :param out_json_path: Path of the metrics JSON to write.
    :param data_path: Dataset root; resolved by load_data when None.
    """
    # Load data; the train split is not needed for validation.
    _, val_set = load_data(data_path)
    test_images = load_data(data_path, is_train=False)

    image_gen = ImageDataGenerator(preprocessing_function=tf.keras.applications.mobilenet_v2.preprocess_input)
    # target_size must match the model's input_shape of (224, 224, 3);
    # the original (244, 244) breaks the forward pass.
    test = image_gen.flow_from_dataframe(
        dataframe=test_images,
        x_col="filepaths",
        y_col="labels",
        target_size=(224, 224),
        color_mode="rgb",
        class_mode="categorical",
        batch_size=4,
        shuffle=False,
    )
    val = image_gen.flow_from_dataframe(
        dataframe=val_set,
        x_col="filepaths",
        y_col="labels",
        target_size=(224, 224),
        color_mode="rgb",
        class_mode="categorical",
        batch_size=4,
        shuffle=False,
    )

    # Load model
    model = load_parameters(in_model_path)

    # Evaluate: evaluate() returns [loss, accuracy] per compile() metrics.
    model_score = model.evaluate(val, verbose=1)
    model_score_test = model.evaluate(test, verbose=1)
    y_pred = np.argmax(model.predict(test), axis=1)

    print('model_score: ', model_score)
    print('model_score_test: ', model_score_test)
    print('y_pred: ', y_pred)

    # JSON schema expected server side. NOTE(review): the "training_*"
    # keys actually carry VALIDATION-split scores; key names are kept
    # unchanged for backward compatibility with the dashboard.
    report = {
        "training_loss": model_score[0],
        "training_accuracy": model_score[1],
        "test_loss": model_score_test[0],
        "test_accuracy": model_score_test[1],
    }

    # Save JSON
    save_metrics(report, out_json_path)


if __name__ == "__main__":
    # CLI ("validate" entry point): validate.py <in_model_path> <out_json_path>
    validate(sys.argv[1], sys.argv[2])
Loading

0 comments on commit 5496566

Please sign in to comment.