-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
125 lines (110 loc) · 5.58 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
from scripts.roboflow_data import get_roboflow_data
from scripts.argparse import create_parser
from scripts.train import train_model
from scripts.model_evaluation import evaluate_model
from scripts.mlflow_utils.register_model import register_model
from scripts.mlflow_utils.mlflow_build_image import build_and_run_sagemaker_image
import sys
import logging
import json
import os
import glob
import yaml
# --- Command-line arguments --------------------------------------------------
# Parsed at module level because main() reads the module-level `args`.
args = create_parser().parse_args()

# --model_path points at an already-trained model. It therefore must NOT be
# combined with training a new model, and it MUST be given when an existing
# model is going to be evaluated or registered without training first.
# Flags are tested by truthiness, the same way main() tests them.
_needs_model = bool(args.register_model or args.model_evaluation)
if args.train_model and _needs_model and args.model_path is not None:
    raise ValueError(
        "Argument: --model_path can only be specified if the argument 'train_model' "
        "is set to False when a model will be registered or evaluated"
    )
if not args.train_model and _needs_model and args.model_path is None:
    raise ValueError(
        "If the argument: 'train_model' is set to False then the optional argument "
        "--model_path must be specified when a model will be registered or evaluated"
    )

# --- Logging -----------------------------------------------------------------
# The log file lives in a "log" directory next to this script, regardless of
# the directory the script is launched from.
log_path = os.path.join(os.path.dirname(__file__), "log", "train_model.log")
log_dir = os.path.dirname(log_path)

# Clean log directory
if args.remove_logs:
    # Clean the directory the FileHandler below writes to (not "log/" relative
    # to the current working directory). glob.escape() keeps special characters
    # in the directory path from being treated as wildcards.
    for filename in glob.glob(os.path.join(glob.escape(log_dir), "*.log*")):
        try:
            os.remove(filename)
        except OSError:
            # Best-effort cleanup: a file that cannot be removed must not stop
            # the run. Nothing is logged here on purpose, because emitting a
            # record before basicConfig() would install a default handler and
            # turn the basicConfig() call below into a no-op.
            pass

os.makedirs(log_dir, exist_ok=True)

# Setting the basic configuration of the log file to write to a file and to the console
logging.basicConfig(
    handlers=[logging.FileHandler(log_path), logging.StreamHandler(sys.stdout)],
    level=logging.INFO,
    format="[%(asctime)s] %(levelname)s - %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)

# Resolved form of the requested level as returned by logging.getLevelName();
# the root logger itself is configured directly from the raw argument.
logging_level = logging.getLevelName(args.logging_level)
logging.getLogger().setLevel(args.logging_level)
logging.debug(f"YOLOv8 Model training log path: {log_path}")
def main():
    """
    Run the pipeline stages selected by the command-line arguments, in order.

    The stages, each enabled by its own flag on the module-level ``args``, are:

    1. ``get_data``         - download the dataset with ``get_roboflow_data``.
    2. ``train_model``      - train a model with ``train_model``.
    3. ``model_evaluation`` - check the model against the configured thresholds
       with ``evaluate_model``.
    4. ``register_model``   - register the model with ``register_model``.
    5. ``build_image``      - build the image with ``build_and_run_sagemaker_image``.

    Stage parameters are read from ``config/model_config.yaml`` (sections
    ``roboflow_params``, ``training_params``, ``mlflow_params`` and, only when
    evaluating, ``evaluation_thresholds``). The path is relative to the current
    working directory.

    When evaluation is enabled and the checks fail, an error is logged and the
    register and build stages are skipped. When evaluation is disabled, those
    stages depend only on their own flags.

    The model directory used for evaluation and registration is the training
    run's ``model.trainer.save_dir`` when ``train_model`` is set, otherwise
    ``args.model_path``.

    Raises:
        ValueError: If ``get_data`` is set but no Roboflow API key was supplied.
    """
    # Get default parameters. The encoding is explicit so that parsing does not
    # depend on the platform's locale default.
    with open("config/model_config.yaml", "r", encoding="utf-8") as file:
        yaml_inputs = yaml.safe_load(file)

    roboflow_params = yaml_inputs["roboflow_params"]
    # Dataset folder: <write_dataset>/<project>-<data_version>
    dataset_dir = os.path.join(
        roboflow_params["write_dataset"],
        roboflow_params["project"] + "-" + str(roboflow_params["data_version"]),
    )
    logging.debug(
        f"Roboflow parameters: {json.dumps(roboflow_params, indent=4, sort_keys=True)}"
    )

    training_params = yaml_inputs["training_params"]
    logging.debug(
        f"Training parameters: {json.dumps(training_params, indent=4, sort_keys=True)}"
    )

    mlflow_params = yaml_inputs["mlflow_params"]
    logging.debug(
        f"MLflow parameters: {json.dumps(mlflow_params, indent=4, sort_keys=True)}"
    )

    # --- Stage 1: data download ---
    if args.get_data:
        if not args.roboflow_api_key:
            raise ValueError("Specify a Roboflow API key if parameter 'get_data' is set to True")
        logging.info("Starting to download training, validation and test images")
        get_roboflow_data(api_key=args.roboflow_api_key, **roboflow_params)
        logging.info(f"Photo download complete at: {dataset_dir}")

    # --- Stage 2: training ---
    if args.train_model:
        logging.info(
            "Starting a training job. For details around configurations see 'config/model_config.yaml"
        )
        model = train_model(dataset_dir=dataset_dir, **training_params)
        logging.info("Training complete.")

    # Resolve the model location once, and only when a later stage needs it:
    # the fresh training output if a model was just trained, else --model_path.
    model_dir = None
    if args.model_evaluation or args.register_model:
        model_dir = model.trainer.save_dir if args.train_model else args.model_path

    # --- Stage 3: evaluation ---
    # Without evaluation there is nothing to fail, so later stages are not gated.
    checks_passed = True
    if args.model_evaluation:
        logging.info(f"Evaluating the model: {model_dir}")
        evaluation_thresholds = yaml_inputs["evaluation_thresholds"]
        checks_passed = evaluate_model(
            save_dir=model_dir,
            evaluation_thresholds=evaluation_thresholds,
            evaluation_metrics=["mAP50", "mAP50-95", "precision", "recall"],
        )
        if not checks_passed:
            logging.error("Model: best.pt metrics failed to meet or exceed evaluation thresholds, model will not be registered and docker image will not be built")

    # --- Stage 4: registration ---
    if args.register_model and checks_passed:
        logging.info(f"Registering the model: {model_dir}")
        # Register the model
        register_model(
            experiment_name=mlflow_params["experiment_name"],
            model_name=mlflow_params["model_name"],
            save_dir=model_dir,
        )

    # --- Stage 5: image build ---
    if args.build_image and checks_passed:
        logging.info(
            f"Building an image for model: {mlflow_params['model_name']} "
        )
        build_and_run_sagemaker_image(
            mlflow_params["model_name"],
            mlflow_params["image_name"],
            mlflow_params["version"],
            mlflow_params["docker_port"],
        )
# Script entry point: bracket the run with start/complete log records.
if __name__ == "__main__":
    logging.info("ML Run Process Start")
    try:
        main()
    except Exception:
        # Send the failure and its traceback through the logging handlers so it
        # ends up alongside the other run records, then re-raise so the process
        # still terminates with the original exception.
        logging.exception("ML Run Process Failed")
        raise
    logging.info("ML Run Process Complete")