diff --git a/.dvc/.gitignore b/.dvc/.gitignore new file mode 100644 index 0000000..528f30c --- /dev/null +++ b/.dvc/.gitignore @@ -0,0 +1,3 @@ +/config.local +/tmp +/cache diff --git a/.dvc/config b/.dvc/config new file mode 100644 index 0000000..e69de29 diff --git a/.dvcignore b/.dvcignore new file mode 100644 index 0000000..5197305 --- /dev/null +++ b/.dvcignore @@ -0,0 +1,3 @@ +# Add patterns of files dvc should ignore, which could improve +# the performance. Learn more at +# https://dvc.org/doc/user-guide/dvcignore diff --git a/.github/workflows/.gitkeep b/.github/workflows/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 0000000..fd2edb3 --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,18 @@ +name: CML +on: [push] +jobs: + train-and-report: + runs-on: ubuntu-latest + container: docker://ghcr.io/iterative/cml:0-dvc2-base1 + steps: + - uses: actions/checkout@v3 + - run: | + pip install -r requirements.txt + python main.py # create score.json + + # Create CML report + cat score.json >> report.md + # echo '![](./plot.png "Confusion Matrix")' >> report.md + + env: + REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..47a6f5d --- /dev/null +++ b/.gitignore @@ -0,0 +1,166 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +ml-environment/ +artifacts/ +data/ +LOGS/ +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ +# ml-environment + +# Spyder project settings +.spyderproject +.spyproject +artifacts + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..ce05066 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 Aman Kumar + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..de12d95 --- /dev/null +++ b/README.md @@ -0,0 +1,6 @@ +# End-to-End-Image-Classification-MLOPS-implementation + + + + + diff --git a/config/config.yaml b/config/config.yaml new file mode 100644 index 0000000..9d4ff32 --- /dev/null +++ b/config/config.yaml @@ -0,0 +1,23 @@ +artifacts_root: artifacts + +data_ingestion: + root_dir: data + source_URL: https://github.com/Aman123lug/Data-storage-for-all-projects/raw/main/Chicken-fecal-images.zip + local_data_file: artifacts/data_ingestion/data.zip + unzip_dir: artifacts/data_ingestion + +prepare_base_model: + root_dir: artifacts/prepare_base_model + base_model_config: artifacts/prepare_base_model/base_model.h5 + updated_base_model_config: artifacts/prepare_base_model/base_model_updated.h5 + + +prepare_callbacks: + root_dir: artifacts/prepare_callbacks + tensorboard_root_log_dir: artifacts/prepare_callbacks/tensorboard_root_log_dir + checkpoint_model_filepath: artifacts/prepare_callbacks/checkpoint_root_log_dir/model.h5 + + +training: + root_dir: artifacts/training + train_model_path: artifacts/training/model.h5 diff --git a/dvc.yaml b/dvc.yaml new file mode 100644 index 0000000..1999fc7 --- /dev/null +++ b/dvc.yaml @@ -0,0 +1,58 @@ +stages: + data_ingestion: + cmd: python src/cnnClassifier/pipline/stage_01_data_ingestion.py + deps: + - src/cnnClassifier/pipline/stage_01_data_ingestion.py + - config/config.yaml + outs: + - data/Chicken-fecal-images + + prepare_base_model: + cmd: python src/cnnClassifier/pipline/stage_02_prepare_base_model.py + deps: + - src/cnnClassifier/pipline/stage_02_prepare_base_model.py + - config/config.yaml + + params: + - IMAGE_SIZE + - INCLUDE_TOP + - CLASSES + - 
WEIGHTS + - LEARNING_RATE + + outs: + - artifacts/prepare_base_model + + + training: + cmd: python src/cnnClassifier/pipline/stage_03_training.py + deps: + + - src/cnnClassifier/pipline/stage_03_training.py + - src/cnnClassifier/components/prepare_callbacks.py + - config/config.yaml + - data/Chicken-fecal-images + - artifacts/prepare_base_model + + params: + - EPOCH + - BATCH_SIZE + - IMAGE_SIZE + - AUGMENTATION + + outs: + - artifacts/training/model.h5 + + model_evaluation: + cmd: python src/cnnClassifier/pipline/stage_04_evaluation.py + deps: + - src/cnnClassifier/pipline/stage_04_evaluation.py + - config/config.yaml + - data/Chicken-fecal-images + - artifacts/training/model.h5 + params: + - IMAGE_SIZE + - BATCH_SIZE + metrics: + - score.json: + cache: false \ No newline at end of file diff --git a/main.py b/main.py new file mode 100644 index 0000000..ece3ea1 --- /dev/null +++ b/main.py @@ -0,0 +1,51 @@ +from src.cnnClassifier.loggerr import logger +from src.cnnClassifier.pipline.stage_01_data_ingestion import DataIngestionTrainingPipeline +from src.cnnClassifier.pipline.stage_02_prepare_base_model import PrepareBaseModelTrainingPipeline +from src.cnnClassifier.pipline.stage_03_training import ModelTrainingPipeline +from src.cnnClassifier.pipline.stage_04_evaluation import ModelEvaluation + +STAGE_NAME = "Data Ingestion" +try: + logger.info(f" <<<< stage {STAGE_NAME} <<<< started") + data_ingestion = DataIngestionTrainingPipeline() + data_ingestion.main() + logger.info(f" <<<< stage {STAGE_NAME} >>>> completed !") + +except Exception as e: + # logger.exception(e) + raise e + +STAGE_NAME = "Prepare Base Model" +try: + logger.info(f" <<<< stage {STAGE_NAME} <<<< started") + base_model = PrepareBaseModelTrainingPipeline() + base_model.main() + logger.info(f" <<<< stage {STAGE_NAME} >>>> completed !") + +except Exception as e: + logger.exception(e) + raise e + + +STAGE_NAME = "Training" +try: + logger.info(f"<<<< stage {STAGE_NAME} started >>>>") + train = 
ModelTrainingPipeline() + train.main() + logger.info(f"<<<< stage {STAGE_NAME} completed >>>>") + +except Exception as e: + logger.exception(e) + raise e + + +STAGE_NAME = "Model Evaluation" +try: + logger.info(f"<<<< Stage {STAGE_NAME} started") + evaluate = ModelEvaluation() + evaluate.main() + logger.info(f"<<<< stage {STAGE_NAME} completed >>>>") + +except Exception as e: + logger.exception(e) + raise e diff --git a/params.yaml b/params.yaml new file mode 100644 index 0000000..c0abe3b --- /dev/null +++ b/params.yaml @@ -0,0 +1,11 @@ +AUGMENTATION: True +IMAGE_SIZE: [224, 224, 3] +BATCH_SIZE: 16 +INCLUDE_TOP: False +EPOCH: 1 +LEARNING_RATE: 0.01 +CLASSES: 2 +WEIGHTS: imagenet + + + diff --git a/project-image.png b/project-image.png new file mode 100644 index 0000000..d9d08e5 Binary files /dev/null and b/project-image.png differ diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..717148c --- /dev/null +++ b/requirements.txt @@ -0,0 +1,18 @@ +scikit-learn +tensorflow +dvc +pandas +numpy +notebook +matplotlib +seaborn +python-box==6.0.2 +pyYAML +tqdm +ensure==1.0.2 +joblib +spacy +Flask +Flask-Cors + +-e . 
diff --git a/research/01_data_ingestion.ipynb b/research/01_data_ingestion.ipynb new file mode 100644 index 0000000..dd2d665 --- /dev/null +++ b/research/01_data_ingestion.ipynb @@ -0,0 +1,247 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "os.getcwd()\n", + "os.chdir(\"../\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "from dataclasses import dataclass\n", + "from pathlib import Path\n", + "\n", + "@dataclass(frozen=True)\n", + "class DataIngestionConfig:\n", + " root_dir: Path\n", + " source_URL: str\n", + " local_data_file: Path\n", + " unzip_dir: Path\n", + " \n", + "#config_entity" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "d:\\End-to-End chicken-diesase-implementation\n" + ] + } + ], + "source": [ + "from src.cnnClassifier.constant import *\n", + "from src.cnnClassifier.utils.common import load_bin, read_yaml, create_directories" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "class ConfigurationManager:\n", + " def __init__(self, configfile_path = CONFIG_FILE_PATH, paramsfile_path = PARAMS_FILE_PATH) -> None:\n", + " \n", + " self.config = read_yaml(configfile_path)\n", + " self.params = read_yaml(paramsfile_path)\n", + " \n", + " create_directories([self.config.artifacts_root])\n", + " \n", + " def get_data_ingestion_config(self) -> DataIngestionConfig:\n", + " config = self.config.data_ingestion\n", + " \n", + " create_directories([config.root_dir])\n", + " \n", + " \n", + " data_ingestion_config = DataIngestionConfig(\n", + " \n", + " root_dir=config.root_dir,\n", + " source_URL=config.source_URL,\n", + " 
local_data_file=config.local_data_file,\n", + " unzip_dir=config.unzip_dir\n", + " \n", + " )\n", + " return data_ingestion_config\n", + " \n", + "# configuration.py" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import urllib.request as request\n", + "import zipfile\n", + "from src.cnnClassifier import logger\n", + "from src.cnnClassifier.utils.common import get_size\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "class DataIngestion:\n", + " def __init__(self, config: DataIngestionConfig) -> None:\n", + " self.config = config\n", + " \n", + " def download_file(self):\n", + " try:\n", + " logger.info(\"Fetching zip file...\")\n", + " if not os.path.exists(self.config.local_data_file):\n", + " filename, dirname = request.urlretrieve(\n", + " url=self.config.source_URL,\n", + " filename=self.config.local_data_file\n", + " )\n", + " logger.info(f\"File Download Successfully {filename} with : {dirname}\")\n", + " \n", + " else:\n", + " logger.info(f\"File already exists of size: {get_size(Path(self.config.local_data_file))}\") \n", + " \n", + " except Exception as e:\n", + " logger.exception(e)\n", + " raise e\n", + " \n", + " \n", + " def extract_zip_file(self):\n", + " \n", + " unzip_dir = self.config.local_data_file\n", + "\n", + " with zipfile.ZipFile(unzip_dir, \"r\") as unzip:\n", + " logger.info(\"zipefile read successfully\")\n", + " unzip.extractall(\"data\")\n", + " logger.info(\"zipfile extracted successfully\")\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2023-07-26 22:01:17,567 - INFO - common - yaml file: config\\config.yaml loaded successfully]\n", + "[2023-07-26 22:01:17,583 - INFO - common - yaml file: params.yaml loaded successfully]\n", + "[2023-07-26 22:01:17,586 - INFO - 
common - Directory created ! artifacts]\n", + "[2023-07-26 22:01:17,591 - INFO - common - Directory created ! data]\n", + "[2023-07-26 22:01:17,597 - INFO - 2118982511 - Fetching zip file...]\n", + "[2023-07-26 22:01:17,601 - INFO - 2118982511 - File already exists of size: ~ 11345 KB]\n", + "[2023-07-26 22:01:17,625 - INFO - 2118982511 - zipefile read successfully]\n", + "[2023-07-26 22:01:18,168 - INFO - 2118982511 - zipfile extracted successfully]\n" + ] + } + ], + "source": [ + "\n", + "try:\n", + " config = ConfigurationManager()\n", + " get_config_data = config.get_data_ingestion_config()\n", + " data_ingestion = DataIngestion(get_config_data)\n", + " data_ingestion.download_file()\n", + " data_ingestion.extract_zip_file()\n", + " \n", + "except Exception as e:\n", + " e" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'https://github.com/Aman123lug/End-to-End-Chicken-Disease-implementation/raw/main/data/Chicken-fecal-images.zip'" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "ml-environment", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + }, + "orig_nbformat": 4 + }, 
+ "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/research/02_prepare_base_model.ipynb b/research/02_prepare_base_model.ipynb new file mode 100644 index 0000000..ae0f4c5 --- /dev/null +++ b/research/02_prepare_base_model.ipynb @@ -0,0 +1,380 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "os.chdir(\"../\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from dataclasses import dataclass\n", + "from pathlib import Path\n", + "\n", + "@dataclass\n", + "class PrepareBaseModelConfig:\n", + " root_dir: Path\n", + " base_model_path: Path\n", + " updated_base_model: Path\n", + " params_image_size: list\n", + " params_learning_rate: float\n", + " params_include_top: bool\n", + " params_weight: str\n", + " params_classes: int\n", + " \n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "d:\\End-to-End chicken-diesase-implementation\n" + ] + } + ], + "source": [ + "from src.cnnClassifier.constant import *\n", + "from src.cnnClassifier.utils.common import create_directories,read_yaml" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "class ConfigurationManager:\n", + " def __init__(self, configpath = CONFIG_FILE_PATH, paramspath = PARAMS_FILE_PATH) -> None:\n", + " self.config = read_yaml(configpath)\n", + " self.params = read_yaml(paramspath)\n", + " \n", + " create_directories([self.config.artifacts_root])\n", + " \n", + " def get_base_model_config(self) -> PrepareBaseModelConfig:\n", + " \n", + " config = self.config.prepare_base_model\n", + " \n", + " create_directories([config.root_dir])\n", + " \n", + " prepare_base_model_config = PrepareBaseModelConfig(\n", + " root_dir=config.root_dir,\n", + " 
base_model_path=config.base_model_config,\n", + " updated_base_model=config.updated_base_model_config,\n", + " params_image_size=self.params.IMAGE_SIZE,\n", + " params_learning_rate=self.params.LEARNING_RATE,\n", + " params_include_top=self.params.INCLUDE_TOP,\n", + " params_weight=self.params.WEIGHTS,\n", + " params_classes=self.params.CLASSES\n", + " \n", + " )\n", + " \n", + " return prepare_base_model_config\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import urllib.request as request\n", + "from zipfile import ZipFile\n", + "import tensorflow as tf\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "class PrepareBaseModel:\n", + " def __init__(self, config: PrepareBaseModelConfig) -> None:\n", + " self.config = config\n", + " \n", + " def get_base_model(self):\n", + " self.model = tf.keras.applications.vgg16.VGG16(\n", + " include_top=self.config.params_include_top,\n", + " weights=self.config.params_weight,\n", + " input_shape=self.config.params_image_size,\n", + " classes=self.config.params_classes,\n", + " \n", + " )\n", + " \n", + " self.save_model(path=self.config.base_model_path, model=self.model)\n", + " \n", + " @staticmethod\n", + " def prepare_full_model(model, classes, freeze_all, freeze_till, learning_rate):\n", + " if freeze_all:\n", + " for layer in model.layers:\n", + " layer.trainable = False\n", + " \n", + " elif (freeze_till is not None) and (freeze_till > 0):\n", + " for layer in model.layers[:-freeze_till]:\n", + " layer.trainable = False\n", + " \n", + " flatten_in = tf.keras.layers.Flatten()(model.output)\n", + " prediction = tf.keras.layers.Dense(\n", + " units=classes,\n", + " activation=\"softmax\"\n", + " )(flatten_in)\n", + " \n", + " \n", + " full_model = tf.keras.models.Model(\n", + " inputs=model.input,\n", + " outputs=prediction\n", + " )\n", + " \n", + " 
full_model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=learning_rate),\n", + " loss=tf.keras.losses.CategoricalCrossentropy(),\n", + " metrics=[\"accuracy\"]\n", + " \n", + " )\n", + " full_model.summary()\n", + " \n", + " return full_model\n", + " \n", + " \n", + " def update_base_model(self):\n", + " self.full_model = self.prepare_full_model(\n", + " model=self.model,\n", + " classes=self.config.params_classes,\n", + " freeze_all=True,\n", + " freeze_till=None,\n", + " learning_rate=self.config.params_learning_rate\n", + " )\n", + " \n", + " self.save_model(path=self.config.updated_base_model, model=self.full_model)\n", + " \n", + " @staticmethod\n", + " def save_model(path: Path, model: tf.keras.Model):\n", + " model.save(path)\n", + " \n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2023-07-30 22:49:37,637 - INFO - common - yaml file: config\\config.yaml loaded successfully]\n", + "[2023-07-30 22:49:37,649 - INFO - common - yaml file: params.yaml loaded successfully]\n", + "[2023-07-30 22:49:37,649 - INFO - common - Directory created ! artifacts]\n", + "[2023-07-30 22:49:37,653 - INFO - common - Directory created ! artifacts/prepare_base_model]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2023-07-30 22:49:38,455 - WARNING - saving_utils - Compiled the loaded model, but the compiled metrics have yet to be built. 
`model.compile_metrics` will be empty until you train or evaluate the model.]\n", + "Model: \"model\"\n", + "_________________________________________________________________\n", + " Layer (type) Output Shape Param # \n", + "=================================================================\n", + " input_1 (InputLayer) [(None, 224, 224, 3)] 0 \n", + " \n", + " block1_conv1 (Conv2D) (None, 224, 224, 64) 1792 \n", + " \n", + " block1_conv2 (Conv2D) (None, 224, 224, 64) 36928 \n", + " \n", + " block1_pool (MaxPooling2D) (None, 112, 112, 64) 0 \n", + " \n", + " block2_conv1 (Conv2D) (None, 112, 112, 128) 73856 \n", + " \n", + " block2_conv2 (Conv2D) (None, 112, 112, 128) 147584 \n", + " \n", + " block2_pool (MaxPooling2D) (None, 56, 56, 128) 0 \n", + " \n", + " block3_conv1 (Conv2D) (None, 56, 56, 256) 295168 \n", + " \n", + " block3_conv2 (Conv2D) (None, 56, 56, 256) 590080 \n", + " \n", + " block3_conv3 (Conv2D) (None, 56, 56, 256) 590080 \n", + " \n", + " block3_pool (MaxPooling2D) (None, 28, 28, 256) 0 \n", + " \n", + " block4_conv1 (Conv2D) (None, 28, 28, 512) 1180160 \n", + " \n", + " block4_conv2 (Conv2D) (None, 28, 28, 512) 2359808 \n", + " \n", + " block4_conv3 (Conv2D) (None, 28, 28, 512) 2359808 \n", + " \n", + " block4_pool (MaxPooling2D) (None, 14, 14, 512) 0 \n", + " \n", + " block5_conv1 (Conv2D) (None, 14, 14, 512) 2359808 \n", + " \n", + " block5_conv2 (Conv2D) (None, 14, 14, 512) 2359808 \n", + " \n", + " block5_conv3 (Conv2D) (None, 14, 14, 512) 2359808 \n", + " \n", + " block5_pool (MaxPooling2D) (None, 7, 7, 512) 0 \n", + " \n", + " flatten (Flatten) (None, 25088) 0 \n", + " \n", + " dense (Dense) (None, 2) 50178 \n", + " \n", + "=================================================================\n", + "Total params: 14,764,866\n", + "Trainable params: 14,764,866\n", + "Non-trainable params: 0\n", + "_________________________________________________________________\n" + ] + } + ], + "source": [ + "try:\n", + " config = ConfigurationManager()\n", + " 
get_base_model_config = config.get_base_model_config()\n", + " prepare_model = PrepareBaseModel(get_base_model_config)\n", + " prepare_model.get_base_model()\n", + " prepare_model.update_base_model()\n", + " \n", + "except Exception as e:\n", + " raise(e)\n", + " \n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import keras\n", + "from keras.models import Sequential\n", + "from keras.layers import Dense\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "model = Sequential()\n", + "# model.add(tf.keras.Input(shape=(16,)))\n", + "model.add(Dense(30, activation=\"relu\", input_shape=(16,)))\n", + "model.add(Dense(20))\n", + "model.add(Dense(10))\n", + "model.add(Dense(3, activation=\"softmax\"))\n", + "\n", + "model.compile(optimizer=\"adam\", loss=\"categoricalcross_entropy\", metrics=[\"accuracy\"])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "model.trainable = False" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.input" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model: \"sequential_4\"\n", + "_________________________________________________________________\n", + " Layer (type) Output Shape Param # \n", + "=================================================================\n", + " dense_13 (Dense) (None, 30) 510 \n", + " \n", + " dense_14 (Dense) (None, 20) 620 \n", + " \n", + " dense_15 (Dense) (None, 10) 210 \n", + " \n", + " dense_16 (Dense) (None, 3) 33 \n", + " \n", + 
"=================================================================\n", + "Total params: 1,373\n", + "Trainable params: 0\n", + "Non-trainable params: 1,373\n", + "_________________________________________________________________\n" + ] + } + ], + "source": [ + "model.summary()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "ml-environment", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/research/03_prepare_callbacks.ipynb b/research/03_prepare_callbacks.ipynb new file mode 100644 index 0000000..e86371a --- /dev/null +++ b/research/03_prepare_callbacks.ipynb @@ -0,0 +1,233 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'d:\\\\End-to-End chicken-diesase-implementation\\\\research'" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import os \n", + "os.getcwd()" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "os.chdir(\"../\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from dataclasses import dataclass\n", + "from pathlib import Path\n", + "\n", + "@dataclass\n", + "class PrepareCallbacksConfig:\n", + " root_dir: Path\n", + " tensorboard_root_log_dir: Path\n", + " checkpoint_model_filepath: Path\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "d:\\End-to-End chicken-diesase-implementation\n" + ] + } + ], + "source": [ + "from src.cnnClassifier.constant import *\n", + "from src.cnnClassifier.utils.common import read_yaml, create_directories" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "class ConfigurationManager:\n", + " \n", + " def __init__(self, configfile = CONFIG_FILE_PATH, paramsfile = PARAMS_FILE_PATH) -> None:\n", + " self.config = read_yaml(configfile)\n", + " self.params = read_yaml(paramsfile)\n", + " \n", + " create_directories([self.config.artifacts_root])\n", + " \n", + " \n", + " def get_prepare_callbacks_config(self) -> PrepareCallbacksConfig:\n", + " \n", + " config = self.config.prepare_callbacks\n", + " \n", + " model_ckpt_dir = os.path.dirname(config.checkpoint_model_filepath)\n", + " \n", + " create_directories([\n", + " Path(model_ckpt_dir),\n", + " Path(config.tensorboard_root_log_dir)\n", + " ])\n", + " \n", + " prepare_callbacks_config = PrepareCallbacksConfig(\n", + " \n", + " root_dir=Path(config.root_dir),\n", + " tensorboard_root_log_dir=Path(config.tensorboard_root_log_dir),\n", + " checkpoint_model_filepath=Path(config.checkpoint_model_filepath)\n", + " \n", + " )\n", + " \n", + " return prepare_callbacks_config\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "import time\n", + "import tensorflow as tf\n", + "from src.cnnClassifier import logger" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "class PrepareCallback:\n", + " def __init__(self, config: PrepareCallbacksConfig) -> None:\n", + " self.config = config\n", + " \n", + " @property\n", + " def _create_tb_callbacks(self):\n", + " timeStamp = time.strftime(\"%Y-%m-%d-%H-%M-%S\")\n", + " \n", + " tb_running_log_dir = os.path.join(\n", + " 
self.config.tensorboard_root_log_dir,\n", + " f\"tb_log_at_{timeStamp}\",\n", + " \n", + " ) \n", + " \n", + " return tf.keras.callbacks.TensorBoard(log_dir=tb_running_log_dir)\n", + " \n", + " @property\n", + " def _create_ckpt_callbacks(self):\n", + " \n", + " return tf.keras.callbacks.ModelCheckpoint(\n", + " self.config.checkpoint_model_filepath,\n", + " save_best_only=True\n", + " )\n", + " \n", + " \n", + " def _get_tb_ckpt_callbacks(self):\n", + " \n", + " return [\n", + " self._create_tb_callbacks,\n", + " self._create_ckpt_callbacks\n", + " ]\n", + " \n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2023-07-30 22:49:58,193 - INFO - common - yaml file: config\\config.yaml loaded successfully]\n", + "[2023-07-30 22:49:58,201 - INFO - common - yaml file: params.yaml loaded successfully]\n", + "[2023-07-30 22:49:58,209 - INFO - common - Directory created ! artifacts]\n", + "[2023-07-30 22:49:58,209 - INFO - common - Directory created ! artifacts\\prepare_callbacks\\checkpoint_root_log_dir]\n", + "[2023-07-30 22:49:58,209 - INFO - common - Directory created ! 
artifacts\\prepare_callbacks\\tensorboard_root_log_dir]\n" + ] + } + ], + "source": [ + "try:\n", + " config = ConfigurationManager()\n", + " get_prepare_callback_config = config.get_prepare_callbacks_config()\n", + " Prepare_callbacks = PrepareCallback(get_prepare_callback_config)\n", + " call_backs_list = Prepare_callbacks._get_tb_ckpt_callbacks()\n", + " \n", + " \n", + "except Exception as e:\n", + " logger.exception(e)\n", + " raise e" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "ml-environment", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/research/04_model_training.ipynb b/research/04_model_training.ipynb new file mode 100644 index 0000000..868f173 --- /dev/null +++ b/research/04_model_training.ipynb @@ -0,0 +1,562 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "import os " + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "os.chdir(\"../\")" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "from dataclasses import dataclass\n", + "from pathlib import Path\n", + "\n", + "@dataclass\n", + "class TrainingConfig:\n", + " \n", + " root_dir: Path\n", + " train_model_path: Path\n", + " updated_model_base_path: Path\n", + " training_data: Path\n", + " params_epochs: int\n", + " params_batch_size: 
int\n", + " params_is_augmentation: bool\n", + " param_image_size: list \n", + "\n", + " \n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "from src.cnnClassifier.constant import *\n", + "from src.cnnClassifier.utils.common import read_yaml, create_directories\n", + "from src.cnnClassifier.entity.config_entity import PrepareCallbacksConfig" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "class ConfigurationManager:\n", + " \n", + " def __init__(self, configfile = CONFIG_FILE_PATH, paramsfile = PARAMS_FILE_PATH) -> None:\n", + " self.config = read_yaml(configfile)\n", + " self.params = read_yaml(paramsfile)\n", + " \n", + " create_directories([self.config.artifacts_root])\n", + " \n", + " \n", + " def get_prepare_callbacks_config(self) -> PrepareCallbacksConfig:\n", + " \n", + " config = self.config.prepare_callbacks\n", + " \n", + " model_ckpt_dir = os.path.dirname(config.checkpoint_model_filepath)\n", + " \n", + " create_directories([\n", + " Path(model_ckpt_dir),\n", + " Path(config.tensorboard_root_log_dir)\n", + " ])\n", + " # code working\n", + " \n", + " prepare_callbacks_config = PrepareCallbacksConfig(\n", + " \n", + " root_dir=Path(config.root_dir),\n", + " tensorboard_root_log_dir=Path(config.tensorboard_root_log_dir),\n", + " checkpoint_model_filepath=Path(config.checkpoint_model_filepath)\n", + " \n", + " )\n", + " \n", + " return prepare_callbacks_config\n", + " \n", + " def get_training_config(self) -> TrainingConfig:\n", + " training = self.config.training\n", + " prepare_base_model = self.config.prepare_base_model\n", + " params = self.params\n", + " training_data = os.path.join(self.config.data_ingestion.root_dir, \"Chicken-fecal-images\")\n", + " \n", + " create_directories([\n", + " 
Path(training.root_dir) #data\n", + " ])\n", + " \n", + " training_config = TrainingConfig(\n", + " \n", + " root_dir=Path(training.root_dir), #data\n", + " train_model_path=Path(training.train_model_path), # artifacts/training/model.h5\n", + " updated_model_base_path=Path(prepare_base_model.updated_base_model_config), \n", + " training_data=Path(training_data),\n", + " params_epochs=params.EPOCH,\n", + " params_batch_size=params.BATCH_SIZE,\n", + " params_is_augmentation=params.AUGMENTATION,\n", + " param_image_size=params.IMAGE_SIZE,\n", + " \n", + " )\n", + " \n", + " return training_config" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "import time\n", + "import tensorflow as tf\n", + "\n", + "class PrepareCallback:\n", + " def __init__(self, config: PrepareCallbacksConfig) -> None:\n", + " self.config = config\n", + " \n", + " @property\n", + " def _create_tb_callbacks(self):\n", + " timeStamp = time.strftime(\"%Y-%m-%d-%H-%M-%S\")\n", + " \n", + " tb_running_log_dir = os.path.join(\n", + " self.config.tensorboard_root_log_dir,\n", + " f\"tb_log_at_{timeStamp}\",\n", + " \n", + " ) \n", + " \n", + " return tf.keras.callbacks.TensorBoard(log_dir=tb_running_log_dir)\n", + " \n", + " @property\n", + " def _create_ckpt_callbacks(self):\n", + " \n", + " return tf.keras.callbacks.ModelCheckpoint(\n", + " self.config.checkpoint_model_filepath,\n", + " save_best_only=True\n", + " )\n", + " \n", + " \n", + " def _get_tb_ckpt_callbacks(self):\n", + " \n", + " return [\n", + " self._create_tb_callbacks,\n", + " self._create_ckpt_callbacks\n", + " ]\n", + " \n", + " \n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "import time\n", + "import tensorflow as tf" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "class Training:\n", + " def __init__(self, 
configfile: TrainingConfig) -> None:\n", + " self.config = configfile\n", + " \n", + " def get_base_model(self):\n", + " \n", + " self.model = tf.keras.models.load_model(\n", + " self.config.updated_model_base_path\n", + " )\n", + " \n", + " def train_valid_generator(self):\n", + "\n", + " datagenerator_kwargs = dict(\n", + " rescale = 1./255,\n", + " validation_split=0.20\n", + " )\n", + " \n", + " dataflow_kwargs = dict(\n", + " \n", + " target_size = self.config.param_image_size[:-1],\n", + " batch_size = self.config.params_batch_size,\n", + " interpolation = \"bilinear\"\n", + " \n", + " )\n", + " \n", + " valid_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(\n", + " **datagenerator_kwargs,\n", + " )\n", + " \n", + " self.valid_generator = valid_datagenerator.flow_from_directory(\n", + " directory=self.config.training_data,\n", + " subset=\"validation\",\n", + " shuffle=False,\n", + " **dataflow_kwargs\n", + " )\n", + " \n", + " if self.config.params_is_augmentation:\n", + " train_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(\n", + " rotation_range=40,\n", + " horizontal_flip=True,\n", + " width_shift_range=0.2,\n", + " height_shift_range=0.2,\n", + " shear_range=0.2,\n", + " zoom_range=0.3,\n", + " **datagenerator_kwargs\n", + " )\n", + " \n", + " else:\n", + " train_datagenerator = valid_datagenerator\n", + " \n", + " \n", + " self.train_generator = train_datagenerator.flow_from_directory(\n", + " directory=self.config.training_data,\n", + " subset=\"training\",\n", + " shuffle=True,\n", + " **dataflow_kwargs\n", + " )\n", + " \n", + " \n", + " @staticmethod\n", + " def save_model(path: Path, model: tf.keras.Model):\n", + " model.save(path)\n", + " \n", + " \n", + " def train(self, callbacks_list: list):\n", + " self.steps_per_epochs = self.train_generator.samples // self.train_generator.batch_size\n", + " self.validation_steps = self.valid_generator.samples // self.valid_generator.batch_size\n", + " \n", + " 
self.model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=0.001),\n", + " loss=tf.keras.losses.CategoricalCrossentropy(),\n", + " metrics=[\"accuracy\"])\n", + "\n", + " \n", + " self.model.fit(\n", + " self.train_generator,\n", + " epochs=self.config.params_epochs,\n", + " steps_per_epoch=self.steps_per_epochs,\n", + " validation_steps=self.validation_steps,\n", + " validation_data=self.valid_generator,\n", + " callbacks=callbacks_list\n", + " )\n", + " \n", + " # self.model.summary()\n", + " \n", + " self.save_model(\n", + " path=self.config.train_model_path,\n", + " model=self.model\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2023-07-30 23:07:26,375 - INFO - common - yaml file: config\\config.yaml loaded successfully]\n", + "[2023-07-30 23:07:26,382 - INFO - common - yaml file: params.yaml loaded successfully]\n", + "[2023-07-30 23:07:26,382 - INFO - common - Directory created ! artifacts]\n", + "[2023-07-30 23:07:26,382 - INFO - common - Directory created ! artifacts\\prepare_callbacks\\checkpoint_root_log_dir]\n", + "[2023-07-30 23:07:26,389 - INFO - common - Directory created ! artifacts\\prepare_callbacks\\tensorboard_root_log_dir]\n", + "[2023-07-30 23:07:26,389 - INFO - common - Directory created ! 
artifacts\\training]\n", + "Found 78 images belonging to 2 classes.\n", + "Found 312 images belonging to 2 classes.\n", + "19/19 [==============================] - 164s 8s/step - loss: 1.1063 - accuracy: 0.5304 - val_loss: 0.3567 - val_accuracy: 0.7812\n", + "Model: \"model\"\n", + "_________________________________________________________________\n", + " Layer (type) Output Shape Param # \n", + "=================================================================\n", + " input_1 (InputLayer) [(None, 224, 224, 3)] 0 \n", + " \n", + " block1_conv1 (Conv2D) (None, 224, 224, 64) 1792 \n", + " \n", + " block1_conv2 (Conv2D) (None, 224, 224, 64) 36928 \n", + " \n", + " block1_pool (MaxPooling2D) (None, 112, 112, 64) 0 \n", + " \n", + " block2_conv1 (Conv2D) (None, 112, 112, 128) 73856 \n", + " \n", + " block2_conv2 (Conv2D) (None, 112, 112, 128) 147584 \n", + " \n", + " block2_pool (MaxPooling2D) (None, 56, 56, 128) 0 \n", + " \n", + " block3_conv1 (Conv2D) (None, 56, 56, 256) 295168 \n", + " \n", + " block3_conv2 (Conv2D) (None, 56, 56, 256) 590080 \n", + " \n", + " block3_conv3 (Conv2D) (None, 56, 56, 256) 590080 \n", + " \n", + " block3_pool (MaxPooling2D) (None, 28, 28, 256) 0 \n", + " \n", + " block4_conv1 (Conv2D) (None, 28, 28, 512) 1180160 \n", + " \n", + " block4_conv2 (Conv2D) (None, 28, 28, 512) 2359808 \n", + " \n", + " block4_conv3 (Conv2D) (None, 28, 28, 512) 2359808 \n", + " \n", + " block4_pool (MaxPooling2D) (None, 14, 14, 512) 0 \n", + " \n", + " block5_conv1 (Conv2D) (None, 14, 14, 512) 2359808 \n", + " \n", + " block5_conv2 (Conv2D) (None, 14, 14, 512) 2359808 \n", + " \n", + " block5_conv3 (Conv2D) (None, 14, 14, 512) 2359808 \n", + " \n", + " block5_pool (MaxPooling2D) (None, 7, 7, 512) 0 \n", + " \n", + " flatten (Flatten) (None, 25088) 0 \n", + " \n", + " dense (Dense) (None, 2) 50178 \n", + " \n", + "=================================================================\n", + "Total params: 14,764,866\n", + "Trainable params: 14,764,866\n", + 
"Non-trainable params: 0\n", + "_________________________________________________________________\n" + ] + } + ], + "source": [ + "try:\n", + " \n", + " config = ConfigurationManager()\n", + " prepare_callbacks = config.get_prepare_callbacks_config()\n", + " prepare_callbacks = PrepareCallback(prepare_callbacks)\n", + " callback_list = prepare_callbacks._get_tb_ckpt_callbacks()\n", + " \n", + " training_config = config.get_training_config()\n", + " training = Training(configfile=training_config)\n", + " training.get_base_model()\n", + " training.train_valid_generator()\n", + " training.train(\n", + " callbacks_list=callback_list\n", + " )\n", + " \n", + " \n", + "except Exception as e:\n", + " raise\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2023-07-26 22:14:27,337 - INFO - common - yaml file: config\\config.yaml loaded successfully]\n", + "[2023-07-26 22:14:27,341 - INFO - common - yaml file: params.yaml loaded successfully]\n", + "[2023-07-26 22:14:27,343 - INFO - common - Directory created ! artifacts]\n", + "[2023-07-26 22:14:27,349 - INFO - common - Directory created ! artifacts\\prepare_callbacks\\checkpoint_root_log_dir]\n", + "[2023-07-26 22:14:27,349 - INFO - common - Directory created ! artifacts\\prepare_callbacks\\tensorboard_root_log_dir]\n" + ] + } + ], + "source": [ + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2023-07-26 22:14:30,646 - INFO - common - Directory created ! 
artifacts\\training]\n" + ] + }, + { + "data": { + "text/plain": [ + "TrainingConfig(root_dir=WindowsPath('artifacts/training'), train_model_path=WindowsPath('artifacts/training/model.h5'), updated_model_base_path=WindowsPath('artifacts/prepare_base_model/base_model_updated.h5'), training_data=WindowsPath('data/Chicken-fecal-images'), params_epochs=1, params_batch_size=16, params_is_augmentation=True, param_image_size=BoxList([224, 224, 3]))" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": 
"code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "ml-environment", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/research/05_model_evalution.ipynb b/research/05_model_evalution.ipynb new file mode 100644 index 0000000..7bd39bd --- /dev/null +++ b/research/05_model_evalution.ipynb @@ -0,0 +1,214 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "os.chdir(\"../\")\n", + "# 'd:\\\\End-to-End chicken-diesase-implementation'" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "import tensorflow as tf\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "model = tf.keras.models.load_model(\"artifacts/training/model.h5\")" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "from dataclasses import dataclass\n", + "from pathlib import Path\n", + "@dataclass(frozen=True)\n", + "class ModelEvaluationConfig:\n", + " path_of_model: Path\n", + " 
training_data: Path\n", + " all_params: dict\n", + " params_image_size: list\n", + " params_batch_size: int\n", + " \n", + " \n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "from src.cnnClassifier.constant import *\n", + "from src.cnnClassifier.utils.common import read_yaml, create_directories, save_json" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "class ConfigurationManager:\n", + " \n", + " def __init__(self, configfile = CONFIG_FILE_PATH, paramsfile = PARAMS_FILE_PATH) -> None:\n", + " self.config = read_yaml(configfile)\n", + " self.params = read_yaml(paramsfile)\n", + " \n", + " create_directories([self.config.artifacts_root])\n", + " \n", + " \n", + " def get_validation_config(self) -> ModelEvaluationConfig:\n", + " \n", + " model_eval = ModelEvaluationConfig(\n", + " path_of_model = \"artifacts/training/model.h5\",\n", + " training_data = \"data/Chicken-fecal-images\",\n", + " all_params = self.params,\n", + " params_image_size = self.params.IMAGE_SIZE,\n", + " params_batch_size = self.params.BATCH_SIZE\n", + " )\n", + " \n", + " return model_eval\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "class Evaluation:\n", + " def __init__(self, config: ModelEvaluationConfig) -> None:\n", + " self.config = config\n", + " \n", + " \n", + " def _train_valid_generator(self):\n", + "\n", + " datagenerator_kwargs = dict(\n", + " rescale = 1./255,\n", + " validation_split=0.20\n", + " )\n", + " \n", + " dataflow_kwargs = dict(\n", + " \n", + " target_size = self.config.params_image_size[:-1],\n", + " batch_size = self.config.params_batch_size,\n", + " interpolation = \"bilinear\"\n", + " \n", + " )\n", + " \n", + " valid_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(\n", + " **datagenerator_kwargs,\n", + " )\n", + " \n", + " 
self.valid_generator = valid_datagenerator.flow_from_directory(\n", + " directory=self.config.training_data,\n", + " subset=\"validation\",\n", + " shuffle=False,\n", + " **dataflow_kwargs\n", + " )\n", + " \n", + " @staticmethod\n", + " def load_model(path: Path) -> tf.keras.Model:\n", + " return tf.keras.models.load_model(path)\n", + " \n", + " \n", + " def evaluation(self):\n", + " self.model = self.load_model(self.config.path_of_model)\n", + " self._train_valid_generator()\n", + " self.score = model.evaluate(self.valid_generator)\n", + " \n", + " def save_score(self):\n", + " score = {\"loss\": self.score[0], \"accuracy\": self.score[1]}\n", + " save_json(path=Path(\"score.json\"), data=score)\n", + " \n", + " \n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2023-08-02 00:10:45,952 - INFO - common - yaml file: config\\config.yaml loaded successfully]\n", + "[2023-08-02 00:10:45,958 - INFO - common - yaml file: params.yaml loaded successfully]\n", + "[2023-08-02 00:10:45,963 - INFO - common - Directory created ! 
artifacts]\n", + "Found 78 images belonging to 2 classes.\n", + "5/5 [==============================] - 7s 1s/step - loss: 0.3348 - accuracy: 0.8077\n", + "[2023-08-02 00:10:54,160 - INFO - common - json file saved at: score.json]\n" + ] + } + ], + "source": [ + "try:\n", + " config = ConfigurationManager()\n", + " get_eval = config.get_validation_config()\n", + " evaluations = Evaluation(get_eval)\n", + " evaluations.evaluation()\n", + " evaluations.save_score()\n", + " \n", + "except Exception as e:\n", + " raise e" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "ml-environment", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/research/trails.ipynbtest.py b/research/trails.ipynbtest.py new file mode 100644 index 0000000..e69de29 diff --git a/research/trials.ipynb b/research/trials.ipynb new file mode 100644 index 0000000..488d8eb --- /dev/null +++ b/research/trials.ipynb @@ -0,0 +1,25 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "ml-environment", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.10.9" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/score.json b/score.json new file mode 100644 index 0000000..2a0e59a --- /dev/null +++ b/score.json @@ -0,0 +1,4 @@ +{ + "loss": 1.3572043180465698, + "accuracy": 0.5 +} \ No newline at end of file diff --git a/setup.py b/setup.py new 
"""Packaging script for the ``cnnClassifier`` project (sources live under ``src/``)."""
import setuptools

# Read the README exactly once. The previous version printed ``f.read()`` and
# then called ``f.read()`` again on the exhausted handle, which always returned
# an empty string and left the package without a long description.
with open("README.md", "r", encoding="utf-8") as f:
    long_description = f.read()


__version__ = "0.0.0"

AUTHOR_NAME = "Aman123lug"
SOCIAL_MEDIA = "lug__aman"
AUTHER_EMAIL = "ak06465676@gmail.com"
SRC_REPO = "cnnClassifier"
REPO_NAME = "End-to-End-Chicken-Disease-implementation"


setuptools.setup(
    name=SRC_REPO,
    version=__version__,
    author=AUTHOR_NAME,
    author_email=AUTHER_EMAIL,
    description="Solved Classification promlem using CNN",
    long_description=long_description,
    # README.md is Markdown; declare it so package indexes render it correctly.
    long_description_content_type="text/markdown",
    download_url=f"https://github.com/{AUTHOR_NAME}/{REPO_NAME}",
    # Packages are discovered under src/ (src-layout).
    package_dir={"": "src"},
    packages=setuptools.find_packages(where="src"),
)
class Evaluation:
    """Score a saved Keras model on the validation split of the image data.

    ``config`` supplies ``path_of_model``, ``training_data``,
    ``params_image_size`` and ``params_batch_size``.
    """

    def __init__(self, config: "ModelEvaluationConfig") -> None:
        self.config = config

    def _train_valid_generator(self):
        """Create ``self.valid_generator`` over the validation subset.

        Pixel values are rescaled by 1/255 and 20% of the images under
        ``config.training_data`` form the validation subset. Batches are
        produced in a fixed order (``shuffle=False``).
        """
        datagenerator_kwargs = dict(
            rescale=1. / 255,
            validation_split=0.20,
        )

        dataflow_kwargs = dict(
            # Drop the last entry of the configured size; for an
            # [H, W, C] setting this leaves the (H, W) target size.
            target_size=self.config.params_image_size[:-1],
            batch_size=self.config.params_batch_size,
            interpolation="bilinear",
        )

        valid_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(
            **datagenerator_kwargs,
        )

        self.valid_generator = valid_datagenerator.flow_from_directory(
            directory=self.config.training_data,
            subset="validation",
            shuffle=False,
            **dataflow_kwargs
        )

    @staticmethod
    def load_model(path: Path) -> "tf.keras.Model":
        """Load and return the Keras model stored at ``path``."""
        return tf.keras.models.load_model(path)

    def evaluation(self):
        """Load the configured model and evaluate it on the validation data.

        Stores the loaded model in ``self.model`` and the result of
        ``model.evaluate`` in ``self.score``.
        """
        self.model = self.load_model(self.config.path_of_model)
        self._train_valid_generator()
        # Evaluate the model that was just loaded from the configured path.
        # The previous code evaluated a module-level ``model`` global, so
        # ``config.path_of_model`` was loaded but never actually scored.
        self.score = self.model.evaluate(self.valid_generator)

    def save_score(self):
        """Write the loss and accuracy from ``self.score`` to ``score.json``.

        ``evaluation()`` must have been called first.
        """
        # score[0] / score[1] are taken as loss / accuracy -- assumes the model
        # was compiled with accuracy as its only metric (TODO confirm).
        score = {"loss": self.score[0], "accuracy": self.score[1]}
        save_json(path=Path("score.json"), data=score)
class PrepareBaseModel:
    """Build the VGG16 base model and derive the classifier used for training.

    ``config`` supplies the model hyper-parameters (``params_*``) and the
    paths the base and updated models are saved to.
    """

    def __init__(self, config: "PrepareBaseModelConfig") -> None:
        self.config = config

    def get_base_model(self):
        """Instantiate VGG16 from the configured parameters and save it.

        The model is kept on ``self.model`` and written to
        ``config.base_model_path``.
        """
        self.model = tf.keras.applications.vgg16.VGG16(
            include_top=self.config.params_include_top,
            weights=self.config.params_weight,
            input_shape=self.config.params_image_size,
            classes=self.config.params_classes,
        )

        self.save_model(path=self.config.base_model_path, model=self.model)

    @staticmethod
    def _freeze_layers(model, freeze_all, freeze_till):
        """Mark the selected layers of ``model`` as non-trainable.

        Args:
            model: object exposing a ``layers`` sequence whose items have a
                ``trainable`` attribute.
            freeze_all: when truthy, freeze every layer.
            freeze_till: otherwise, when a positive int, freeze every layer
                except the last ``freeze_till``; ``None`` or ``<= 0`` freezes
                nothing.
        """
        if freeze_all:
            layers_to_freeze = model.layers
        elif (freeze_till is not None) and (freeze_till > 0):
            layers_to_freeze = model.layers[:-freeze_till]
        else:
            layers_to_freeze = []

        # The flag must be set on each layer. The previous code assigned to
        # ``model.trainlable`` (a typo, so nothing was frozen) or to
        # ``model.trainable`` (which affects the whole model rather than only
        # the selected layers).
        for layer in layers_to_freeze:
            layer.trainable = False

    @staticmethod
    def prepare_full_model(model, classes, freeze_all, freeze_till, learning_rate):
        """Attach a softmax classification head to ``model`` and compile it.

        Args:
            model: base Keras model providing ``input``, ``output``, ``layers``.
            classes: number of units in the final dense layer.
            freeze_all: freeze every base layer when truthy.
            freeze_till: keep only the last ``freeze_till`` base layers
                trainable (used when ``freeze_all`` is falsy).
            learning_rate: learning rate for the SGD optimizer.

        Returns:
            The compiled model (base model + Flatten + Dense softmax).
        """
        PrepareBaseModel._freeze_layers(model, freeze_all, freeze_till)

        flatten_in = tf.keras.layers.Flatten()(model.output)
        prediction = tf.keras.layers.Dense(
            units=classes,
            activation="softmax"
        )(flatten_in)

        full_model = tf.keras.models.Model(
            inputs=model.input,
            outputs=prediction
        )

        full_model.compile(
            optimizer=tf.keras.optimizers.SGD(learning_rate=learning_rate),
            loss=tf.keras.losses.CategoricalCrossentropy(),
            metrics=["accuracy"],
        )
        full_model.summary()

        return full_model

    def update_base_model(self):
        """Build the full model with every base layer frozen and save it.

        Requires ``get_base_model()`` to have populated ``self.model``. The
        result is kept on ``self.full_model`` and written to
        ``config.updated_base_model``.
        """
        self.full_model = self.prepare_full_model(
            model=self.model,
            classes=self.config.params_classes,
            freeze_all=True,
            freeze_till=None,
            learning_rate=self.config.params_learning_rate
        )

        self.save_model(path=self.config.updated_base_model, model=self.full_model)

    @staticmethod
    def save_model(path: Path, model: "tf.keras.Model"):
        """Save ``model`` to ``path``."""
        model.save(path)
class PrepareCallback:
    """Factory for the Keras callbacks used during training.

    ``config`` supplies ``tensorboard_root_log_dir`` and
    ``checkpoint_model_filepath``.
    """

    def __init__(self, config: PrepareCallbacksConfig) -> None:
        self.config = config

    @property
    def _create_tb_callbacks(self):
        """A new TensorBoard callback logging to a timestamped sub-directory.

        Every access builds a fresh callback whose directory name carries the
        current local time.
        """
        run_stamp = time.strftime("%Y-%m-%d-%H-%M-%S")
        run_dir_name = f"tb_log_at_{run_stamp}"
        run_log_dir = os.path.join(self.config.tensorboard_root_log_dir, run_dir_name)
        tensorboard_cb = tf.keras.callbacks.TensorBoard(log_dir=run_log_dir)
        return tensorboard_cb

    @property
    def _create_ckpt_callbacks(self):
        """A new ModelCheckpoint callback created with ``save_best_only=True``."""
        checkpoint_target = self.config.checkpoint_model_filepath
        checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
            checkpoint_target,
            save_best_only=True,
        )
        return checkpoint_cb

    def _get_tb_ckpt_callbacks(self):
        """Return ``[tensorboard_callback, checkpoint_callback]``, in that order."""
        tensorboard_cb = self._create_tb_callbacks
        checkpoint_cb = self._create_ckpt_callbacks
        return [tensorboard_cb, checkpoint_cb]
def train(self, callbacks_list: list):
    """Compile, fit and save ``self.model``.

    Expects ``self.model``, ``self.train_generator`` and
    ``self.valid_generator`` to have been set beforehand.

    Args:
        callbacks_list: Keras callbacks forwarded to ``model.fit``.
    """
    train_flow = self.train_generator
    valid_flow = self.valid_generator

    # Whole batches only: integer division discards a trailing partial batch.
    self.steps_per_epochs = train_flow.samples // train_flow.batch_size
    self.validation_steps = valid_flow.samples // valid_flow.batch_size

    # The model is (re)compiled here with a fixed SGD learning rate.
    sgd = tf.keras.optimizers.SGD(learning_rate=0.001)
    cross_entropy = tf.keras.losses.CategoricalCrossentropy()
    self.model.compile(optimizer=sgd, loss=cross_entropy, metrics=["accuracy"])

    fit_options = {
        "epochs": self.config.params_epochs,
        "steps_per_epoch": self.steps_per_epochs,
        "validation_steps": self.validation_steps,
        "validation_data": valid_flow,
        "callbacks": callbacks_list,
    }
    self.model.fit(train_flow, **fit_options)

    # Persist the trained model at the configured location.
    self.save_model(path=self.config.train_model_path, model=self.model)
class ConfigurationManager:
    """Read the YAML config/params files and build per-stage config objects.

    On construction the two YAML files are loaded and the directory named by
    ``config.artifacts_root`` is created.
    """

    def __init__(self, configfile_path = CONFIG_FILE_PATH, paramsfile_path = PARAMS_FILE_PATH) -> None:

        self.config = read_yaml(configfile_path)
        self.params = read_yaml(paramsfile_path)

        create_directories([self.config.artifacts_root])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """Create the data-ingestion root directory and return its config."""
        config = self.config.data_ingestion

        create_directories([config.root_dir])

        data_ingestion_config = DataIngestionConfig(
            root_dir=config.root_dir,
            source_URL=config.source_URL,
            local_data_file=config.local_data_file,
            unzip_dir=config.unzip_dir,
        )
        return data_ingestion_config

    def get_base_model_config(self) -> PrepareBaseModelConfig:
        """Create the base-model root directory and return its config.

        Paths come from the ``prepare_base_model`` config section; the model
        hyper-parameters come from the params file.
        """
        config = self.config.prepare_base_model

        create_directories([config.root_dir])

        prepare_base_model_config = PrepareBaseModelConfig(
            root_dir=config.root_dir,
            base_model_path=config.base_model_config,
            updated_base_model=config.updated_base_model_config,
            params_image_size=self.params.IMAGE_SIZE,
            params_learning_rate=self.params.LEARNING_RATE,
            params_include_top=self.params.INCLUDE_TOP,
            params_weight=self.params.WEIGHTS,
            params_classes=self.params.CLASSES,
        )

        return prepare_base_model_config

    def get_prepare_callbacks_config(self) -> PrepareCallbacksConfig:
        """Create the checkpoint and TensorBoard directories; return the config."""
        config = self.config.prepare_callbacks

        # The checkpoint setting is a file path; only its parent directory
        # has to exist up front.
        model_ckpt_dir = os.path.dirname(config.checkpoint_model_filepath)

        create_directories([
            Path(model_ckpt_dir),
            Path(config.tensorboard_root_log_dir)
        ])

        prepare_callbacks_config = PrepareCallbacksConfig(
            root_dir=Path(config.root_dir),
            tensorboard_root_log_dir=Path(config.tensorboard_root_log_dir),
            checkpoint_model_filepath=Path(config.checkpoint_model_filepath),
        )

        return prepare_callbacks_config

    def get_training_config(self) -> TrainingConfig:
        """Create the training root directory and return the training config.

        The image directory is ``<data_ingestion.root_dir>/Chicken-fecal-images``.
        """
        training = self.config.training

        prepare_base_model = self.config.prepare_base_model
        params = self.params
        training_data = os.path.join(self.config.data_ingestion.root_dir, "Chicken-fecal-images")

        create_directories([
            Path(training.root_dir)
        ])

        training_config = TrainingConfig(
            root_dir=Path(training.root_dir),
            train_model_path=Path(training.train_model_path),
            updated_model_base_path=Path(prepare_base_model.updated_base_model_config),
            training_data=Path(training_data),
            params_epochs=params.EPOCH,
            params_batch_size=params.BATCH_SIZE,
            params_is_augmentation=params.AUGMENTATION,
            param_image_size=params.IMAGE_SIZE,
        )

        return training_config

    def get_validation_config(self) -> ModelEvaluationConfig:
        """Return the evaluation config.

        The model path and image directory are derived from the same config
        entries that ``get_training_config`` uses (previously they were
        duplicated here as hard-coded strings), and are passed as ``Path``
        objects to match the declared field types.
        """
        training = self.config.training
        training_data = os.path.join(self.config.data_ingestion.root_dir, "Chicken-fecal-images")

        model_eval = ModelEvaluationConfig(
            path_of_model=Path(training.train_model_path),
            training_data=Path(training_data),
            all_params=self.params,
            params_image_size=self.params.IMAGE_SIZE,
            params_batch_size=self.params.BATCH_SIZE,
        )

        return model_eval
# Layout of every log record: timestamp, level, emitting module, message.
logging_str = "[%(asctime)s - %(levelname)s - %(module)s - %(message)s]"

# One log file per process start, named after the start time.
LOG_FILE = f"{datetime.now().strftime('%m-%d-%Y-%H-%M-%S')}.log"

# Directory that holds the log files. It must NOT include LOG_FILE: passing
# the file name to makedirs would create a directory called "<timestamp>.log"
# and bury the real log file one level deeper.
log_file_path = os.path.join(os.getcwd(), "LOGS")
os.makedirs(log_file_path, exist_ok=True)

# Full path of this run's log file: <cwd>/LOGS/<timestamp>.log
LOG_FILE_PATH = os.path.join(log_file_path, LOG_FILE)

# Send every record at INFO and above both to the log file and to stdout.
logging.basicConfig(
    level=logging.INFO,
    format=logging_str,
    handlers=[
        logging.FileHandler(LOG_FILE_PATH),
        logging.StreamHandler(sys.stdout),
    ],
)

# Shared project logger imported by the other modules.
logger = logging.getLogger("cnnClassifierLogger")
class PredictPipeline:
    """Classify one image file with the trained model stored on disk."""

    # Trained model location, relative to the current working directory.
    MODEL_PATH = os.path.join("artifacts", "training", "model.h5")

    def __init__(self, filename) -> None:
        """Remember the image to classify.

        Args:
            filename: Path of the image file passed to ``load_img``.
        """
        # Misspelt attribute name kept because existing callers may read it.
        self.filname = filename

    @property
    def filename(self):
        """Correctly spelled, read-only alias of ``filname``."""
        return self.filname

    @staticmethod
    def _model_input_size(model):
        """Return the model's ``(height, width)`` input size, or ``None``.

        ``None`` is returned whenever the size cannot be read safely: no
        ``input_shape``, a shape that is not 4-D, unspecified spatial
        dimensions, or a last axis that does not look like a channel count
        (i.e. the shape is not channels-last).
        """
        shape = getattr(model, "input_shape", None)
        if not isinstance(shape, (tuple, list)) or len(shape) != 4:
            return None
        _, height, width, channels = shape
        if height is None or width is None or channels not in (1, 3, 4):
            return None
        return (int(height), int(width))

    def predict(self):
        """Load the model, classify the image and return the label.

        Returns:
            A one-element list ``[{"image": label}]`` where ``label`` is
            ``"Healthy"`` when the arg-max class index is 1 and
            ``"Coccidiosis"`` otherwise.
        """
        model = load_model(self.MODEL_PATH)

        # Resize to what the network expects; without this any image whose
        # size differs from the model input fails inside ``model.predict``.
        # ``target_size=None`` keeps the native size, as before.
        test_image = tf.keras.preprocessing.image.load_img(
            self.filname, target_size=self._model_input_size(model)
        )
        test_image = tf.keras.preprocessing.image.img_to_array(test_image)
        # NOTE(review): pixel values are fed unscaled (0-255). If training
        # rescaled its inputs, the same scaling belongs here -- confirm.
        test_image = np.expand_dims(test_image, axis=0)  # add the batch axis

        result = np.argmax(model.predict(test_image), axis=1)
        print(result)

        prediction = "Healthy" if result[0] == 1 else "Coccidiosis"
        return [{"image": prediction}]
class PrepareBaseModelTrainingPipeline:
    """Pipeline stage that fetches the base model and then updates it."""

    def __init__(self) -> None:
        """Nothing to set up; all objects are created inside ``main``."""

    def main(self):
        """Run the stage: read the config, get the base model, update it."""
        manager = ConfigurationManager()
        base_model_settings = manager.get_base_model_config()

        builder = PrepareBaseModel(base_model_settings)
        builder.get_base_model()
        builder.update_base_model()
class ModelTrainingPipeline:
    """Pipeline stage that prepares the callbacks and trains the model."""

    def __init__(self) -> None:
        """Nothing to set up; all objects are created inside ``main``."""

    def main(self):
        """Run the stage: build callbacks, load the model, make generators, train."""
        manager = ConfigurationManager()

        # Callbacks first, so they are ready when training starts.
        callbacks_settings = manager.get_prepare_callbacks_config()
        callback_builder = PrepareCallback(callbacks_settings)
        callback_list = callback_builder._get_tb_ckpt_callbacks()

        trainer = Training(configfile=manager.get_training_config())
        trainer.get_base_model()
        trainer.train_valid_generator()
        trainer.train(callbacks_list=callback_list)
@ensure_annotations
def read_yaml(path_to_yaml: Path) -> ConfigBox:
    """Read a YAML file and return its content as a ``ConfigBox``.

    Args:
        path_to_yaml (Path): location of the YAML file.

    Raises:
        ValueError: if the file is empty, or if ``ConfigBox`` rejects the
            parsed content with ``BoxValueError``.

    Returns:
        ConfigBox: the parsed content.

    Errors from opening the file or parsing the YAML propagate unchanged.
    """
    with open(path_to_yaml, encoding="utf-8") as yaml_file:
        content = yaml.safe_load(yaml_file)

    # An empty document parses to None; report it explicitly.
    if content is None:
        raise ValueError("yaml file is empty")

    try:
        config = ConfigBox(content)
    except BoxValueError as err:
        # Same message as before, now chained so the cause stays visible.
        raise ValueError("yaml file is empty") from err

    # Log success only once the content is actually usable.
    logger.info(f"yaml file: {path_to_yaml} loaded successfully")
    return config
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')

# NOTE(review): a hyphenated name cannot be imported as a Python package;
# confirm this is the intended package directory name.
project_name = "CNN-Classifier"

# Every file the project skeleton must contain. Each entry needs its own
# trailing comma: adjacent string literals are silently concatenated.
list_of_files = [
    ".github/workflows/.gitkeep",
    f"src/{project_name}/__init__.py",
    f"src/{project_name}/components/__init__.py",
    f"src/{project_name}/utils/__init__.py",
    f"src/{project_name}/config/configration.py",
    f"src/{project_name}/pipline/__init__.py",
    f"src/{project_name}/entity/__init__.py",
    f"src/{project_name}/constant/__init__.py",
    "config/config.yaml",
    "dvc.yaml",
    "params.yaml",
    "requirements.txt",
    "setup.py",
    "research/trails.ipynb",
    "test.py",
    "templates/index.html",
]

for file in list_of_files:
    filepath = Path(file)
    dirname, filename = os.path.split(filepath)

    if dirname != "":
        os.makedirs(dirname, exist_ok=True)
        logging.info(f"Directory Created ! {dirname} -- for file = {filename}")

    # Check the full path, not the bare file name: testing only the name
    # looks in the working directory, so files inside sub-directories would
    # always seem missing and be truncated on every run.
    if (not filepath.exists()) or filepath.stat().st_size == 0:
        # touch() creates the file when absent and never discards content.
        filepath.touch()
        logging.info(f"Creating empty file: {filepath}")

    else:
        logging.info(f"{filename} is already created !!")