diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..b392395
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,11 @@
+# Model Checkpoints
+captioning_model
+captioning_processor
+
+# environments
+env
+
+# vscode
+.vscode
+
+__pycache__/
\ No newline at end of file
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
new file mode 100644
index 0000000..ef7f2cd
--- /dev/null
+++ b/.github/workflows/build.yml
@@ -0,0 +1,31 @@
+name: Build Docker Container
+
+permissions:
+  contents: write
+  packages: write
+
+on:
+  push:
+    branches:
+      - main
+
+jobs:
+  build-and-push-image:
+    runs-on: self-hosted
+    permissions:
+      packages: write
+    steps:
+      - name: Set up Git repository
+        uses: actions/checkout@v2
+        with:
+          token: ${{ secrets.PAT }}
+
+      - name: Log in to Docker Hub
+        run: docker login -u ${{ secrets.DOCKER_USER }} -p ${{ secrets.DOCKER_PASS }}
+
+      - name: Build and push Docker image
+        uses: docker/build-push-action@ad44023a93711e3deb337508980b4b5e9bcdc5dc
+        with:
+          push: false
+          tags: image-captioning-fed-server:latest
+          github-token: ${{ secrets.PAT }}
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..b392395
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,11 @@
+# Model Checkpoints
+captioning_model
+captioning_processor
+
+# environments
+env
+
+# vscode
+.vscode
+
+__pycache__/
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..ef866e6
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,28 @@
+# Stage 1: Build environment
+FROM python:3.10.12-slim
+
+WORKDIR /app
+
+COPY image-captioning/requirements.txt .
+
+# Install dependencies
+RUN pip install --no-cache-dir -r requirements.txt
+
+# # Stage 2: Runtime environment
+# FROM python:3.9-slim
+
+# WORKDIR /app
+
+# # Copy the installed dependencies from the builder stage
+# COPY --from=builder /usr/local/lib/python3.9/site-packages /usr/local/lib/python3.9/site-packages
+
+# # Copy the Python script
+# COPY your_script.py .
+
+# Copy the application source so server.py is present at runtime
+COPY image-captioning/ .
+
+EXPOSE 8080
+
+# Run the Python script
+CMD ["python", "server.py"]
diff --git a/image-captioning/Model.py b/image-captioning/Model.py
new file mode 100644
index 0000000..315ad2c
--- /dev/null
+++ b/image-captioning/Model.py
@@ -0,0 +1,94 @@
+from transformers import BlipForConditionalGeneration, AutoProcessor
+import torch
+import os
+import gc
+
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+
+def train(epochs, model, loader, optimizer):
+    model.train()
+    losses = []
+    for epoch in range(1, epochs + 1):
+        print("---------------------------------------")
+        print("Epoch:", epoch)
+        epoch_losses = []
+        for idx, batch in enumerate(loader):
+            # if idx == 3:
+            #     break
+            input_ids = batch.pop("input_ids").to(DEVICE)
+            pixel_values = batch.pop("pixel_values").to(DEVICE)
+
+            outputs = model(input_ids=input_ids,
+                            pixel_values=pixel_values,
+                            labels=input_ids)
+
+            loss = outputs.loss
+            print(f"Loss in batch {idx}: {loss.item()}")
+            epoch_losses.append(loss.item())
+            loss.backward()
+
+            optimizer.step()
+            optimizer.zero_grad()
+        losses.extend(epoch_losses)
+        gc.collect()
+    return losses
+
+
+@torch.no_grad()
+def test(model, loader):
+    model.eval()
+
+    dataset_size = 0
+    running_loss = 0.0
+    correct_predictions = 0
+    mis_predictions = 0
+
+    for idx, batch in enumerate(loader):
+        # if idx == 3:
+        #     break
+        print("---------------------------------------")
+        print("Step:", idx)
+        input_ids = batch['input_ids'].to(DEVICE)
+        pixel_values = batch['pixel_values'].to(DEVICE)
+
+        batch_size = input_ids.size(0)
+
+        outputs = model(input_ids=input_ids,
+                        pixel_values=pixel_values,
+                        labels=input_ids)
+        logits = outputs.decoder_logits
+        loss = outputs.loss
+        print(f"Loss in batch {idx}: {loss.item()}")
+
+        running_loss += (loss.item() * batch_size)
+        dataset_size += batch_size
+        predictions = torch.argmax(logits, dim=-1)
+        correct_predictions += torch.sum(predictions == input_ids).item()
+        mis_predictions += torch.sum(predictions != input_ids).item()
+        epoch_accuracy = correct_predictions / (correct_predictions + mis_predictions)
+        epoch_loss = running_loss / dataset_size
+        print("Epoch Loss:", epoch_loss, "Epoch Accuracy:", epoch_accuracy)
+
+        gc.collect()
+
+    return epoch_loss, epoch_accuracy
+
+# Resume from a locally saved checkpoint if one exists, otherwise start from the base BLIP model
+if os.path.exists("captioning_model"):
+    checkpoint_model = "captioning_model"
+else:
+    checkpoint_model = "Salesforce/blip-image-captioning-base"
+
+if os.path.exists("captioning_processor"):
+    checkpoint_processor = "captioning_processor"
+else:
+    checkpoint_processor = "Salesforce/blip-image-captioning-base"
+
+processor = AutoProcessor.from_pretrained(checkpoint_processor)
+model = BlipForConditionalGeneration.from_pretrained(checkpoint_model).to(DEVICE)
+
+# Freeze the vision encoder so only the text decoder is fine-tuned
+for vision_param in model.vision_model.parameters():
+    vision_param.requires_grad = False
+
+optimizer = torch.optim.SGD(model.parameters(), lr=5e-5)
\ No newline at end of file
diff --git a/image-captioning/requirements.txt b/image-captioning/requirements.txt
new file mode 100644
index 0000000..b42a377
--- /dev/null
+++ b/image-captioning/requirements.txt
@@ -0,0 +1,45 @@
+certifi==2024.2.2
+cffi==1.16.0
+charset-normalizer==3.3.2
+cryptography==41.0.7
+filelock==3.13.1
+flwr==1.7.0
+fsspec==2024.2.0
+grpcio==1.60.1
+huggingface-hub==0.20.3
+idna==3.6
+iterators==0.0.2
+Jinja2==3.1.3
+MarkupSafe==2.1.5
+mpmath==1.3.0
+networkx==3.2.1
+numpy==1.26.4
+nvidia-cublas-cu12==12.1.3.1
+nvidia-cuda-cupti-cu12==12.1.105
+nvidia-cuda-nvrtc-cu12==12.1.105
+nvidia-cuda-runtime-cu12==12.1.105
+nvidia-cudnn-cu12==8.9.2.26
+nvidia-cufft-cu12==11.0.2.54
+nvidia-curand-cu12==10.3.2.106
+nvidia-cusolver-cu12==11.4.5.107
+nvidia-cusparse-cu12==12.1.0.106
+nvidia-nccl-cu12==2.19.3
+nvidia-nvjitlink-cu12==12.3.101
+nvidia-nvtx-cu12==12.1.105
+packaging==23.2
+pillow==10.2.0
+protobuf==4.25.2
+pycparser==2.21
+pycryptodome==3.20.0
+PyYAML==6.0.1
+regex==2023.12.25
+requests==2.31.0
+safetensors==0.4.2
+sympy==1.12
+tokenizers==0.15.2
+torch==2.2.0
+tqdm==4.66.2
+transformers==4.37.2
+triton==2.2.0
+typing_extensions==4.9.0
+urllib3==2.2.0
diff --git a/image-captioning/server.py b/image-captioning/server.py
new file mode 100644
index 0000000..44ec691
--- /dev/null
+++ b/image-captioning/server.py
@@ -0,0 +1,21 @@
+import flwr as fl
+from Model import model, processor
+
+# Seed the strategy with the current model weights
+weights = [val.cpu().numpy() for _, val in model.state_dict().items()]
+
+parameters = fl.common.ndarrays_to_parameters(weights)
+
+strategy = fl.server.strategy.FedAvg(
+    initial_parameters=parameters,
+)
+
+fl.server.start_server(
+    server_address="0.0.0.0:8080",
+    config=fl.server.ServerConfig(num_rounds=3),
+    grpc_max_message_length=2000000000,
+    strategy=strategy,
+)
+
+model.save_pretrained('captioning_model')
+processor.save_pretrained('captioning_processor')
\ No newline at end of file
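
Note on server.py: fl.server.start_server() does not write the aggregated weights back into `model`, so the save_pretrained() calls above persist the initial parameters rather than the federated result. Below is a minimal sketch of one way to capture the final round's weights, assuming the flwr 1.7 API; the SaveFedAvg subclass name and the state-dict round-trip are illustrative additions, not part of this patch.

    # Sketch only: SaveFedAvg is a hypothetical subclass, not part of this patch.
    from collections import OrderedDict

    import flwr as fl
    import torch
    from Model import model, processor


    class SaveFedAvg(fl.server.strategy.FedAvg):
        """FedAvg variant that remembers the most recently aggregated parameters."""

        def aggregate_fit(self, server_round, results, failures):
            aggregated, metrics = super().aggregate_fit(server_round, results, failures)
            if aggregated is not None:
                # Keep the latest global weights as plain NumPy arrays
                self.final_ndarrays = fl.common.parameters_to_ndarrays(aggregated)
            return aggregated, metrics


    weights = [val.cpu().numpy() for _, val in model.state_dict().items()]
    strategy = SaveFedAvg(initial_parameters=fl.common.ndarrays_to_parameters(weights))

    fl.server.start_server(
        server_address="0.0.0.0:8080",
        config=fl.server.ServerConfig(num_rounds=3),
        grpc_max_message_length=2000000000,
        strategy=strategy,
    )

    # Copy the aggregated weights back into the model before saving
    if hasattr(strategy, "final_ndarrays"):
        state_dict = OrderedDict(
            (k, torch.tensor(v))
            for k, v in zip(model.state_dict().keys(), strategy.final_ndarrays)
        )
        model.load_state_dict(state_dict, strict=True)

    model.save_pretrained('captioning_model')
    processor.save_pretrained('captioning_processor')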