From b44001439c6704bfcb6bc4a4f5adc81256ac15eb Mon Sep 17 00:00:00 2001 From: Lisa <47147576+work4cs@users.noreply.github.com> Date: Wed, 16 Oct 2024 15:59:38 -0400 Subject: [PATCH] improve bioclip baseline, run script, gitignore (#37) --- .gitignore | 1 + baselines/BioCLIP_code_submission/model.py | 15 ++++-------- .../BioCLIP_code_submission/requirements.txt | 2 +- run.sh | 23 +++++++++++++------ 4 files changed, 22 insertions(+), 19 deletions(-) diff --git a/.gitignore b/.gitignore index 5db9ceb..02828c8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ sample_result_submission/ input_data/ +reference_data/ # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/baselines/BioCLIP_code_submission/model.py b/baselines/BioCLIP_code_submission/model.py index 4b08db4..53cc213 100644 --- a/baselines/BioCLIP_code_submission/model.py +++ b/baselines/BioCLIP_code_submission/model.py @@ -4,8 +4,7 @@ - predict: uses the model to perform predictions. - load: reloads the model. ''' -from open_clip import create_model -from torchvision import transforms +from open_clip import create_model_and_transforms import torch import pickle import os @@ -18,20 +17,14 @@ def __init__(self): def load(self): self.device='cuda' if torch.cuda.is_available() else 'cpu' - model = create_model("hf-hub:imageomics/bioclip", output_dict=True, require_pretrained=True) + model, _, preprocess_val = create_model_and_transforms("hf-hub:imageomics/bioclip", precision="amp", output_dict=True) + model.eval() self.model = model.to(self.device) + self.preprocess_img = preprocess_val with open(os.path.join(os.path.dirname(__file__), "clf.pkl"), "rb") as f: self.clf = pickle.load(f) - self.preprocess_img = transforms.Compose( - [ - transforms.ToTensor(), - transforms.Resize((224, 224), interpolation=transforms.InterpolationMode.BICUBIC), - ] - ) - - def predict(self, datapoint): with torch.no_grad(): diff --git a/baselines/BioCLIP_code_submission/requirements.txt b/baselines/BioCLIP_code_submission/requirements.txt index 2bc001a..f65593a 100644 --- a/baselines/BioCLIP_code_submission/requirements.txt +++ b/baselines/BioCLIP_code_submission/requirements.txt @@ -1,4 +1,4 @@ open-clip-torch==2.24.0 torch==2.3.0 torchvision==0.18.0 -scikit-learn==1.3.2 +scikit-learn==1.4.2 diff --git a/run.sh b/run.sh index 4833d43..e774437 100755 --- a/run.sh +++ b/run.sh @@ -2,11 +2,18 @@ : <<'END_COMMENT' 1. 1.a. If run in docker: -docker pull icreateadockerid/anomaly_challenge -docker run -it -v [repo path]:/codabench icreateadockerid/anomaly_challenge:cpu /bin/bash + +docker pull [image_id] + +1.a.1. If use a GPU: +docker run -it --gpus device=0 -v [repo path]:/codabench [image_id] /bin/bash +1.a.2. If only use CPU: +docker run -it -v [repo path]:/codabench [image_id] /bin/bash + cd codabench 1.b. If run with conda env: + 1.b.1. create running env with conda, venv, etc. conda create --name [name] python=3.10 conda activate [name] @@ -14,15 +21,17 @@ conda activate [name] pip install pillow==10.3.0 tqdm==4.66.4 pandas==2.2.2 scikit-learn==1.4.2 2. edit and run the script + chmod +x run.sh + ./run.sh END_COMMENT export task_type="folder; predict; evaluate" #folder; predict; evaluate -export data_split="test" +export data_split="dev" export baseline_model="bioclip" -export task_folder="$data_split_$baseline_model" +export task_folder="${data_split}_${baseline_model}" ## create folder structure @@ -57,12 +66,12 @@ if [[ "$task_type" == *"predict"* ]]; then exit 1 fi - python ingestion_program/ingestion.py $input_dir $output_dir $program_dir $submission_dir + python3 ingestion_program/ingestion.py $input_dir $output_dir $program_dir $submission_dir fi ## score the predictions if [[ "$task_type" == *"evaluate"* ]]; then export input_dir="sample_result_submission/$task_folder" export output_dir="sample_result_submission/$task_folder" - python scoring_program/score_combined.py $input_dir $output_dir -fi + python3 scoring_program/score_combined.py $input_dir $output_dir +fi \ No newline at end of file