Merge pull request #35 from surfriderfoundationeurope/feature/debug_dev

Feature/debug dev
surfriderfoundationeurope · Dec 20, 2022 · ddb35b1 · ddb35b1
2 parents bd04459 + b7b2561
commit ddb35b1
Show file tree

Hide file tree

Showing 4 changed files with 12 additions and 15 deletions.
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "plastic-origins"
-version = "2.2.1a0"
+version = "2.2.2a0"
 
 description = "A package containing methods commonly used to make inferences"
 repository = "https://github.com/surfriderfoundationeurope/surfnet"

diff --git a/src/plasticorigins/training/data/data_processing.py b/src/plasticorigins/training/data/data_processing.py
@@ -37,7 +37,7 @@
     data_dir: Union[WindowsPath, str],
     images_dir: Union[WindowsPath, str],
     labels_folder_name: Union[str, WindowsPath],
-    new_csv_bounding_boxes: Union[WindowsPath, str],
+    new_csv_bounding_boxes: Optional[Union[WindowsPath, str]],
     df_bboxes: DataFrame,
     df_images: DataFrame,
     user: str,
@@ -64,6 +64,7 @@
 from matplotlib import image
 import matplotlib.pyplot as plt
 from PIL import Image, ExifTags, ImageDraw
+import cv2
 
 
 class_id_to_name_mapping = {
@@ -272,14 +273,13 @@ def apply_image_transformations(
     # in place rotation of the image using Exif data
     image = image_orientation(image)
 
-    img = np.array(image)
-    h, w = img.shape[:-1]
+    w, h = image.size
+    image = np.array(image)
     target_h = 1080  # the target height of the image
     ratio = target_h / h  # We get the ratio of the target and the actual height
     target_w = int(ratio * w)
 
-    image.resize((target_w, target_h))
-    image = np.array(image)
+    image = cv2.resize(image, (target_w, target_h))
 
     return image, ratio, target_h, target_w
 
@@ -349,7 +349,7 @@ def build_yolo_annotations_for_images(
 
     print("Start building the annotations ...")
 
-    for img_id in used_imgs:
+    for img_id in tqdm(used_imgs):
 
         img_name = df_images.loc[img_id]["filename"]
         if Path.exists(input_img_folder / img_name):
@@ -380,10 +380,6 @@ def build_yolo_annotations_for_images(
         else:
             count_missing += 1
 
-        if count_exists % 500 == 0:
-            print("Exists : ", count_exists)
-            print("Missing : ", count_missing)
-
     print(f"Process finished successfully with {count_missing} missing images !")
 
     return valid_imagenames, count_exists, count_missing
@@ -550,7 +546,7 @@ def get_annotations_from_db(
     )
     conn.close()
 
-    return df_bboxes, df_images.set_index("id")  # , raw_category_info
+    return df_bboxes, df_images  # , raw_category_info
 
 
 def get_annotations_from_files(
@@ -675,7 +671,7 @@ def update_bounding_boxes_database(
     data_dir: Union[WindowsPath, str],
     images_dir: Union[WindowsPath, str],
     labels_folder_name: Union[str, WindowsPath],
-    new_csv_bounding_boxes: Union[WindowsPath, str],
+    new_csv_bounding_boxes: Optional[Union[WindowsPath, str]],
     df_bboxes: DataFrame,
     df_images: DataFrame,
     mapping_to_10cl: dict,
@@ -690,7 +686,7 @@ def update_bounding_boxes_database(
         data_dir (WindowsPath): path of the root data directory. It should contain a folder with all useful data for images and annotations
         images_dir (WindowsPath): path of the image directory. It should contain a folder with all images
         labels_folder_name (Union[str,WindowsPath]): the name of the labels folder or the path of this folder
-        new_csv_bounding_boxes (Union[WindowsPath,str]) : the path of the bounding boxes csv files with annotation corrections
+        new_csv_bounding_boxes (Optional[Union[WindowsPath,str]]) : the path of the bounding boxes csv files with annotation corrections
         df_bboxes (DataFrame): DataFrame with the bounding box information (location X, Y and Height, Width)
         df_images (DataFrame): DataFrame with the image information
         mapping_to_10cl (dict): dictionary to map categories from nb_classes to 10

diff --git a/src/plasticorigins/training/data/make_dataset2.py b/src/plasticorigins/training/data/make_dataset2.py
@@ -37,6 +37,7 @@ def main(args: Namespace) -> None:
         df_bboxes, df_images = get_annotations_from_db(
             args.user, args.password, args.bboxes_table
         )
+        df_images = df_images.set_index("id")
 
     else:
         print("either a password must be set, or bbox and images filenames")

diff --git a/tests/test_plasticorigins/training/test_data_processing.py b/tests/test_plasticorigins/training/test_data_processing.py
@@ -245,7 +245,7 @@ def test_get_annotations_from_db():
         )
 
         assert df_bboxes.shape == (9039, 9)
-        assert df_images.shape == (8126, 8)
+        assert df_images.shape == (8126, 9)
 
     else:
         print("EnvError : .env file not found")