Skip to content

Commit

Permalink
update create json
Browse files Browse the repository at this point in the history
  • Loading branch information
bturkus committed Oct 2, 2023
1 parent 2812eee commit 8c92d5b
Show file tree
Hide file tree
Showing 2 changed files with 85 additions and 30 deletions.
98 changes: 72 additions & 26 deletions ami_scripts/config.json
Original file line number Diff line number Diff line change
@@ -1,30 +1,30 @@
{
"replacements": [
{
"find": ";",
"replace": "-"
},
{
"find": "\\\\",
"replace": "-"
},
{
"find": "\\n",
"replace": "-"
},
{
"find": "\\r",
"replace": "-"
},
{
"find": "\\t",
"replace": "-"
},
{
"find": "\"",
"replace": "''"
}
],
"replacements": [
{
"find": ";",
"replace": "-"
},
{
"find": "\\\\",
"replace": "-"
},
{
"find": "\\n",
"replace": "-"
},
{
"find": "\\r",
"replace": "-"
},
{
"find": "\\t",
"replace": "-"
},
{
"find": "\"",
"replace": "''"
}
],
"format_fixes": {
"video cassette analog": [
"Betacam",
Expand Down Expand Up @@ -97,5 +97,51 @@
"VHS/PCM",
"Hi8/PCM"
]
},
"digitizers": {
"Media Preserve": {
"organization": {
"address": {
"city": "Cranberry Township",
"postalCode": "16066",
"state": "PA",
"street1": "111 Thomson Park Drive"
},
"name": "Preservation Technologies, L.P."
}
},
"Colorlab": {
"organization": {
"address": {
"city": "Rockville",
"postalCode": "20852",
"state": "MD",
"street1": "5708 Arundel Ave"
},
"name": "Colorlab"
}
},
"NYPL": {
"organization": {
"address": {
"city": "New York",
"postalCode": "10023",
"state": "NY",
"street1": "40 Lincoln Center Plaza"
},
"name": "New York Public Library"
}
},
"Memnon": {
"organization": {
"address": {
"city": "Bloomington",
"postalCode": "47408",
"state": "IN",
"street1": "2719 E 10th St"
},
"name": "Memnon Archiving Services"
}
}
}
}
17 changes: 13 additions & 4 deletions ami_scripts/create_media_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,13 @@ def get_args():
parser = argparse.ArgumentParser(description="Create NYPL JSON Files from SPEC Export and user-supplied directory of media files")
parser.add_argument('-c', '--config', required=True, help='Path to config file')
parser.add_argument('-s', '--source', help='path to SPEC CSV Export', required=False)
parser.add_argument('-d', '--directory', help='path to directory of media files', required=False)
parser.add_argument('-m', '--media', help='path to directory of media files', required=False) # Modified here
parser.add_argument('-d', '--digitizer', choices=['Media Preserve', 'Colorlab', 'NYPL', 'Memnon'], required=False, help='Name of the digitizer')
parser.add_argument('-o', '--output', help='path to destination for JSON files', required=True)
return parser.parse_args()



def load_config(config_file):
with open(config_file) as f:
config = json.load(f)
Expand All @@ -55,15 +57,17 @@ def load_csv(args):

def get_media_files(args):
media_list = []
if args.directory:
if args.media:
try:
media_dir = os.scandir(args.directory)
media_dir = os.scandir(args.media)
for entry in media_dir:
if entry.is_file() and entry.name.lower().endswith(tuple(valid_extensions)):
media_list.append(entry.path)
media_list.sort()
except OSError as e:
logger.error(f"Error getting media files: {e}")
if media_list:
logger.info(f"Found these files: {', '.join(media_list)}")
return media_list


Expand Down Expand Up @@ -155,6 +159,8 @@ def create_new_json(args, media_data, config):
'filesize': {'measure': media_data['file_size'], 'unit': 'B'}
}
}
if args.digitizer:
nested_json['digitizer'] = config['digitizers'][args.digitizer]

# Remove any keys in the 'technical' dictionary that have a value of None
nested_json['technical'] = {k: v for k, v in nested_json['technical'].items() if v is not None}
Expand All @@ -174,7 +180,10 @@ def process_media_files(args, data_dict, media_list, config):
cms_id = media_data['cms_id']
if cms_id in data_dict:
media_data['bibliographic'] = data_dict[cms_id]
create_new_json(args, media_data, config)
logger.info(f"Now making JSON for {media_data['filename']} file")
create_new_json(args, media_data, config)
else:
logger.warning(f"{media_data['filename']} File not found in SPEC CSV Export (data dict)")


def main():
Expand Down

0 comments on commit 8c92d5b

Please sign in to comment.