Merge pull request #12 from cni/phi

PHI
cni · Apr 19, 2020 · 9864ee6 · 9864ee6
2 parents 02d2c7b + 308302f
commit 9864ee6
Show file tree

Hide file tree

Showing 3 changed files with 26 additions and 21 deletions.
diff --git a/classification_from_label.py b/classification_from_label.py
@@ -62,9 +62,15 @@ def _find_matches(label, list):
 
 def _compile_regex(string):
     """Generate the regex for label checking"""
-
-    regex = re.compile(r"(\b%s\b)|(_%s_)|(_%s)|(%s_)" % (string,string,string,string), re.IGNORECASE)
-
+    # Escape * for T2*
+    if string == 'T2*':
+        string = 'T2\*'
+        regex = re.compile(r"(\b%s\b)|(_%s_)|(_%s)|(%s_)|(t2star)" % (string,string,string,string), re.IGNORECASE)
+    # Prevent T2 from capturing T2*
+    elif string == 'T2':
+        regex = re.compile(r"(\b%s\b)|(_%s_)|(_%s$)|(%s_)" % (string,string,string,string), re.IGNORECASE)
+    else:
+        regex = re.compile(r"(\b%s\b)|(_%s_)|(_%s)|(%s_)" % (string,string,string,string), re.IGNORECASE)
     return regex
 
 
@@ -86,7 +92,7 @@ def is_anatomy_t1(label):
 # Anatomy, T2
 def is_anatomy_t2(label):
     regexes = [
-        re.compile('t2', re.IGNORECASE)
+        re.compile('t2[^*]*$', re.IGNORECASE)
     ]
     return regex_search_label(regexes, label)
 

diff --git a/manifest.json b/manifest.json
@@ -1,6 +1,6 @@
 {
   "name": "pfile-mr-classifier",
-  "label": "CNI: GE P-File Classifier",
+  "label": "CNI: GE P-File Metadata Import and Classification",
   "description": "Extracts GE P-File header and generates JSON metadata (.metadata.json) which is saved in Flywheel on the P-File's info object. This gear also attempts to determine the P-File's classification (measurement, intent, etc.).",
   "maintainer": "Michael Perry <[email protected]>",
   "author": "Michael Perry <[email protected]>",
@@ -9,11 +9,10 @@
   "cite": "pfile-tools: GE P-File Utilities (https://github.com/njvack/pfile-tools)",
   "license": "BSD-2-Clause",
   "flywheel": "0",
-  "version": "2.2.0",
+  "version": "2.3.0",
   "custom": {
-    "docker-image": "stanfordcni/pfile-mr-classifier:2.2.0",
     "gear-builder": {
-      "image": "stanfordcni/pfile-mr-classifier:2.2.0",
+      "image": "stanfordcni/pfile-mr-classifier:2.3.0",
       "category": "converter"
     },
     "flywheel": {

diff --git a/pfile-mr-classifier.py b/pfile-mr-classifier.py
@@ -10,7 +10,7 @@
 import zipfile
 import datetime
 import classification_from_label
-from pprint import pprint
+import pprint
 
 logging.basicConfig()
 log = logging.getLogger('pfile-mr-classifier')
@@ -137,9 +137,9 @@ def parse_patient_age(age):
 
     # Make sure that the age is reasonable
     if not age_in_seconds or age_in_seconds <= 0:
-        age_in_seconds = None
+        return None
 
-    return age_in_seconds
+    return int(age_in_seconds)
 
 
 def get_timestamp(pfile, timezone):
@@ -224,7 +224,7 @@ def get_pfile_comment(pfile):
         try:
             zip = zipfile.ZipFile(pfile)
             comment = json.loads(zip.comment)
-            pprint(comment)
+            log.info(pprint.pformat(comment))
             return comment
         except:
             return None
@@ -271,21 +271,21 @@ def pfile_classify(pfile, pfile_header_csv, pfile_name, outbase, timezone):
     # Subject Metadata
     metadata['session']['subject'] = {}
     metadata['session']['subject']['sex'] = get_sex_string(_pfile.patient_sex)
-    age = parse_patient_age(_pfile.patient_age)
-    metadata['session']['subject']['age'] = int(age) if age else 0
-    subname = _pfile.patient_name
-    if subname:
-        name = subname.split('^')
-        metadata['session']['subject']['lastname'] = name[0]
-        if len(name) == 2:
-            metadata['session']['subject']['firstname'] = name[1]
+    subject_age = parse_patient_age(_pfile.patient_age)
+    if subject_age:
+        metadata['session']['subject']['age'] = subject_age
+    if hasattr(_pfile, 'patient_weight_g') and _pfile.patient_weight_g:
+        # Return weight in kg
+        metadata['session']['subject']['weight'] = _pfile.patient_weight_g * 0.001
 
 
     # File metadata
     pfile_file = {}
     pfile_file['name'] = os.path.basename(pfile_name)
     pfile_file['modality'] = _pfile.exam_type
     pfile_file['info'] = extract_pfile_header(pfile_header_csv)
+    if pfile_file['info'].get('patient_name'):
+        pfile_file['info']['patient_name'] = 'REDACTED'
     pfile_file['classification'] = get_pfile_classification(_pfile)
 
     # Get a list of the files within the zip.
@@ -317,7 +317,7 @@ def pfile_classify(pfile, pfile_header_csv, pfile_name, outbase, timezone):
         json.dump(metadata, metafile)
 
     # Show the metadata
-    pprint(metadata)
+    log.info(pprint.pformat(metadata))
 
     return metafile_outname