Skip to content

Commit

Permalink
framing update
Browse files Browse the repository at this point in the history
institution action update

consortium redesign

moving unapproved consortiums to "forReview"

updating consortia

build script fix.

generation of outputs in JSONLD/scripts/outputs

symbolic link to ld compiled outputs

adding new inst
  • Loading branch information
wolfiex committed Jun 11, 2024
1 parent 5e9f49b commit 141db1e
Show file tree
Hide file tree
Showing 125 changed files with 74,622 additions and 863,363 deletions.
2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/add-Institution.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ To get an institutions ROR code we can see if it exists on ror.org. For the entr

``` configfile
[institutions]
[institution]
Acronym = "CMIP-IPO"
Full_Name = "Coupled Model Intercomparison Project: International Project Office"
ROR = "000fg4e24"
Expand Down
Binary file not shown.
13 changes: 13 additions & 0 deletions .github/libs/action_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,17 +67,30 @@ def dispatch(token,payload,repo):


def update_issue_title (issue_number,kind,payload):
    """Rename a GitHub issue so its title names the entry being added.

    A negative ``issue_number`` marks a local/dry run: the entry name is
    only printed and 0 is returned, with no call to the ``gh`` CLI.
    """
    entry_name = payload["client_payload"]["name"]

    if issue_number < 0:
        print('Updating: ', entry_name)
        return 0

    # Rewrite the issue title to reflect the submitted contents.
    command = f'gh issue edit {issue_number} --title "Add {kind}: {entry_name}"'
    print(os.popen(command).read())


def update_issue(issue_number, comment, err=True):
    """Post *comment* on a GitHub issue via the ``gh`` CLI.

    Parameters
    ----------
    issue_number : int
        Target issue number; a negative value means a local/dry run, in
        which case the comment is only printed and 0 is returned.
    comment : str
        Markdown body for the issue comment.
    err : bool
        When True (the default) the comment is treated as an error: the
        command output is echoed and the script exits with *comment* as
        its status message.
    """
    if issue_number < 0:
        print(comment)
        return 0

    out = os.popen(f'gh issue comment {issue_number} --body "{comment}"')

    if err:
        # BUG FIX: previously printed the pipe object's repr; .read()
        # drains the pipe so the gh command's actual output is shown.
        print(out.read())
        sys.exit(comment)

def close_issue(issue_number, comment, err=True):
    """Close a GitHub issue with a final comment via the ``gh`` CLI.

    A negative *issue_number* marks a local/dry run: the comment is only
    printed and 0 is returned. When *err* is true the script terminates
    afterwards with *comment* as its exit message.
    """
    if issue_number < 0:
        print(comment)
        return 0
    # BUG FIX: .read() drains the pipe so the command's output (not the
    # os.popen object's repr) is printed.
    print(os.popen(f'gh issue close {issue_number} -c "{comment}"').read())
    if err:
        sys.exit(comment)

Expand Down
7 changes: 3 additions & 4 deletions .github/libs/add/Institution.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,24 +11,23 @@

from action_functions import parse_md, dispatch, update_issue_title


# generic
issue_number = os.environ.get('ISSUE_NUMBER')
issue_title = os.environ.get('ISSUE_TITLE')
issue_body = os.environ.get('ISSUE_BODY')
issue_submitter = os.environ.get('ISSUE_SUBMITTER')
repo = os.environ.get('REPO').replace('https://github.com','https://api.github.com/repos')
token = os.environ.get('GH_TOKEN')


# get content.
parsed = parse_md(issue_body)


'''
Lets submit the data to a dispatch event
'''


data = parsed['institutions']
data = parsed['institution']


kind = __file__.split('/')[-1].replace('.py','')
Expand Down
Binary file added .github/libs/checks/.DS_Store
Binary file not shown.
2 changes: 2 additions & 0 deletions .github/libs/checks/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from . import schema
from . import institution
Binary file not shown.
Binary file not shown.
Binary file not shown.
25 changes: 25 additions & 0 deletions .github/libs/checks/institution.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import os, json, sys, glob

def validate(jsn, iloc):
    """Run institution-specific sanity checks on a JSON-LD entry.

    Parameters
    ----------
    jsn : dict
        Candidate institution record; must contain '@id' and
        'institution:ror'.
    iloc : str
        Filesystem path where the record would be written.

    Returns
    -------
    tuple[list, list]
        ``(close, errors)`` — ``close`` holds fatal problems (duplicate
        entries) that should close the submission; ``errors`` holds
        fixable inconsistencies the submitter can edit.
    """
    # '@id' looks like 'prefix:some/path/name'; keep only the path part.
    path = os.path.dirname(jsn['@id']).split(':')[-1]

    errors = []
    close = []

    if os.path.exists(iloc):
        close.append(f"Current institution already exists:\n see {iloc}")

    if path not in iloc:
        errors.append(f"@id / location do not match:\n {path} || {jsn['@id']}")

    # Duplicate-ROR check against the compiled graph next to the target file.
    # BUG FIX: use a context manager so the file handle is closed promptly.
    # NOTE(review): assumes graph.json exists beside iloc — confirm upstream.
    with open(os.path.dirname(iloc) + "/graph.json", 'r') as fh:
        graph = fh.read()
    if jsn['institution:ror'] in graph:
        close.append(f"ROR entry already exists in graph. \n EXITING: {jsn['institution:ror']}")

    return close, errors
33 changes: 33 additions & 0 deletions .github/libs/checks/schema.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@

import os, json
import jsonschema
from jsonschema import validate


def rdjsn(f):
    """Load and return the parsed JSON content of file path *f*.

    BUG FIX: the original left the file handle open (relying on GC);
    a context manager closes it deterministically.
    """
    with open(f, 'r') as fh:
        return json.load(fh)

def validate_json(jsn):
    """Validate a JSON-LD entry against the schema matching its '@id'.

    Parameters
    ----------
    jsn : dict or str
        A parsed JSON-LD record, or a path to a JSON file to read.

    Returns
    -------
    tuple[bool, str]
        ``(ok, message)`` — True with a success message, or False with a
        description of the validation error.
    """
    if not isinstance(jsn, dict):
        # A file path was given instead of a parsed object; read it.
        jsn = rdjsn(jsn)
    name = os.path.basename(jsn['@id'])

    # '@id' is 'prefix:relative/schema/path'; resolve against the repo root.
    schema_url = os.path.dirname(jsn['@id']).split(':')[-1]
    toplevel = os.popen('git rev-parse --show-toplevel').read().strip()

    schema_loc = f"{toplevel}/JSONLD/{schema_url}/schema.json"

    schema = rdjsn(schema_loc)

    try:
        validate(instance=jsn, schema=schema)
        print(f"Validation succeeded: {name}")
        return True, f"Validation succeeded: {name}"
    except jsonschema.exceptions.ValidationError as err:
        print("Validation error:", err.message, name)
        # BUG FIX: the message lacked the f-prefix (braces were emitted
        # literally) and a stray third tuple element broke callers that
        # unpack (ok, message); return a 2-tuple like the success path.
        return False, f"Validation error:\n {err.message}\n RelevantFile: {jsn['@id']}"


173 changes: 108 additions & 65 deletions .github/libs/parse/Institution.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,24 @@
import json, os, sys
import json, os, sys, glob
from collections import OrderedDict

path = f'organisations/institutions'
toplevel = os.popen('git rev-parse --show-toplevel').read().strip()
loc = f"{toplevel}/JSONLD/{path}/"



# Get the parent directory of the current file
parent_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
sys.path.append(parent_dir)

import checks
from checks import schema,institution
from action_functions import update_issue,jr,jw,getfile,close_issue,pp

# data
issue_number = os.environ['ISSUE']
issue_number = int(os.environ['ISSUE'])
data = os.environ['PAYLOAD_DATA']
data = json.loads(str(data))


# Load Existing
institutions = jr(getfile('institutions')[0])
ilist = institutions['institutions']



data['acronym'] = data['acronym'].replace(' ','')

'''
Functions
Expand Down Expand Up @@ -49,95 +49,110 @@ def get_ror_data(name):



def parse_ror_data(cmip_acronym, ror_data):
    """Parse a ROR organization record into a JSON-LD institution entry.

    Parameters
    ----------
    cmip_acronym : str
        The CMIP acronym for the institution; lower-cased to build '@id'.
    ror_data : dict or None
        A ROR v1 organization record (as returned by the ROR API), or
        None/falsy when the lookup failed.

    Returns
    -------
    dict or None
        The JSON-LD institution record, or None when *ror_data* is falsy.
    """
    if not ror_data:
        return None

    ror_id = ror_data['id'].split('/')[-1]
    addresses = ror_data.get('addresses')

    return {
        "@id": f"mip-cmor-tables:organisations/institutions/{cmip_acronym.lower()}",
        "@type": "cmip:institution",
        "institution:cmip_acronym": cmip_acronym,
        "institution:ror": ror_id,
        "institution:name": ror_data['name'],
        "institution:url": ror_data.get('links', []),
        "institution:established": ror_data.get('established'),
        "institution:type": ror_data.get('types', [])[0] if ror_data.get('types') else None,
        # BUG FIX: key was misspelled 'lables', so labels were always empty.
        "institution:labels": [i['label'] for i in ror_data.get('labels', [])],
        "institution:aliases": ror_data.get('aliases', []),
        "institution:acronyms": ror_data.get('acronyms', []),
        "institution:location": {
            "@id": f"mip-cmor-tables:organisations/institutions/location/{ror_id}",
            "@type": "institution:location",
            "@nest": {
                "location:lat": addresses[0].get('lat') if addresses else None,
                # BUG FIX: longitude previously copied the 'lat' field;
                # ROR stores longitude under 'lng'.
                "location:lon": addresses[0].get('lng') if addresses else None,
                "location:city": addresses[0].get('city') if addresses else None,
                "location:country": list(ror_data['country'].values()) if ror_data.get('country') else None
            }
        }
        # can reverse match consortiums or members from here.
    }


# def search_ror(query):

# import requests,json
# import urllib.parse

# # Strip out strange characters and insert in the desired format
# format_name = lambda n : urllib.parse.quote(n)
# # Make the API call
# url = 'https://api.ror.org/organizations?affiliation=%{}s'
'''
Get the Data
'''

# response = requests.get(url.format(query))

# # Check if the request was successful
# if response.status_code == 200:
# data = response.json()
# if data.get('items'):
# org = data['items'][0].get('organization')
# return data['items'][0]['score'],org['id'].split('/')[-1], org['name']
# else: return None,None,None
# else:
# print(f"Error: {response.status_code} - {response.text}")
# return None,None,None
dta = get_ror_data(data['ror'])
new_entry = parse_ror_data(data['acronym'],dta)


outfile = f"{loc}{data['acronym'].lower()}.json"

# data = parsed['institutions']
# data['institutions'] = parsed['institutions']['cmip6_acronyms']
close,errors = checks.institution.validate(new_entry,outfile)


for error in close:
update_issue(issue_number,f'# Closing issue. \n {error} \n\n Please review request and resubmit.')

for error in errors:
update_issue(issue_number,f'# {error} \n\n Please update (edit) the entry above.')

'''
Get the Data
'''

if data['acronym'] in ilist:
close_issue(issue_number,f'# Closing issue. \n {data["acronym"]} already exists in the institution list. \n\n Please review request and resubmit.')
valid,validation_message = checks.schema.validate_json(new_entry)

dta = get_ror_data(data['ror'])
new_entry = parse_ror_data(dta)
if valid:
update_issue(issue_number,validation_message,False)
else:
error = f"Schema Failed.\n\n Please update the entry above. {validation_message}"
# this exits the script.
update_issue(issue_number,error,err=True)


update_issue(issue_number,f"# Sanity Check: \n Is '{data['full_name']}' the same as '{new_entry['identifiers']['institution_name']}'",False)



ilist[data['acronym']] = new_entry
update_issue(issue_number,f"# Sanity Check: \n Is '{data['full_name']}' the same as '{new_entry['institution:name']}'",False)

# print for pull request
pp( {data['acronym'] : new_entry })

jsn_ordered = OrderedDict(sorted(new_entry.items(), key=lambda item: item[0]))


ilist = OrderedDict(sorted(ilist.items(), key=lambda item: item[0]))

institutions['institutions'] = ilist


if 'SUBMIT' in os.environ:
if len(close):
sys.exit(' skipping the submission.' )
if os.environ['SUBMIT'] == 'none':
sys.exit(' skipping the submission.' )
elif os.environ['SUBMIT'] == 'manual':
inp = input('Submit to the repository? [y/n]')
if not inp.lower() != 'y':
sys.exit(' skipping the submission.' )
else:
sys.exit(' skipping the submission.' )

# Serialize back to JSON
jw(institutions, getfile('institutions')[0])
jw(jsn_ordered, outfile)

# normal entries if not specified.
os.popen(f'git add -A"').read()
os.popen(f'git commit -m "New entry {data["acronym"]} to the Institutions file"').read()
if 'OVERRIDE_AUTHOR' in os.environ:
os.popen(f'git commit --author="{os.environ["OVERRIDE_AUTHOR"]} {os.environ["OVERRIDE_AUTHOR"]}@users.noreply.github.com" -m "New entry {data["acronym"]} to the Institutions LD file"').read()
else:
os.popen(f'git commit -m "New entry {data["acronym"]} to the Institutions LD file"').read()







Expand All @@ -147,3 +162,31 @@ def parse_ror_data(ror_data):




# def search_ror(query):

# import requests,json
# import urllib.parse

# # Strip out strange characters and insert in the desired format
# format_name = lambda n : urllib.parse.quote(n)
# # Make the API call
# url = 'https://api.ror.org/organizations?affiliation=%{}s'

# response = requests.get(url.format(query))

# # Check if the request was successful
# if response.status_code == 200:
# data = response.json()
# if data.get('items'):
# org = data['items'][0].get('organization')
# return data['items'][0]['score'],org['id'].split('/')[-1], org['name']
# else: return None,None,None
# else:
# print(f"Error: {response.status_code} - {response.text}")
# return None,None,None



# data = parsed['institutions']
# data['institutions'] = parsed['institutions']['cmip6_acronyms']
Loading

0 comments on commit 141db1e

Please sign in to comment.