Skip to content

Commit

Permalink
framing update
Browse files Browse the repository at this point in the history
institution action update

consortium redesign

moving unapproved consortiums to "forReview"

updating consortia

build script fix.

generation of outputs in JSONLD/scripts/outputs

symbolic link to ld compiled outputs

adding new inst
  • Loading branch information
wolfiex committed Jun 11, 2024
1 parent 5e9f49b commit 141db1e
Show file tree
Hide file tree
Showing 125 changed files with 74,622 additions and 863,363 deletions.
2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/add-Institution.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ To get an institutions ROR code we can see if it exists on ror.org. For the entr

``` configfile
[institutions]
[institution]
Acronym = "CMIP-IPO"
Full_Name = "Coupled Model Intercomparison Project: International Project Office"
ROR = "000fg4e24"
Expand Down
Binary file not shown.
13 changes: 13 additions & 0 deletions .github/libs/action_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,17 +67,30 @@ def dispatch(token,payload,repo):


def update_issue_title (issue_number,kind,payload):
    """Rename a GitHub issue so its title names the entry being added.

    A negative ``issue_number`` marks a local/dry run: the entry name is
    only printed and 0 is returned, with no call to the ``gh`` CLI.
    """
    entry_name = payload["client_payload"]["name"]

    if issue_number < 0:
        print('Updating: ', entry_name)
        return 0

    # Rewrite the issue title to reflect the submitted contents.
    command = f'gh issue edit {issue_number} --title "Add {kind}: {entry_name}"'
    print(os.popen(command).read())


def update_issue(issue_number, comment, err=True):
    """Post *comment* on a GitHub issue via the ``gh`` CLI.

    Parameters
    ----------
    issue_number : int
        Target issue number; a negative value means a local/dry run, in
        which case the comment is only printed and 0 is returned.
    comment : str
        Markdown body for the issue comment.
    err : bool
        When True (the default) the comment is treated as an error: the
        command output is echoed and the script exits with *comment* as
        its status message.
    """
    if issue_number < 0:
        print(comment)
        return 0

    out = os.popen(f'gh issue comment {issue_number} --body "{comment}"')

    if err:
        # BUG FIX: previously printed the pipe object's repr; .read()
        # drains the pipe so the gh command's actual output is shown.
        print(out.read())
        sys.exit(comment)

def close_issue(issue_number, comment, err=True):
    """Close a GitHub issue with a final comment via the ``gh`` CLI.

    A negative *issue_number* marks a local/dry run: the comment is only
    printed and 0 is returned. When *err* is true the script terminates
    afterwards with *comment* as its exit message.
    """
    if issue_number < 0:
        print(comment)
        return 0
    # BUG FIX: .read() drains the pipe so the command's output (not the
    # os.popen object's repr) is printed.
    print(os.popen(f'gh issue close {issue_number} -c "{comment}"').read())
    if err:
        sys.exit(comment)

Expand Down
7 changes: 3 additions & 4 deletions .github/libs/add/Institution.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,24 +11,23 @@

from action_functions import parse_md, dispatch, update_issue_title


# generic
issue_number = os.environ.get('ISSUE_NUMBER')
issue_title = os.environ.get('ISSUE_TITLE')
issue_body = os.environ.get('ISSUE_BODY')
issue_submitter = os.environ.get('ISSUE_SUBMITTER')
repo = os.environ.get('REPO').replace('https://github.com','https://api.github.com/repos')
token = os.environ.get('GH_TOKEN')


# get content.
parsed = parse_md(issue_body)


'''
Lets submit the data to a dispatch event
'''


data = parsed['institutions']
data = parsed['institution']


kind = __file__.split('/')[-1].replace('.py','')
Expand Down
Binary file added .github/libs/checks/.DS_Store
Binary file not shown.
2 changes: 2 additions & 0 deletions .github/libs/checks/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from . import schema
from . import institution
Binary file not shown.
Binary file not shown.
Binary file not shown.
25 changes: 25 additions & 0 deletions .github/libs/checks/institution.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import os, json, sys, glob

def validate(jsn, iloc):
    """Run institution-specific sanity checks on a JSON-LD entry.

    Parameters
    ----------
    jsn : dict
        Candidate institution record; must contain '@id' and
        'institution:ror'.
    iloc : str
        Filesystem path where the record would be written.

    Returns
    -------
    tuple[list, list]
        ``(close, errors)`` — ``close`` holds fatal problems (duplicate
        entries) that should close the submission; ``errors`` holds
        fixable inconsistencies the submitter can edit.
    """
    # '@id' looks like 'prefix:some/path/name'; keep only the path part.
    path = os.path.dirname(jsn['@id']).split(':')[-1]

    errors = []
    close = []

    if os.path.exists(iloc):
        close.append(f"Current institution already exists:\n see {iloc}")

    if path not in iloc:
        errors.append(f"@id / location do not match:\n {path} || {jsn['@id']}")

    # Duplicate-ROR check against the compiled graph next to the target file.
    # BUG FIX: use a context manager so the file handle is closed promptly.
    # NOTE(review): assumes graph.json exists beside iloc — confirm upstream.
    with open(os.path.dirname(iloc) + "/graph.json", 'r') as fh:
        graph = fh.read()
    if jsn['institution:ror'] in graph:
        close.append(f"ROR entry already exists in graph. \n EXITING: {jsn['institution:ror']}")

    return close, errors
33 changes: 33 additions & 0 deletions .github/libs/checks/schema.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@

import os, json
import jsonschema
from jsonschema import validate


def rdjsn(f):
    """Load and return the parsed JSON content of file path *f*.

    BUG FIX: the original left the file handle open (relying on GC);
    a context manager closes it deterministically.
    """
    with open(f, 'r') as fh:
        return json.load(fh)

def validate_json(jsn):
    """Validate a JSON-LD entry against the schema matching its '@id'.

    Parameters
    ----------
    jsn : dict or str
        A parsed JSON-LD record, or a path to a JSON file to read.

    Returns
    -------
    tuple[bool, str]
        ``(ok, message)`` — True with a success message, or False with a
        description of the validation error.
    """
    if not isinstance(jsn, dict):
        # A file path was given instead of a parsed object; read it.
        jsn = rdjsn(jsn)
    name = os.path.basename(jsn['@id'])

    # '@id' is 'prefix:relative/schema/path'; resolve against the repo root.
    schema_url = os.path.dirname(jsn['@id']).split(':')[-1]
    toplevel = os.popen('git rev-parse --show-toplevel').read().strip()

    schema_loc = f"{toplevel}/JSONLD/{schema_url}/schema.json"

    schema = rdjsn(schema_loc)

    try:
        validate(instance=jsn, schema=schema)
        print(f"Validation succeeded: {name}")
        return True, f"Validation succeeded: {name}"
    except jsonschema.exceptions.ValidationError as err:
        print("Validation error:", err.message, name)
        # BUG FIX: the message lacked the f-prefix (braces were emitted
        # literally) and a stray third tuple element broke callers that
        # unpack (ok, message); return a 2-tuple like the success path.
        return False, f"Validation error:\n {err.message}\n RelevantFile: {jsn['@id']}"


173 changes: 108 additions & 65 deletions .github/libs/parse/Institution.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,24 @@
import json, os, sys
import json, os, sys, glob
from collections import OrderedDict

path = f'organisations/institutions'
toplevel = os.popen('git rev-parse --show-toplevel').read().strip()
loc = f"{toplevel}/JSONLD/{path}/"



# Get the parent directory of the current file
parent_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
sys.path.append(parent_dir)

import checks
from checks import schema,institution
from action_functions import update_issue,jr,jw,getfile,close_issue,pp

# data
issue_number = os.environ['ISSUE']
issue_number = int(os.environ['ISSUE'])
data = os.environ['PAYLOAD_DATA']
data = json.loads(str(data))


# Load Existing
institutions = jr(getfile('institutions')[0])
ilist = institutions['institutions']



data['acronym'] = data['acronym'].replace(' ','')

'''
Functions
Expand Down Expand Up @@ -49,95 +49,110 @@ def get_ror_data(name):



def parse_ror_data(cmip_acronym, ror_data):
    """Parse a ROR organization record into a JSON-LD institution entry.

    Parameters
    ----------
    cmip_acronym : str
        The CMIP acronym for the institution; lower-cased to build '@id'.
    ror_data : dict or None
        A ROR v1 organization record (as returned by the ROR API), or
        None/falsy when the lookup failed.

    Returns
    -------
    dict or None
        The JSON-LD institution record, or None when *ror_data* is falsy.
    """
    if not ror_data:
        return None

    ror_id = ror_data['id'].split('/')[-1]
    addresses = ror_data.get('addresses')

    return {
        "@id": f"mip-cmor-tables:organisations/institutions/{cmip_acronym.lower()}",
        "@type": "cmip:institution",
        "institution:cmip_acronym": cmip_acronym,
        "institution:ror": ror_id,
        "institution:name": ror_data['name'],
        "institution:url": ror_data.get('links', []),
        "institution:established": ror_data.get('established'),
        "institution:type": ror_data.get('types', [])[0] if ror_data.get('types') else None,
        # BUG FIX: key was misspelled 'lables', so labels were always empty.
        "institution:labels": [i['label'] for i in ror_data.get('labels', [])],
        "institution:aliases": ror_data.get('aliases', []),
        "institution:acronyms": ror_data.get('acronyms', []),
        "institution:location": {
            "@id": f"mip-cmor-tables:organisations/institutions/location/{ror_id}",
            "@type": "institution:location",
            "@nest": {
                "location:lat": addresses[0].get('lat') if addresses else None,
                # BUG FIX: longitude previously copied the 'lat' field;
                # ROR stores longitude under 'lng'.
                "location:lon": addresses[0].get('lng') if addresses else None,
                "location:city": addresses[0].get('city') if addresses else None,
                "location:country": list(ror_data['country'].values()) if ror_data.get('country') else None
            }
        }
        # can reverse match consortiums or members from here.
    }


# def search_ror(query):

# import requests,json
# import urllib.parse

# # Strip out strange characters and insert in the desired format
# format_name = lambda n : urllib.parse.quote(n)
# # Make the API call
# url = 'https://api.ror.org/organizations?affiliation=%{}s'
'''
Get the Data
'''

# response = requests.get(url.format(query))

# # Check if the request was successful
# if response.status_code == 200:
# data = response.json()
# if data.get('items'):
# org = data['items'][0].get('organization')
# return data['items'][0]['score'],org['id'].split('/')[-1], org['name']
# else: return None,None,None
# else:
# print(f"Error: {response.status_code} - {response.text}")
# return None,None,None
dta = get_ror_data(data['ror'])
new_entry = parse_ror_data(data['acronym'],dta)


outfile = f"{loc}{data['acronym'].lower()}.json"

# data = parsed['institutions']
# data['institutions'] = parsed['institutions']['cmip6_acronyms']
close,errors = checks.institution.validate(new_entry,outfile)


for error in close:
update_issue(issue_number,f'# Closing issue. \n {error} \n\n Please review request and resubmit.')

for error in errors:
update_issue(issue_number,f'# {error} \n\n Please update (edit) the entry above.')

'''
Get the Data
'''

if data['acronym'] in ilist:
close_issue(issue_number,f'# Closing issue. \n {data["acronym"]} already exists in the institution list. \n\n Please review request and resubmit.')
valid,validation_message = checks.schema.validate_json(new_entry)

dta = get_ror_data(data['ror'])
new_entry = parse_ror_data(dta)
if valid:
update_issue(issue_number,validation_message,False)
else:
error = f"Schema Failed.\n\n Please update the entry above. {validation_message}"
# this exits the script.
update_issue(issue_number,error,err=True)


update_issue(issue_number,f"# Sanity Check: \n Is '{data['full_name']}' the same as '{new_entry['identifiers']['institution_name']}'",False)



ilist[data['acronym']] = new_entry
update_issue(issue_number,f"# Sanity Check: \n Is '{data['full_name']}' the same as '{new_entry['institution:name']}'",False)

# print for pull request
pp( {data['acronym'] : new_entry })

jsn_ordered = OrderedDict(sorted(new_entry.items(), key=lambda item: item[0]))


ilist = OrderedDict(sorted(ilist.items(), key=lambda item: item[0]))

institutions['institutions'] = ilist


if 'SUBMIT' in os.environ:
if len(close):
sys.exit(' skipping the submission.' )
if os.environ['SUBMIT'] == 'none':
sys.exit(' skipping the submission.' )
elif os.environ['SUBMIT'] == 'manual':
inp = input('Submit to the repository? [y/n]')
if not inp.lower() != 'y':
sys.exit(' skipping the submission.' )
else:
sys.exit(' skipping the submission.' )

# Serialize back to JSON
jw(institutions, getfile('institutions')[0])
jw(jsn_ordered, outfile)

# normal entries if not specified.
os.popen(f'git add -A"').read()
os.popen(f'git commit -m "New entry {data["acronym"]} to the Institutions file"').read()
if 'OVERRIDE_AUTHOR' in os.environ:
os.popen(f'git commit --author="{os.environ["OVERRIDE_AUTHOR"]} {os.environ["OVERRIDE_AUTHOR"]}@users.noreply.github.com" -m "New entry {data["acronym"]} to the Institutions LD file"').read()
else:
os.popen(f'git commit -m "New entry {data["acronym"]} to the Institutions LD file"').read()







Expand All @@ -147,3 +162,31 @@ def parse_ror_data(ror_data):




# def search_ror(query):

# import requests,json
# import urllib.parse

# # Strip out strange characters and insert in the desired format
# format_name = lambda n : urllib.parse.quote(n)
# # Make the API call
# url = 'https://api.ror.org/organizations?affiliation=%{}s'

# response = requests.get(url.format(query))

# # Check if the request was successful
# if response.status_code == 200:
# data = response.json()
# if data.get('items'):
# org = data['items'][0].get('organization')
# return data['items'][0]['score'],org['id'].split('/')[-1], org['name']
# else: return None,None,None
# else:
# print(f"Error: {response.status_code} - {response.text}")
# return None,None,None



# data = parsed['institutions']
# data['institutions'] = parsed['institutions']['cmip6_acronyms']
Loading

0 comments on commit 141db1e

Please sign in to comment.