diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml
index 1ae103e..789a240 100644
--- a/.github/workflows/dev.yml
+++ b/.github/workflows/dev.yml
@@ -35,6 +35,18 @@ jobs:
uses: actions/checkout@v2
with:
path: ror-api
+ - name: Checkout ror-data-test
+ uses: actions/checkout@v2
+ with:
+ repository: ror-community/ror-data-test
+ token: ${{ secrets.PERSONAL_ACCESS_TOKEN }}
+ path: ror-data-test
+ - name: Get last data dump name
+ working-directory: ./ror-data-test
+ run: |
+ FILE="$(ls -Art *.zip | tail -n 1)"
+ echo ${FILE%.*}
+ echo "LATEST_DUMP_FILE=${FILE%.*}" >> $GITHUB_ENV
- name: Cache dependency
uses: actions/cache@v2
with:
@@ -57,7 +69,7 @@ jobs:
- name: Setup
working-directory: ./ror-api
run: |
- python manage.py setup v1.35-2023-10-26-ror-data -t
+ python manage.py setup v1.42-2024-02-21-ror-data -t
# Dump file temp hard coded for v2 beta
# Pulled from ror-data-test per settings.py config
- name: Test
diff --git a/Dockerfile b/Dockerfile
index 26f8583..9ebc84e 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -16,7 +16,7 @@ RUN mv /etc/apt/sources.list.d /etc/apt/sources.list.d.bak && \
mv /etc/apt/sources.list.d.bak /etc/apt/sources.list.d && \
apt-get upgrade -y -o Dpkg::Options::="--force-confold" && \
apt-get clean && \
- apt-get install ntp wget unzip tzdata python3-pip -y && \
+ apt-get install ntp wget unzip tzdata python3-pip libmagic1 -y && \
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
# Enable Passenger and Nginx and remove the default site
diff --git a/README.md b/README.md
index bd934d5..9d2ec34 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,3 @@
-[](https://travis-ci.com/ror-community/ror-api)
-
# Research Organization Registry (ROR) API
The ROR API allows retrieving, searching and filtering the organizations indexed in ROR. The results are returned in JSON. See https://ror.readme.io for documentation.
@@ -154,3 +152,169 @@ ROR dataset ZIP archive created
This will create a new `data/ror-2020-03-15` folder, containing a `ror.json` and `ror.zip`. To finish the process, add the new folder to git and push to the GitHub repo.
To install the updated ROR data, run `./manage.py setup`.
+
+## Create new record file (v2 only)
+
+Making a POST request to `/organizations` performs the following actions:
+- Populates fields with supplied values
+- Adds default values for optional fields
+- Populates Geonames details fields with values from the Geonames API, based on the Geonames ID provided
+- Validates submitted metadata against the ROR schema. Note that only schema validation is performed - additional tests included in [validation-suite](https://github.com/ror-community/validation-suite), such as checking relationship pairs, are not performed.
+- Orders fields and values within fields alphabetically (consistent with API behavior)
+- Returns JSON that can be saved to a file and used during the [ROR data release creation & deployment process](https://github.com/ror-community/ror-records?tab=readme-ov-file#ror-data-release-creation--deployment-steps)
+
+**A POST request to this route DOES NOT immediately add a new record to the ROR API.**
+
+### Usage
+
+1. Prepare a JSON file formatted according to the [ROR v2 JSON schema](https://github.com/ror-community/ror-schema/blob/schema-v2/ror_schema_v2_0.json). Ensure that all required fields EXCEPT `id` contain values. DO NOT include a value in the `id` field or in geonames_details fields. These values will be generated. Optional fields and the `id` field may be omitted.
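+
+   For example (hypothetical values), a minimal input file might contain only the required fields (locations, names, status, types):
+
+       {
+           "locations": [{"geonames_id": 6252001, "geonames_details": {}}],
+           "names": [{"types": ["ror_display"], "value": "University of Stuff", "lang": null}],
+           "status": "active",
+           "types": ["education"]
+       }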
+
+2. Make a POST request to `/organizations` with the JSON file as the data payload. Credentials are required for POST requests.
+
+ curl -X POST -H "Route-User: [API USER]" -H "Token: [API TOKEN]" "http://api.dev.ror.org/v2/organizations" -d @[PATH TO JSON FILE].json -H "Content-Type: application/json"
+
+3. The response is a schema-valid JSON object populated with the submitted metadata as well as a ROR ID and Geonames details retrieved from Geonames. Fields and values will be ordered as in the ROR API and optional fields will be populated with empty or null values. Redirect the response to a file for use in the ROR data deployment process. **The resulting record is NOT added to the ROR index.**
+
+## Update existing record file (v2 only)
+
+Making a PUT request to `/organizations/[ROR ID]` performs the following actions:
+
+- Overwrites fields with supplied values
+- Populates Geonames details fields with values from the Geonames API, based on the Geonames ID provided
+- Validates submitted metadata against the ROR schema. Note that only schema validation is performed - additional tests included in [validation-suite](https://github.com/ror-community/validation-suite), such as checking relationship pairs, are not performed.
+- Orders fields and values within fields alphabetically (consistent with API behavior)
+- Returns JSON that can be saved to a file and used during the [ROR data release creation & deployment process](https://github.com/ror-community/ror-records?tab=readme-ov-file#ror-data-release-creation--deployment-steps)
+
+**A PUT request to this route DOES NOT immediately update a record in the ROR API.**
+
+### Usage
+
+1. Prepare a JSON file formatted according to the [ROR v2 JSON schema](https://github.com/ror-community/ror-schema/blob/schema-v2/ror_schema_v2_0.json). It is only necessary to include the `id` field and any fields that you wish to update. Existing field values will be overwritten by values included in the file. If you wish to delete all existing values from a field, include the field in the JSON file with value `[]` (multi-value fields) or `null` (single-value fields). Geonames details will be updated during record generation regardless of which fields are included in the JSON.
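+
+   For example (hypothetical values), a file that replaces `types` and clears all `domains` might look like:
+
+       {
+           "id": "https://ror.org/01an7q238",
+           "types": ["education"],
+           "domains": []
+       }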
+
+2. Make a PUT request to `/organizations/[ROR ID]` with the JSON file as the data payload. Credentials are required for PUT requests. The ROR ID specified in the request path must match the ROR ID in the `id` field of the JSON data.
+
+ curl -X PUT -H "Route-User: [API USER]" -H "Token: [API TOKEN]" "http://api.dev.ror.org/v2/organizations/[ROR ID]" -d @[PATH TO JSON FILE].json -H "Content-Type: application/json"
+
+3. The response is a schema-valid JSON object populated with the updates in the submitted metadata as well as updated Geonames details retrieved from Geonames. Fields and values will be ordered as in the ROR API and optional fields will be populated with empty or null values. Redirect the response to a file for use in the ROR data deployment process. **The resulting record is NOT updated in the ROR index.**
+
+## Create/update multiple record files from a CSV
+
+Making a POST request to `/bulkupdate` performs the following actions:
+
+- Validates the CSV file to ensure that it contains all required columns
+- Loops through each row and performs the following actions:
+  - If no value is included in the `id` column, attempt to create a new record file with values specified in the CSV
+  - If a value is included in the `id` column, attempt to retrieve the existing record and create an updated record file with changes specified in the CSV
+ - If validation or other errors occur during record creation, the row is skipped and error(s) are recorded in the report.csv file
+- Generates a zipped file containing files for all new/updated records, as well as a report.csv file with a row for each row in the input CSV and a copy of the input CSV file
+- Uploads the zipped file to AWS S3
+- Returns a summary message with counts of records created/updated/skipped and the URL for the zipped file
+- Records can be downloaded from S3 and used during the [ROR data release creation & deployment process](https://github.com/ror-community/ror-records?tab=readme-ov-file#ror-data-release-creation--deployment-steps)
+
+**A POST request to this route DOES NOT immediately add new/updated records to the ROR API.**
+
+### Usage
+
+1. Prepare a CSV file as specified below with 1 row for each new or updated record. New and updated records can be included in the same file.
+
+2. Make a POST request to `/bulkupdate` with the filepath specified in the file field of a multi-part form payload. Credentials are required for POST requests.
+
+ curl -X POST -H "Route-User: [API USER]" -H "Token: [API TOKEN]" 'https://api.dev.ror.org/v2/bulkupdate' --form 'file=@"[PATH TO CSV FILE].csv"'
+
+3. The response is a summary with counts of records created/updated/skipped and a link to download the generated files from AWS S3.
+
+ {"file":"https://s3.eu-west-1.amazonaws.com/2024-03-09_15_56_26-ror-records.zip","rows processed":208,"created":207,"udpated":0,"skipped":1}
+
+The zipped file contains the following items:
+- **input.csv:** Copy of the CSV submitted to the API
+- **report.csv:** CSV with a row for each processed row in the input CSV, with an indication of whether it was created, updated or skipped. If a record was created, its new ROR ID is listed in the `ror_id` column. If a record was skipped, the reason(s) are listed in the `errors` column.
+- **new:** Directory containing JSON files for records that were successfully created (omitted if no records were created)
+- **updates:** Directory containing JSON files for records that were successfully updated (omitted if no records were updated)
+
+#### Validate only
+Use the `?validate` parameter to simulate running the bulkupdate request without actually generating files. The response is the same CSV report described above.
+
+1. Make a POST request to `/bulkupdate?validate` with the filepath specified in the file field of a multi-part form payload. Credentials are required for POST requests. Make sure to redirect the output to a CSV file on your machine.
+
+ curl -X POST -H "Route-User: [API USER]" -H "Token: [API TOKEN]" 'https://api.dev.ror.org/v2/bulkupdate?validate' --form 'file=@"[PATH TO CSV FILE].csv"' > report.csv
+
+
+### CSV formatting
+
+#### Column headings & values
+
+- All column headings below must be included, but they are not required to contain values
+- Columns can be in any order
+- Additional columns can be included, at any position
+- For new records, the `id` column value must be empty
+- For updated records, the `id` column must contain the ROR ID for the existing production record you would like to update
+- For list fields, multiple values should be separated with `;` (with or without a trailing space). The last value in a list can be followed by a trailing `;` (or not - behavior is the same in both cases).
+- For values with language codes, specify the language by adding `*` followed by the ISO-639 reference name or 2-char code, ex `*French` or `*FR`. Use reference names from the [Python library iso639](https://github.com/LBeaudoux/iso639/blob/master/iso639/data/ISO-639-2_utf-8.txt)
+- Values in the `status` and `types` fields can be specified using any casing, but will be converted to lowercase
+
+
+| Column name | Value format | Example | Notes |
+| ------------------------------------ | ---------------------------------- | ----------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| id | Single | [https://ror.org/01an7q238](https://ror.org/01an7q238) | ROR ID as full URL; include for updated records only |
+| domains | Single or Multiple, separated by ; | foo.org<br>foo.org;bar.org | |
+| established | Single | 1973 | |
+| external_ids.type.fundref.all | Single or Multiple, separated by ; | 100000015<br>100000015;100006157 | |
+| external_ids.type.fundref.preferred | Single | 100000015 | Preferred value must exist in all |
+| external_ids.type.grid.all | Single or Multiple, separated by ; | grid.85084.31<br>grid.85084.31;grid.85084.58 | |
+| external_ids.type.grid.preferred | Single | grid.85084.31 | Preferred value must exist in all |
+| external_ids.type.isni.all | Single or Multiple, separated by ; | 0000 0001 2342 3717<br>0000 0001 2342 3717;0000 0001 2342 3525 | |
+| external_ids.type.isni.preferred | Single | 0000 0001 2342 3717 | Preferred value must exist in all |
+| external_ids.type.wikidata.all | Single or Multiple, separated by ; | Q217810<br>Q217810;Q6422983 | |
+| external_ids.type.wikidata.preferred | Single | Q217810 | Preferred value must exist in all |
+| links.type.website | Single or Multiple, separated by ; | https://foo.org<br>https://foo.org;https://foo.bar.org | |
+| links.type.wikipedia | Single or Multiple, separated by ; | http://en.wikipedia.org/wiki/foo<br>http://en.wikipedia.org/wiki/foo;http://en.wikipedia.org/wiki/bar | |
+| locations.geonames_id | Single or Multiple, separated by ; | 6252001<br>6252001;6252002 | |
+| names.types.acronym | Single or Multiple, separated by ; | US<br>US;UoS | |
+| names.types.alias | Single or Multiple, separated by ; | Stuff University<br>Stuff University;U Stuff | |
+| names.types.label | Single or Multiple, separated by ; | Universidad de Stuff\*Spanish<br>Universidad de Stuff\*Spanish;Université de Stuff\*French | Language can be specified for any name type using its full ISO 639-2 reference name or 2-char code, ex \*French or \*FR. Python iso639 is used for language code conversion, and it has some quirks. See mapping of language names to codes: https://github.com/LBeaudoux/iso639/blob/master/iso639/data/ISO-639-2_utf-8.txt |
+| names.types.ror_display | Single | University of Stuff | |
+| status | Single | active | Any casing allowed; will be converted to lowercase |
+| types | Single or Multiple, separated by ; | government<br>government;education | Any casing allowed; will be converted to lowercase |
+
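+For example, an abridged row for a new record might look like the following (hypothetical values; a real file must include every column listed above, even if empty):
+
+    id,names.types.ror_display,names.types.label,locations.geonames_id,types,status
+    ,University of Stuff,Universidad de Stuff*Spanish,6252001,education,active
+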
+#### Update syntax
+
+- For new records, specify just the desired field values in the CSV (no actions)
+- For updated records, use the syntax `add==`, `delete==`, `delete` or `replace==` to specify the action to be taken on specified values, ex `add==Value to be added` or `add==Value to be added;Another value to be added`
+- Add and delete actions can be combined, ex `add==Value to be added;Another value to be added;delete==Value to be deleted`. Add or delete cannot be combined with replace, because replace would overwrite anything specified by add/delete actions
+- Some actions are not allowed for certain fields (see below); invalid actions or invalid combinations of actions will result in the row being skipped. Errors are recorded in report.csv.
+- When processing a given field, delete actions are processed first, followed by add actions, regardless of how they are ordered in the submitted CSV
+
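+For example (hypothetical values), a `links.type.website` cell that adds one link and deletes another could contain:
+
+    add==https://new.foo.org;delete==https://old.foo.org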
+
+| Action | Behavior | Allowed fields | Notes |
+| ------------------------------- | --------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| add== | Add specified item(s) to multi-item field | domains, external_ids.type.fundref.all, external_ids.type.grid.all, external_ids.type.isni.all, external_ids.type.wikidata.all, links.type.website, links.type.wikipedia, locations, names.types.acronym, names.types.alias, names.types.label, types | Values to be added are validated to ensure they don't already exist in field, however, only exact matches are checked. Variants with different leading/trailing characters and/or diacritics are not matched.<br>add== has special behavior for external_ids.[type].all and names fields - see below. |
+| delete== | Remove specified item(s) from multi-item field | domains, external_ids.type.fundref.all, external_ids.type.grid.all, external_ids.type.isni.all, external_ids.type.wikidata.all, links.type.website, links.type.wikipedia, locations, names.types.acronym, names.types.alias, names.types.label, types | Values to be deleted are validated to ensure they exist in field, however, only exact matches are checked. Variants with different leading/trailing characters and/or diacritics are not matched.<br>delete== has special behavior for external_ids.[type].all and names fields - see below |
+| delete | Remove all values from field (single or multi-item field) | All optional fields. Not allowed for required fields: locations, names.types.ror_display, status, types | |
+| replace== | Replace all value(s) with specified value(s) (single or multi-item field) | All fields | replace== has special behavior for external_ids.[type].all and names fields - see below |
+| no action (only value supplied) | Replace existing value or add value to currently empty field (single-item fields) | established, external_ids preferred, status, names.types.ror_display | Same action as replace |
+
+#### Fields with special behaviors
+For some fields that contain a list of dictionaries as their value, update actions have special behaviors.
+
+##### External IDs
+
+| Action | external_ids.[TYPE].all | external_ids.[TYPE].preferred |
+| ------------------------------- | ------------------------------------------------------------------------------------------------------------ | ---------------------------------------------------------------------------------------------------- |
+| add== | If an external_ids object with the type exists, value(s) are added to external_ids.[TYPE].all. If an external_ids object with the type does not exist, a new object is added with the specified value(s) in external_ids.[TYPE].all. A preferred ID is NOT automatically added - it must be explicitly specified in external_ids.[TYPE].preferred. | Not allowed. add== action is only allowed for multi-value fields |
+| delete== | Value(s) are removed from external_ids.[TYPE].all. After all changes to external_ids.[TYPE].all and external_ids.[TYPE].preferred are calculated, if the result is that BOTH fields are empty, the entire external_ids object is deleted. The preferred ID is NOT automatically removed if its value is removed from external_ids.[TYPE].all - it must be explicitly deleted from external_ids.[TYPE].preferred | Not allowed. delete== action is only allowed for multi-value fields |
+| replace== | Replaces any existing value(s) in external_ids.[TYPE].all or populates the field if no value(s) exist. The preferred ID is NOT automatically removed if its value is removed from external_ids.[TYPE].all - it must be explicitly deleted from external_ids.[TYPE].preferred | Replaces any existing value in external_ids.[TYPE].preferred or populates the field if no value exists. The value is NOT automatically added to external_ids.[TYPE].all - it must be explicitly added to external_ids.[TYPE].all |
+| delete | Deletes all existing values from external_ids.[TYPE].all. The preferred ID is NOT automatically removed - it must be explicitly deleted from external_ids.[TYPE].preferred. After all changes to external_ids.[TYPE].all and external_ids.[TYPE].preferred are calculated, if the result is that BOTH fields are empty, the entire external_ids object is deleted. | Deletes any existing value in external_ids.[TYPE].preferred. The value is NOT automatically removed from external_ids.[TYPE].all - it must be explicitly deleted from external_ids.[TYPE].all |
+| no action (only value supplied) | Same as replace== | Same as replace== |
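+
+For example (hypothetical values), given an existing object `{"type": "fundref", "all": ["100000015"], "preferred": "100000015"}`, putting `add==100006157` in the `external_ids.type.fundref.all` column adds the new ID to `all` but leaves `preferred` unchanged: `{"type": "fundref", "all": ["100000015", "100006157"], "preferred": "100000015"}`.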
+
+##### Names
+
+| Action | names.[TYPE] |
+| ------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| add== | If a names object with the exact same value AND language exists, the type is added to the types field. If not, a new names object is added with the specified value, language and type. If no language is specified, the lang field is null. NOTE: because matching is based on the combination of value AND lang, a case like "value": "University of Foo", "lang": null does not match "value": "University of Foo", "lang": "en" |
+| delete== | If the name to be removed has multiple types in its types field, the specified type is removed from the types field, but the names object remains. If the result of all changes is a names object with no types, the entire names object is removed. |
+| replace== | Names of the specified type are removed according to the delete== rules above, then added according to the add== rules above. Depending on the existing values on the record and the values specified in replace==, this can result in some names objects being added, some removed and/or some with changes to their types field. |
+| delete | Removes the specified type from all names objects that currently have that type in their types field. If the result of all changes is a names object with no types, the entire names object is removed. |
+| no action (only value supplied) | Same as replace== |
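+
+For example, given an existing names object `{"types": ["alias"], "value": "U Stuff", "lang": null}`, putting `add==U Stuff` in the `names.types.label` column matches the existing value and (null) language, so the object becomes `{"types": ["alias", "label"], "value": "U Stuff", "lang": null}` rather than a second names object being added.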
+
diff --git a/requirements.txt b/requirements.txt
index ab14247..84b7ffe 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -20,5 +20,8 @@ boto3
pandas==1.4.1
numpy==1.22
titlecase==2.3
-update_address @ git+https://github.com/ror-community/update_address.git
-launchdarkly-server-sdk
\ No newline at end of file
+update_address @ git+https://github.com/ror-community/update_address.git@v2-locations
+launchdarkly-server-sdk==7.6.1
+jsonschema==3.2.0
+python-magic
+iso639-lang
\ No newline at end of file
diff --git a/rorapi/common/create_update.py b/rorapi/common/create_update.py
new file mode 100644
index 0000000..cafdb18
--- /dev/null
+++ b/rorapi/common/create_update.py
@@ -0,0 +1,96 @@
+import copy
+from datetime import datetime
+from rorapi.common.record_utils import *
+import update_address as ua
+from rorapi.v2.record_constants import *
+from rorapi.v2.serializers import (
+ OrganizationSerializer as OrganizationSerializerV2
+)
+from rorapi.management.commands.generaterorid import check_ror_id
+
+V2_SCHEMA = get_file_from_url("https://raw.githubusercontent.com/ror-community/ror-schema/schema-v2/ror_schema_v2_0.json")
+
+
+def update_record(json_input, existing_record):
+ record = copy.deepcopy(existing_record)
+ for k, v in json_input.items():
+ record[k] = copy.deepcopy(v)
+ return update_last_mod(record)
+
+def update_last_mod(record):
+ record['admin']['last_modified'] = copy.deepcopy(V2_LAST_MOD)
+ record['admin']['last_modified']['date'] = datetime.now().strftime("%Y-%m-%d")
+ return record
+
+def check_optional_fields(record):
+ for k in V2_OPTIONAL_FIELD_DEFAULTS:
+ if k not in record:
+ return True
+ return False
+
+def add_missing_optional_fields(record):
+ for k, v in V2_OPTIONAL_FIELD_DEFAULTS.items():
+ if k not in record:
+ record[k] = v
+ return record
+
+def add_created_last_mod(record):
+ today = datetime.now().strftime("%Y-%m-%d")
+ record['admin'] = copy.deepcopy(V2_ADMIN)
+ record['admin']['created']['date'] = today
+ record['admin']['last_modified']['date'] = today
+ return record
+
+def update_locations(locations):
+ error = None
+ updated_locations = []
+ for location in locations:
+ if 'geonames_id' in location:
+ try:
+ print(location['geonames_id'])
+ updated_location = ua.new_geonames_v2(str(location['geonames_id']))
+ updated_locations.append(updated_location['location'])
+            except Exception:
+ error = "Error retrieving Geonames data for ID {}. Please check that this is a valid Geonames ID".format(location['geonames_id'])
+ return error, updated_locations
+
+def sort_list_fields(v2_record):
+ for field in v2_record:
+ if field in V2_SORT_KEYS:
+ if V2_SORT_KEYS[field] is not None:
+ sort_key = V2_SORT_KEYS[field]
+ sorted_vals = sorted(v2_record[field], key=lambda x: x[sort_key])
+ else:
+ sorted_vals = sorted(v2_record[field])
+ v2_record[field] = sorted_vals
+ return v2_record
+
+
+def new_record_from_json(json_input, version):
+ error = None
+ valid_data = None
+ new_record = copy.deepcopy(json_input)
+ if check_optional_fields(new_record):
+ new_record = add_missing_optional_fields(new_record)
+ error, updated_locations = update_locations(new_record['locations'])
+ if not error:
+ new_record['locations'] = updated_locations
+ new_record = add_created_last_mod(new_record)
+ new_ror_id = check_ror_id(version)
+ print("new ror id: " + new_ror_id)
+ new_record['id'] = new_ror_id
+ error, valid_data = validate_record(sort_list_fields(new_record), V2_SCHEMA)
+ return error, valid_data
+
+
+def update_record_from_json(new_json, existing_org):
+ error = None
+ valid_data = None
+ serializer = OrganizationSerializerV2(existing_org)
+ existing_record = serializer.data
+ updated_record = update_record(new_json, existing_record)
+ error, updated_locations = update_locations(updated_record['locations'])
+ if not error:
+ updated_record['locations'] = updated_locations
+ error, valid_data = validate_record(sort_list_fields(updated_record), V2_SCHEMA)
+ return error, valid_data
diff --git a/rorapi/common/csv_bulk.py b/rorapi/common/csv_bulk.py
new file mode 100644
index 0000000..ff42d70
--- /dev/null
+++ b/rorapi/common/csv_bulk.py
@@ -0,0 +1,118 @@
+import csv
+import json
+import io
+import os
+import shutil
+import urllib
+from datetime import datetime
+from rest_framework.renderers import JSONRenderer
+from rorapi.settings import DATA
+from rorapi.v2.serializers import (
+ OrganizationSerializer as OrganizationSerializerV2
+)
+from rorapi.common.csv_update import update_record_from_csv
+from rorapi.common.csv_create import new_record_from_csv
+
+
+def save_record_file(ror_id, updated, json_obj, dir_name):
+ dir_path = os.path.join(DATA['DIR'],dir_name)
+ if not os.path.exists(dir_path):
+ os.makedirs(dir_path)
+ subdir = 'updates' if updated else 'new'
+ if not os.path.exists(os.path.join(dir_path, subdir)):
+ os.mkdir(os.path.join(dir_path, subdir))
+ full_path = os.path.join(dir_path, subdir, ror_id.split('https://ror.org/')[1] + '.json')
+ with open(full_path, "w") as outfile:
+ json.dump(json_obj, outfile, ensure_ascii=False, indent=2)
+
+def save_report_file(report, report_fields, csv_file, dir_name, validate_only):
+ dir_path = os.path.join(DATA['DIR'],dir_name)
+ if not os.path.exists(dir_path):
+ os.makedirs(dir_path)
+ filepath = os.path.join(dir_path, 'report.csv')
+ with open(filepath, 'w') as csvfile:
+ writer = csv.DictWriter(csvfile, fieldnames=report_fields)
+ writer.writeheader()
+ writer.writerows(report)
+ if not validate_only:
+ # save copy of input file
+ filepath = os.path.join(dir_path, 'input.csv')
+ csv_file.seek(0)
+ with open(filepath, 'wb+') as f:
+ for chunk in csv_file.chunks():
+ f.write(chunk)
+
+def process_csv(csv_file, version, validate_only):
+ print("Processing CSV")
+ dir_name = datetime.now().strftime("%Y-%m-%d_%H_%M_%S") + "-ror-records"
+ success_msg = None
+ error = None
+ report = []
+ report_fields = ['row', 'ror_id', 'action', 'errors']
+ skipped_count = 0
+ updated_count = 0
+ new_count = 0
+ read_file = csv_file.read().decode('utf-8')
+ print(read_file)
+ reader = csv.DictReader(io.StringIO(read_file))
+ row_num = 2
+ for row in reader:
+ ror_id = None
+ updated = False
+ print("Row data")
+ print(row)
+ if row['id']:
+ ror_id = row['id']
+ updated = True
+ row_errors, v2_record = update_record_from_csv(row, version)
+ else:
+ row_errors, v2_record = new_record_from_csv(row, version)
+ if not row_errors:
+ if updated:
+ action = 'updated'
+ updated_count += 1
+ else:
+ action = 'created'
+ new_count += 1
+ ror_id = v2_record['id']
+ serializer = OrganizationSerializerV2(v2_record)
+ json_obj = json.loads(JSONRenderer().render(serializer.data))
+ print(json_obj)
+ if not validate_only:
+ #create file
+ file = save_record_file(ror_id, updated, json_obj, dir_name)
+ else:
+ action = 'skipped'
+ skipped_count += 1
+ if validate_only and action == 'created':
+ ror_id = None
+ report.append({"row": row_num, "ror_id": ror_id if ror_id else '', "action": action, "errors": "; ".join(row_errors) if row_errors else ''})
+ row_num += 1
+ if new_count > 0 or updated_count > 0 or skipped_count > 0:
+ try:
+ if validate_only:
+ try:
+ save_report_file(report, report_fields, csv_file, dir_name, validate_only)
+ success_msg = os.path.join(DATA['DIR'], dir_name, 'report.csv')
+ except Exception as e:
+ error = f"Error creating validation report: {e}"
+ else:
+ #create report file
+ save_report_file(report, report_fields, csv_file, dir_name, validate_only)
+ # create zip file
+ zipfile = shutil.make_archive(os.path.join(DATA['DIR'], dir_name), 'zip', DATA['DIR'], dir_name)
+ # upload to S3
+ try:
+ DATA['CLIENT'].upload_file(zipfile, DATA['PUBLIC_STORE'], dir_name + '.zip')
+ zipfile = f"https://s3.eu-west-1.amazonaws.com/{DATA['PUBLIC_STORE']}/{urllib.parse.quote(dir_name)}.zip"
+ success_msg = {"file": zipfile,
+ "rows processed": new_count + updated_count + skipped_count,
+ "created": new_count,
+ "updated": updated_count,
+ "skipped": skipped_count}
+ except Exception as e:
+ error = f"Error uploading zipfile to S3: {e}"
+ except Exception as e:
+ error = f"Unexpected error generating records: {e}"
+
+ return error, success_msg
\ No newline at end of file
diff --git a/rorapi/common/csv_create.py b/rorapi/common/csv_create.py
new file mode 100644
index 0000000..cf1260d
--- /dev/null
+++ b/rorapi/common/csv_create.py
@@ -0,0 +1,110 @@
+import copy
+from rorapi.common.record_utils import *
+from rorapi.common.csv_utils import *
+from rorapi.v2.record_constants import *
+from rorapi.common.serializers import ErrorsSerializer
+from rorapi.common.create_update import new_record_from_json
+
+
+def new_record_from_csv(csv_data, version):
+ v2_data = copy.deepcopy(V2_TEMPLATE)
+ errors = []
+ #domains
+ if csv_data['domains']:
+ v2_data['domains'] = [d.strip() for d in csv_data['domains'].strip(';').split(';')]
+
+ #established
+ if csv_data['established']:
+ v2_data['established'] = int(csv_data['established'].strip())
+
+ #external ids
+ for k,v in V2_EXTERNAL_ID_TYPES.items():
+ if csv_data['external_ids.type.' + v + '.all']:
+ all_ids = [i.strip() for i in csv_data['external_ids.type.' + v + '.all'].strip(';').split(';')]
+ ext_id_obj = {
+ "type": v,
+ "all": all_ids,
+ "preferred": csv_data['external_ids.type.' + v + '.preferred'].strip() if csv_data['external_ids.type.' + v + '.preferred'] else all_ids[0]
+ }
+ v2_data['external_ids'].append(ext_id_obj)
+
+ #links
+ for k,v in V2_LINK_TYPES.items():
+ if csv_data['links.type.' + v]:
+ for l in csv_data['links.type.' + v].strip(';').split(';'):
+ link_obj = {
+ "type": v,
+ "value": l.strip()
+ }
+ v2_data['links'].append(link_obj)
+
+ #locations
+ if csv_data['locations.geonames_id']:
+ geonames_ids = [i.strip() for i in csv_data['locations.geonames_id'].strip(';').split(';')]
+ for geonames_id in geonames_ids:
+ location_obj = {
+ "geonames_id": geonames_id,
+ "geonames_details": {}
+ }
+ v2_data['locations'].append(location_obj)
+
+ #names
+ temp_names = []
+ for k,v in V2_NAME_TYPES.items():
+ if csv_data['names.types.' + v]:
+ for n in csv_data['names.types.' + v].strip(';').split(';'):
+ if LANG_DELIMITER in n:
+                    name_val, lang = n.split("*")
+                    # default when no language follows the delimiter
+                    lang_code = None
+                    if lang:
+ lang_errors, lang_code = get_lang_code(lang.strip())
+ if lang_errors:
+ errors.append("Could not convert language value to ISO code: {}".format(lang))
+ else:
+ name_val = n
+ lang_code = None
+
+ name_obj = {
+ "types": [v],
+ "value": name_val.strip(),
+ "lang": lang_code
+ }
+ temp_names.append(name_obj)
+ print("temp names 1:")
+ print(temp_names)
+ name_values = [n['value'] for n in temp_names]
+ dup_names = []
+ for n in name_values:
+ if name_values.count(n) > 1:
+ if n not in dup_names:
+ dup_names.append(n)
+ if dup_names:
+ dup_names_objs = []
+ for d in dup_names:
+ types = []
+ for t in temp_names:
+ if t['value'] == d:
+ types.extend(t['types'])
+ name_obj = {
+ "types": types,
+ "value": d,
+ "lang": None
+ }
+ dup_names_objs.append(name_obj)
+ temp_names = [t for t in temp_names if t['value'] not in dup_names]
+ temp_names.extend(dup_names_objs)
+ print("temp names 2:")
+ print(temp_names)
+ v2_data['names'] = temp_names
+
+ #status
+ if csv_data['status']:
+ v2_data['status'] = csv_data['status'].strip().lower()
+
+ #types
+ if csv_data['types']:
+ v2_data['types'] = [t.strip().lower() for t in csv_data['types'].strip(';').split(';')]
+
+ validation_error, new_record = new_record_from_json(v2_data, version)
+ if validation_error:
+ errors.append(validation_error)
+ return errors, new_record
\ No newline at end of file
diff --git a/rorapi/common/csv_update.py b/rorapi/common/csv_update.py
new file mode 100644
index 0000000..d8b44a6
--- /dev/null
+++ b/rorapi/common/csv_update.py
@@ -0,0 +1,388 @@
+import copy
+from rorapi.common.record_utils import *
+from rorapi.v2.record_constants import *
+from rorapi.common.csv_utils import *
+from rorapi.v2.serializers import (
+ OrganizationSerializer as OrganizationSerializerV2
+)
+from rorapi.common.queries import retrieve_organization
+from rorapi.common.serializers import ErrorsSerializer
+from rorapi.common.create_update import update_record_from_json
+
+def update_record_from_csv(csv_data, version):
+ errors = []
+ updated_record = None
+ print("updating record from csv")
+ existing_org_errors, existing_org = retrieve_organization(csv_data['id'], version)
+ print(existing_org)
+ if existing_org is None:
+ errors.append("No existing record found for ROR ID '{}'".format(csv_data['id']))
+ else:
+ row_validation_errors = validate_csv_row_update_syntax(csv_data)
+ if row_validation_errors:
+ errors.extend(row_validation_errors)
+ print("row validation errors:")
+ print(errors)
+ else:
+ serializer = OrganizationSerializerV2(existing_org)
+ existing_record = serializer.data
+ print(existing_record)
+ update_data = {}
+
+ #domains
+ if csv_data['domains']:
+ actions_values = get_actions_values(csv_data['domains'])
+ temp_domains = copy.deepcopy(existing_record['domains'])
+ print("initial temp domains:")
+ print(temp_domains)
+ if UPDATE_ACTIONS['DELETE'] in actions_values:
+ delete_values = actions_values[UPDATE_ACTIONS['DELETE']]
+ if delete_values is None:
+ temp_domains = []
+ else:
+ for d in delete_values:
+ if d not in temp_domains:
+ errors.append("Attempting to delete domain(s) that don't exist: {}".format(d))
+
+ temp_domains = [d for d in temp_domains if d not in delete_values]
+ print("temp domains delete")
+ print(temp_domains)
+ if UPDATE_ACTIONS['ADD'] in actions_values:
+ add_values = actions_values[UPDATE_ACTIONS['ADD']]
+ for a in add_values:
+ if a in temp_domains:
+ errors.append("Attempting to add domain(s) that already exist: {}".format(a))
+ print(add_values)
+ temp_domains.extend(add_values)
+ print("temp domains add")
+ print(temp_domains)
+ if UPDATE_ACTIONS['REPLACE'] in actions_values:
+ temp_domains = actions_values[UPDATE_ACTIONS['REPLACE']]
+ print("temp domains replace")
+ print(temp_domains)
+ print("final temp domains:")
+ print(temp_domains)
+ update_data['domains'] = temp_domains
+
+ #established
+ if csv_data['established']:
+ actions_values = get_actions_values(csv_data['established'])
+ if UPDATE_ACTIONS['DELETE'] in actions_values:
+ update_data['established'] = None
+ if UPDATE_ACTIONS['REPLACE'] in actions_values:
+ update_data['established'] = int(actions_values[UPDATE_ACTIONS['REPLACE']][0])
+
+ #external ids
+ updated_ext_id_types = []
+ for k,v in V2_EXTERNAL_ID_TYPES.items():
+ if csv_data['external_ids.type.' + v + '.all'] or csv_data['external_ids.type.' + v + '.preferred']:
+ updated_ext_id_types.append(v)
+ if updated_ext_id_types:
+ temp_ext_ids = copy.deepcopy(existing_record['external_ids'])
+ for t in updated_ext_id_types:
+ temp_all = []
+ temp_preferred = None
+ existing_ext_id_obj = None
+ existing_ext_ids_type = [i for i in temp_ext_ids if i['type'] == t]
+ if len(existing_ext_ids_type) == 1:
+ existing_ext_id_obj = existing_ext_ids_type[0]
+ temp_all = existing_ext_id_obj['all']
+ temp_preferred = existing_ext_id_obj['preferred']
+ if len(existing_ext_ids_type) > 1:
+ errors.append("Something is wrong. Multiple external ID objects with type ".format(t))
+
+ # external_ids.all
+ if csv_data['external_ids.type.' + t + '.all']:
+ actions_values = get_actions_values(csv_data['external_ids.type.' + t + '.all'])
+ if UPDATE_ACTIONS['DELETE'] in actions_values:
+ delete_values = actions_values[UPDATE_ACTIONS['DELETE']]
+ if delete_values is None:
+ temp_all = []
+ else:
+ for d in delete_values:
+ if d not in temp_all:
+ errors.append("Attempting to delete external ID(s) from {}.all that don't exist: {}".format(t, d))
+ temp_all = [i for i in temp_all if i not in delete_values]
+ if UPDATE_ACTIONS['ADD'] in actions_values:
+ add_values = [a for a in actions_values[UPDATE_ACTIONS['ADD']]]
+ for a in add_values:
+ if a in temp_all:
+ errors.append("Attempting to add external ID(s) to {}.all that already exist: {}".format(t, a))
+ temp_all.extend(add_values)
+ if UPDATE_ACTIONS['REPLACE'] in actions_values:
+ temp_all = actions_values[UPDATE_ACTIONS['REPLACE']]
+
+ # external_ids.preferred
+ if csv_data['external_ids.type.' + t + '.preferred']:
+ actions_values = get_actions_values(csv_data['external_ids.type.' + t + '.preferred'])
+ if UPDATE_ACTIONS['DELETE'] in actions_values:
+ temp_preferred = None
+ if UPDATE_ACTIONS['REPLACE'] in actions_values:
+ temp_preferred = actions_values[UPDATE_ACTIONS['REPLACE']][0]
+
+ if (not temp_all) and temp_preferred is None:
+ # remove all of type
+ if not existing_ext_id_obj:
+ errors.append("Attempting to delete external ID object with type {} that doesn't exist.".format(t))
+ temp_ext_ids = [i for i in temp_ext_ids if i['type'] != t]
+
+ else:
+                if temp_preferred is not None and temp_preferred not in temp_all:
+ errors.append("Changes to external ID object with type {} result in preferred value '{}' not in all values '{}'".format(t, temp_preferred, ", ".join(temp_all)))
+ # remove all of type and replace with new obj
+ new_ext_id_obj = {
+ "type": t,
+ "all": temp_all,
+ "preferred": temp_preferred
+ }
+ if existing_ext_id_obj:
+ temp_ext_ids = [i for i in temp_ext_ids if i['type'] != t]
+ temp_ext_ids.append(new_ext_id_obj)
+
+ update_data['external_ids'] = temp_ext_ids
+
+ #links
+ updated_link_types = []
+ for k,v in V2_LINK_TYPES.items():
+ if csv_data['links.type.' + v]:
+ updated_link_types.append(v)
+ if updated_link_types:
+ temp_links = copy.deepcopy(existing_record['links'])
+ for t in updated_link_types:
+ if csv_data['links.type.' + t]:
+ actions_values = get_actions_values(csv_data['links.type.' + t])
+ existing_links = [tl['value'] for tl in temp_links]
+ if UPDATE_ACTIONS['DELETE'] in actions_values:
+ delete_values = actions_values[UPDATE_ACTIONS['DELETE']]
+ if delete_values is None:
+ temp_links = [tl for tl in temp_links if tl['type'] != t]
+ else:
+ for d in delete_values:
+ if d not in existing_links:
+ errors.append("Attempting to delete link(s) that don't exist: {}".format(d))
+ temp_links = [tl for tl in temp_links if tl['value'] not in delete_values]
+ if UPDATE_ACTIONS['ADD'] in actions_values:
+ add_values = [a for a in actions_values[UPDATE_ACTIONS['ADD']]]
+ for a in add_values:
+ if a in existing_links:
+ errors.append("Attempting to add link(s) that already exist: {}".format(a))
+ for a in add_values:
+ link_obj = {
+ "type": t,
+ "value": a
+ }
+ temp_links.append(link_obj)
+ if UPDATE_ACTIONS['REPLACE'] in actions_values:
+ temp_links = [l for l in temp_links if l['type'] != t]
+ for r in actions_values[UPDATE_ACTIONS['REPLACE']]:
+ link_obj = {
+ "type": t,
+ "value": r
+ }
+ temp_links.append(link_obj)
+ print("final temp links:")
+ print(temp_links)
+ update_data['links'] = temp_links
+
+ #locations
+ if csv_data['locations.geonames_id']:
+ actions_values = get_actions_values(csv_data['locations.geonames_id'])
+ temp_locations = copy.deepcopy(existing_record['locations'])
+ print("initial temp locations:")
+ print(temp_locations)
+ existing_geonames_ids = [tl['geonames_id'] for tl in temp_locations]
+ print(existing_geonames_ids)
+ if UPDATE_ACTIONS['DELETE'] in actions_values:
+ delete_values = [int(d) for d in actions_values[UPDATE_ACTIONS['DELETE']]]
+ for d in delete_values:
+ if d not in existing_geonames_ids:
+ errors.append("Attempting to delete locations(s) that don't exist: {}".format(d))
+ if len(existing_geonames_ids) == len(delete_values):
+ errors.append("Cannot remove all values from required field 'locations'")
+ temp_locations = [tl for tl in temp_locations if tl['geonames_id'] not in delete_values]
+ if UPDATE_ACTIONS['ADD'] in actions_values:
+ add_values = [int(a) for a in actions_values[UPDATE_ACTIONS['ADD']]]
+ for a in add_values:
+ if int(a) in existing_geonames_ids:
+ errors.append("Attempting to add locations(s) that already exist: {}".format(a))
+ for a in add_values:
+ location_obj = {
+ "geonames_id": int(a),
+ "geonames_details": {}
+ }
+ temp_locations.append(location_obj)
+ if UPDATE_ACTIONS['REPLACE'] in actions_values:
+ temp_locations = []
+ for r in actions_values[UPDATE_ACTIONS['REPLACE']]:
+ location_obj = {
+ "geonames_id": int(r),
+ "geonames_details": {}
+ }
+ temp_locations.append(location_obj)
+ print("final temp locations:")
+ print(temp_locations)
+ update_data['locations'] = temp_locations
+
+ #names
+ updated_name_types = []
+ for k,v in V2_NAME_TYPES.items():
+ if csv_data['names.types.' + v]:
+ updated_name_types.append(v)
+ print("updated name types")
+ print(updated_name_types)
+ if updated_name_types:
+ temp_names = copy.deepcopy(existing_record['names'])
+ for t in updated_name_types:
+ print("updating name type " + t)
+ if csv_data['names.types.' + t]:
+ actions_values = get_actions_values(csv_data['names.types.' + t])
+ for k, v in actions_values.items():
+ if v:
+ vals_obj_list = []
+ for val in v:
+ print("val is")
+ print(val)
+ vals_obj = {
+ "value": None,
+ "lang": None
+ }
+ if LANG_DELIMITER in val:
+ print("has lang delim")
+ name_val, lang = val.split("*")
+ vals_obj["value"] = name_val.strip()
+ if lang:
+ lang_errors, lang_code = get_lang_code(lang.strip())
+ if lang_errors:
+ errors.append("Could not convert language value to ISO code: {}".format(lang))
+ else:
+ vals_obj["lang"] = lang_code
+ else:
+ vals_obj["value"] = val.strip()
+ vals_obj_list.append(vals_obj)
+ actions_values[k] = vals_obj_list
+ print("updated actions values")
+ print(actions_values)
+ if UPDATE_ACTIONS['DELETE'] in actions_values:
+ print("delete in actions")
+ delete_values = actions_values[UPDATE_ACTIONS['DELETE']]
+ print(delete_values)
+ if delete_values is None:
+ temp_names = [tn for tn in temp_names if t not in tn['types']]
+ else:
+ for d in delete_values:
+ temp_names_match = [tn for tn in temp_names if (t in tn['types'] and tn['value'] == d['value'] and tn['lang'] == d['lang'])]
+ if not temp_names_match:
+ errors.append("Attempting to delete name(s) that don't exist: {}".format(d))
+ else:
+ for tnm in temp_names_match:
+ temp_names.remove(tnm)
+ #if name has multiple types, delete type only
+ if len(tnm['types']) > 1:
+ temp_types = [tnm_type for tnm_type in tnm['types'] if tnm_type != t]
+ tnm['types'] = temp_types
+ temp_names.append(tnm)
+
+ if UPDATE_ACTIONS['ADD'] in actions_values:
+ add_values = actions_values[UPDATE_ACTIONS['ADD']]
+ for a in add_values:
+ temp_names_match = [tn for tn in temp_names if (t in tn['types'] and tn['value'] == a['value'] and tn['lang'] == a['lang'])]
+ temp_names_null_lang_match = [tn for tn in temp_names if (tn['value'] == a['value'] and (tn['lang'] is None and a['lang'] is not None))]
+ # check if value, lang and type already exist
+ if temp_names_match or temp_names_null_lang_match:
+ if temp_names_match:
+ errors.append("Attempting to add names that already exists: {}".format(a))
+ if temp_names_null_lang_match:
+ errors.append("Attempting to add name with lang code that already exists with no lang code: {}".format(a))
+ else:
+ name_vals_match = [tn for tn in temp_names if (tn['value'] == a['value'] and tn['lang'] == a['lang'])]
+ if name_vals_match:
+ print("name vals match")
+ print(name_vals_match)
+ for nvm in name_vals_match:
+ # if value and lang exist but not type, add type only
+ if len(nvm['types']) > 0:
+ temp_names.remove(nvm)
+ nvm['types'].append(t)
+ temp_names.append(nvm)
+ else:
+ # if value and lang don't exist add new name obj
+ name_obj = {
+ "types": [t],
+ "value": a['value'],
+ "lang": a['lang']
+ }
+ temp_names.append(name_obj)
+ if UPDATE_ACTIONS['REPLACE'] in actions_values:
+ temp_names_match = [tn for tn in temp_names if t in tn['types']]
+ # remove all names of current type from temp names using same rules as delete
+ if temp_names_match:
+ for tnm in temp_names_match:
+ temp_names.remove(tnm)
+ #if name has multiple types, delete type only
+ if len(tnm['types']) > 1:
+ temp_types = [tnm_type for tnm_type in tnm['types'] if tnm_type != t]
+ tnm['types'] = temp_types
+ temp_names.append(tnm)
+ replace_values = actions_values[UPDATE_ACTIONS['REPLACE']]
+ for r in replace_values:
+ name_vals_match = [tn for tn in temp_names if (tn['value'] == r['value'] and tn['lang'] == r['lang'])]
+ # add new names of current type to temp names using same rules as add
+ if name_vals_match:
+ for nvm in name_vals_match:
+ # if value and lang exist but not type, add type only
+ if len(nvm['types']) > 0:
+ temp_names.remove(nvm)
+ nvm['types'].append(t)
+ temp_names.append(nvm)
+ else:
+ # if value and lang don't exist add new name obj
+ name_obj = {
+ "types": [t],
+ "value": r['value'],
+ "lang": r['lang']
+ }
+ temp_names.append(name_obj)
+
+ print("final temp names:")
+ print(temp_names)
+ update_data['names'] = temp_names
+
+ #status
+ if csv_data['status']:
+ actions_values = get_actions_values(csv_data['status'])
+ if UPDATE_ACTIONS['DELETE'] in actions_values:
+ errors.append("Cannot delete required field 'status'")
+ if UPDATE_ACTIONS['REPLACE'] in actions_values:
+ update_data['status'] = actions_values[UPDATE_ACTIONS['REPLACE']][0].lower()
+
+ #types
+ if csv_data['types']:
+ actions_values = get_actions_values(csv_data['types'])
+ temp_types = copy.deepcopy(existing_record['types'])
+ print("initial temp types:")
+ print(temp_types)
+ if UPDATE_ACTIONS['DELETE'] in actions_values:
+ delete_values = [av.lower() for av in actions_values[UPDATE_ACTIONS['DELETE']]]
+ for d in delete_values:
+ if d not in temp_types:
+ errors.append("Attempting to delete type(s) that don't exist: {}".format(d))
+ if len(temp_types) == len(delete_values):
+ errors.append("Cannot remove all values from required field 'types'")
+ temp_types = [t for t in temp_types if t not in delete_values]
+ if UPDATE_ACTIONS['ADD'] in actions_values:
+ add_values = [av.lower() for av in actions_values[UPDATE_ACTIONS['ADD']]]
+ for a in add_values:
+ if a in temp_types:
+ errors.append("Attempting to add type(s) that already exist: {}".format(a))
+ temp_types.extend(add_values)
+ if UPDATE_ACTIONS['REPLACE'] in actions_values:
+ temp_types = [av.lower() for av in actions_values[UPDATE_ACTIONS['REPLACE']]]
+ print("final temp types:")
+ print(temp_types)
+ update_data['types'] = temp_types
+
+ if not errors:
+ validation_error, updated_record = update_record_from_json(update_data, existing_record)
+ if validation_error:
+ errors.append(validation_error)
+ return errors, updated_record
\ No newline at end of file
diff --git a/rorapi/common/csv_utils.py b/rorapi/common/csv_utils.py
new file mode 100644
index 0000000..8c4d706
--- /dev/null
+++ b/rorapi/common/csv_utils.py
@@ -0,0 +1,122 @@
+import csv
+import io
+import re
+
+UPDATE_ACTIONS = {
+ "ADD": "add",
+ "DELETE": "delete",
+ "REPLACE": "replace"
+}
+
+UPDATE_ACTIONS_MULTI = [UPDATE_ACTIONS["ADD"], UPDATE_ACTIONS["DELETE"], UPDATE_ACTIONS["REPLACE"]]
+
+UPDATE_ACTIONS_SINGLE = [UPDATE_ACTIONS["DELETE"], UPDATE_ACTIONS["REPLACE"]]
+
+NO_DELETE_FIELDS = ["id", "locations.geonames_id", "names.types.ror_display", "status", "types"]
+
+CSV_REQUIRED_FIELDS_ACTIONS = {
+ "id": None,
+ "domains": UPDATE_ACTIONS_MULTI,
+ "established": UPDATE_ACTIONS_SINGLE,
+ "external_ids.type.fundref.all": UPDATE_ACTIONS_MULTI,
+ "external_ids.type.fundref.preferred": UPDATE_ACTIONS_SINGLE,
+ "external_ids.type.grid.all": UPDATE_ACTIONS_MULTI,
+ "external_ids.type.grid.preferred": UPDATE_ACTIONS_SINGLE,
+ "external_ids.type.isni.all": UPDATE_ACTIONS_MULTI,
+ "external_ids.type.isni.preferred": UPDATE_ACTIONS_SINGLE,
+ "external_ids.type.wikidata.all": UPDATE_ACTIONS_MULTI,
+ "external_ids.type.wikidata.preferred": UPDATE_ACTIONS_SINGLE,
+ "links.type.website": UPDATE_ACTIONS_MULTI,
+ "links.type.wikipedia": UPDATE_ACTIONS_MULTI,
+ "locations.geonames_id": UPDATE_ACTIONS_MULTI,
+ "names.types.acronym": UPDATE_ACTIONS_MULTI,
+ "names.types.alias": UPDATE_ACTIONS_MULTI,
+ "names.types.label": UPDATE_ACTIONS_MULTI,
+ "names.types.ror_display": [UPDATE_ACTIONS["REPLACE"]],
+ "status": [UPDATE_ACTIONS["REPLACE"]],
+ "types": UPDATE_ACTIONS_MULTI
+}
+
+LANG_DELIMITER = "*"
+
+UPDATE_DELIMITER = "=="
+
+
+def get_actions_values(csv_field):
+ print("getting actions values:")
+ actions_values = {}
+    if csv_field.strip().lower() == UPDATE_ACTIONS["DELETE"]:
+ actions_values[UPDATE_ACTIONS["DELETE"]] = None
+ elif UPDATE_DELIMITER in csv_field:
+ for ua in list(UPDATE_ACTIONS.values()):
+ print(ua)
+ if ua + UPDATE_DELIMITER in csv_field:
+ print("doing regex:")
+ regex = r"(" + re.escape(
+ ua + UPDATE_DELIMITER) + r")(.*?)(?=$|(add|delete|replace)==)"
+ result = re.search(regex, csv_field)
+ print(result[0])
+ temp_val = result[0].replace(ua + UPDATE_DELIMITER, '')
+ print("temp val:")
+ print(temp_val)
+ actions_values[ua] = [v.strip() for v in temp_val.split(';') if v]
+
+ else:
+ actions_values[UPDATE_ACTIONS["REPLACE"]] = [v.strip() for v in csv_field.split(';') if v]
+ print(actions_values)
+ return actions_values
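+
+# Illustrative examples of the parsing above (assumed behavior, for documentation only):
+#   get_actions_values("add==foo.org;delete==bar.org") -> {"add": ["foo.org"], "delete": ["bar.org"]}
+#   get_actions_values("delete")          -> {"delete": None}
+#   get_actions_values("foo.org;bar.org") -> {"replace": ["foo.org", "bar.org"]}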
+
+def validate_csv(csv_file):
+ errors = []
+ try:
+ read_file = csv_file.read().decode('utf-8')
+ reader = csv.DictReader(io.StringIO(read_file))
+ rowcount = 0
+ for row in reader:
+ rowcount += 1
+ if rowcount > 0:
+ csv_fields = reader.fieldnames
+ missing_fields = []
+ for field in CSV_REQUIRED_FIELDS_ACTIONS.keys():
+ if field not in csv_fields:
+ missing_fields.append(field)
+ print(missing_fields)
+ if missing_fields:
+ errors.append(f'CSV file is missing columns: {", ".join(missing_fields)}')
+ else:
+ errors.append("CSV file contains no data rows")
+ except IOError as e:
+ errors.append(f"Error parsing CSV file: {e}")
+ print(errors)
+ return errors
+
+def validate_csv_row_update_syntax(csv_data):
+ print("validating row")
+ errors = []
+ for k, v in csv_data.items():
+ if UPDATE_DELIMITER in v:
+ print("field:")
+ print(k)
+ print("value:")
+ print(v)
+ actions_values = get_actions_values(v)
+ print("actions values:")
+ print(actions_values)
+ update_actions = list(actions_values.keys())
+ if not update_actions:
+ errors.append("Update delimiter '{}' found in '{}' field but no valid update action found in value {}".format(UPDATE_DELIMITER, k, v))
+ if len(update_actions) > 2:
+ errors.append("{} update actions '{}' found in '{}' field but only 2 are allowed".format(str(len(update_actions)), ", ".join(update_actions), k))
+ if len(update_actions) == 2:
+            if not (UPDATE_ACTIONS['ADD'] in update_actions and UPDATE_ACTIONS['DELETE'] in update_actions):
+ errors.append("Invalid combination of update actions '{}' found in '{}' field.".format(", ".join(update_actions), k))
+ disallowed_actions = [ua for ua in update_actions if ua not in CSV_REQUIRED_FIELDS_ACTIONS[k]]
+ print("allowed actions:")
+ print(CSV_REQUIRED_FIELDS_ACTIONS[k])
+ print("disallowed actions:")
+ print(disallowed_actions)
+ if disallowed_actions:
+ errors.append("Invalid update action(s) '{}' found in {} field. Allowed actions for this field are '{}'".format(", ".join(disallowed_actions), k, ", ".join(CSV_REQUIRED_FIELDS_ACTIONS[k])))
+ if v.strip() == UPDATE_ACTIONS['DELETE'].lower() and k in NO_DELETE_FIELDS:
+ errors.append("Invalid update action '{}' in {} field. Cannot remove all values from a required field.".format(UPDATE_ACTIONS['DELETE'], k))
+ return errors
\ No newline at end of file
diff --git a/rorapi/common/features.py b/rorapi/common/features.py
index f3a9644..680402f 100644
--- a/rorapi/common/features.py
+++ b/rorapi/common/features.py
@@ -3,4 +3,4 @@
from rorapi.settings import LAUNCH_DARKLY_KEY
ldclient.set_config(Config(LAUNCH_DARKLY_KEY))
-launch_darkly_client = ldclient.get()
+launch_darkly_client = ldclient.get()
\ No newline at end of file
diff --git a/rorapi/common/parsers.py b/rorapi/common/parsers.py
new file mode 100644
index 0000000..687cc4a
--- /dev/null
+++ b/rorapi/common/parsers.py
@@ -0,0 +1,23 @@
+import jsonschema
+import requests
+from rest_framework.exceptions import ParseError
+from rest_framework.parsers import JSONParser
+
+
+class JSONSchemaParser(JSONParser):
+
+ def get_file_from_url(self, url):
+ rsp = requests.get(url)
+ rsp.raise_for_status()
+ return rsp.json()
+
+ def parse(self, stream, media_type=None, parser_context=None):
+ schema = self.get_file_from_url("https://raw.githubusercontent.com/ror-community/ror-schema/schema-v2/ror_schema_v2_0.json")
+ data = super(JSONSchemaParser, self).parse(stream, media_type,
+ parser_context)
+ try:
+ jsonschema.validate(data, schema)
+ except jsonschema.ValidationError as error:
+ raise ParseError(detail=error.message)
+ else:
+ return data
\ No newline at end of file
diff --git a/rorapi/common/record_utils.py b/rorapi/common/record_utils.py
new file mode 100644
index 0000000..2332785
--- /dev/null
+++ b/rorapi/common/record_utils.py
@@ -0,0 +1,34 @@
+import jsonschema
+import requests
+from iso639 import Lang
+
+
+def get_lang_code(lang_string):
+ lang_code = None
+ error = None
+ if len(lang_string) == 2:
+ lang_string = lang_string.lower()
+ else:
+ lang_string = lang_string.title()
+ try:
+ lg = Lang(lang_string)
+ lang_code = lg.pt1
+ except Exception as e:
+ error = e.msg
+ return error, lang_code
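+# Illustrative examples (assuming the iso639 library's standard mappings):
+#   get_lang_code("FR")     -> (None, "fr")
+#   get_lang_code("french") -> (None, "fr")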
+
+def get_file_from_url(url):
+ rsp = requests.get(url)
+ rsp.raise_for_status()
+ return rsp.json()
+
+def validate_record(data, schema):
+ try:
+ print("validating data:")
+ print(data)
+ jsonschema.validate(data, schema)
+ except jsonschema.ValidationError as e:
+ return "Validation error: " + e.message, None
+ else:
+ return None, data
+
diff --git a/rorapi/common/urls.py b/rorapi/common/urls.py
index ba04d06..b9aa57c 100644
--- a/rorapi/common/urls.py
+++ b/rorapi/common/urls.py
@@ -1,22 +1,25 @@
from django.conf.urls import url, include
-from django.urls import path
+from django.urls import path, re_path
from rest_framework.documentation import include_docs_urls
-
from . import views
-from rorapi.common.views import HeartbeatView,GenerateAddress,GenerateId,IndexData,IndexDataDump
+from rorapi.common.views import (
+ HeartbeatView,GenerateAddress,GenerateId,IndexData,IndexDataDump,BulkUpdate)
urlpatterns = [
# Health check
url(r"^(?P(v1|v2))\/heartbeat$", HeartbeatView.as_view()),
url(r"^heartbeat$", HeartbeatView.as_view()),
# Using REST API
+ url(r"^(?P(v1|v2))\/generateaddress\/(?P[0-9]+)", GenerateAddress.as_view()),
path('generateaddress/', GenerateAddress.as_view()),
url(r"^generateid$", GenerateId.as_view()),
- path('indexdata/', IndexData.as_view()),
+ re_path(r"^(?P(v1|v2))\/bulkupdate$", BulkUpdate.as_view()),
+ url(r"^(?P(v1|v2))\/indexdata/(?P.*)", IndexData.as_view()),
url(r"^(?P(v1|v2))\/indexdatadump\/(?Pv(\d+\.)?(\d+\.)?(\*|\d+)-\d{4}-\d{2}-\d{2}-ror-data)\/(?P(test|prod))$", IndexDataDump.as_view()),
url(r"^(?P(v1|v2))\/", include(views.organizations_router.urls)),
url(r"^", include(views.organizations_router.urls)),
url(r"^docs/", include_docs_urls(title="Research Organization Registry")),
# Prometheus
url("", include("django_prometheus.urls")),
]
diff --git a/rorapi/common/views.py b/rorapi/common/views.py
index 3d76d2b..e163012 100644
--- a/rorapi/common/views.py
+++ b/rorapi/common/views.py
@@ -1,13 +1,19 @@
+import csv
+import os
from rest_framework import viewsets, routers, status
from rest_framework.response import Response
from django.http import HttpResponse
from django.views import View
from django.shortcuts import redirect
-from rest_framework.authentication import BasicAuthentication
from rest_framework.permissions import BasePermission
from rest_framework.views import APIView
-import json
+from rest_framework.parsers import FormParser, MultiPartParser
+from rorapi.settings import DATA
+import mimetypes
+import magic
+from rorapi.common.create_update import new_record_from_json, update_record_from_json
+from rorapi.common.csv_bulk import process_csv
+from rorapi.common.csv_utils import validate_csv
from rorapi.settings import REST_FRAMEWORK
from rorapi.common.matching import match_organizations
from rorapi.common.models import (
@@ -33,12 +39,32 @@
from rorapi.management.commands.generaterorid import check_ror_id
from rorapi.management.commands.generaterorid import check_ror_id
from rorapi.management.commands.indexror import process_files
-
from django.core import management
import rorapi.management.commands.indexrordump
+
+
+class OurTokenPermission(BasePermission):
+ """
+    Allows GET requests without credentials; all other methods require our token and user name.
+ """
+
+ def has_permission(self, request, view):
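+        # GET requests are open to everyone; all other methods must supply
+        # the Route-User/Token header pair configured in the environment.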
+ has_permission = False
+ if request.method == 'GET':
+ has_permission = True
+ else:
+ header_token = request.headers.get("Token", None)
+ header_user = request.headers.get("Route-User", None)
+ user = os.environ.get("ROUTE_USER")
+ token = os.environ.get("TOKEN")
+ if header_token == token and header_user == user:
+ has_permission = True
+
+ return has_permission
+
class OrganizationViewSet(viewsets.ViewSet):
+ permission_classes = [OurTokenPermission]
+
lookup_value_regex = r"((https?(:\/\/|%3A%2F%2F))?ror\.org(\/|%2F))?.*"
def list(self, request, version=REST_FRAMEWORK["DEFAULT_VERSION"]):
@@ -87,6 +113,57 @@ def retrieve(self, request, pk=None, version=REST_FRAMEWORK["DEFAULT_VERSION"]):
serializer = OrganizationSerializerV1(organization)
return Response(serializer.data)
+ def create(self, request, version=REST_FRAMEWORK["DEFAULT_VERSION"]):
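+        # v2 only: reject payloads that already contain an id, then hand off
+        # to new_record_from_json, which adds defaults and Geonames details
+        # and validates the result against the v2 schema.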
+ errors = None
+ if version == "v2":
+ json_input = request.data
+ if 'id' in json_input and (json_input['id'] is not None and json_input['id'] != ""):
+ errors = Errors(["Value {} found in ID field. New records cannot contain a value in the ID field".format(json_input['id'])])
+ else:
+ create_error, valid_data = new_record_from_json(json_input, version)
+ if create_error:
+ errors = Errors([create_error])
+ else:
+ errors = Errors(["Version {} does not support creating records".format(version)])
+ if errors is not None:
+ return Response(
+ ErrorsSerializer(errors).data, status=status.HTTP_400_BAD_REQUEST
+ )
+ serializer = OrganizationSerializerV2(valid_data)
+ return Response(serializer.data)
+
+ def update(self, request, pk=None, version=REST_FRAMEWORK["DEFAULT_VERSION"]):
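+        # v2 only: the target record must exist and the payload id must match
+        # the ROR ID in the request URL before update_record_from_json applies
+        # the changes.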
+ errors = None
+ if version == "v2":
+ ror_id = get_ror_id(pk)
+ if ror_id is None:
+ errors = Errors(["'{}' is not a valid ROR ID".format(pk)])
+ return Response(
+ ErrorsSerializer(errors).data, status=status.HTTP_404_NOT_FOUND
+ )
+ errors, organization = retrieve_organization(ror_id, version)
+ if organization is None:
+ return Response(
+ ErrorsSerializer(errors).data, status=status.HTTP_404_NOT_FOUND
+ )
+ json = request.data
+ if 'id' not in json:
+ errors = Errors(["No value found in ID field. Updated records must include a value in the ID field"])
+ elif get_ror_id(json['id']) != ror_id:
+ errors = Errors(["Value {} in IDs field does not match resource ID specified in request URL {}".format(json['id'], pk)])
+ else:
+ update_error, valid_data = update_record_from_json(json, organization)
+ if update_error:
+ errors = Errors([update_error])
+ else:
+ errors = Errors(["Version {} does not support creating records".format(version)])
+ if errors is not None:
+ return Response(
+ ErrorsSerializer(errors).data, status=status.HTTP_400_BAD_REQUEST
+ )
+ serializer = OrganizationSerializerV2(valid_data)
+ return Response(serializer.data)
+
organizations_router = routers.DefaultRouter(trailing_slash=False)
organizations_router.register(
@@ -106,24 +183,14 @@ def get(self, request, version=REST_FRAMEWORK["DEFAULT_VERSION"]):
return HttpResponse(status=500)
-class OurTokenPermission(BasePermission):
- """
- Allows access only to using our token and user name.
- """
-
- def has_permission(self, request, view):
- header_token = request.headers.get("Token", None)
- header_user = request.headers.get("Route-User", None)
- user = os.environ.get("ROUTE_USER")
- token = os.environ.get("TOKEN")
- return header_token == token and header_user == user
-
-
class GenerateAddress(APIView):
permission_classes = [OurTokenPermission]
- def get(self, request, geonamesid):
- address = ua.new_geonames(geonamesid)
+ def get(self, request, geonamesid, version=REST_FRAMEWORK["DEFAULT_VERSION"]):
+ if version == 'v2':
+ address = ua.new_geonames_v2(geonamesid)
+ else:
+ address = ua.new_geonames(geonamesid)
return Response(address)
@@ -138,14 +205,13 @@ def get(self, request, version=REST_FRAMEWORK["DEFAULT_VERSION"]):
class IndexData(APIView):
permission_classes = [OurTokenPermission]
- def get(self, request, branch):
+ def get(self, request, branch, version=REST_FRAMEWORK["DEFAULT_VERSION"]):
st = 200
- msg = process_files(branch)
+ msg = process_files(branch, version)
if msg["status"] == "ERROR":
st = 400
return Response({"status": msg["status"], "msg": msg["msg"]}, status=st)
-
class IndexDataDump(APIView):
permission_classes = [OurTokenPermission]
@@ -162,3 +228,51 @@ def get(self, request, filename, dataenv, version=REST_FRAMEWORK["DEFAULT_VERSIO
st = 400
return Response({"status": msg}, status=st)
+
+
+class BulkUpdate(APIView):
+ permission_classes = [OurTokenPermission]
+ parser_classes = (MultiPartParser, FormParser)
+
+ def post(self, request, version=REST_FRAMEWORK["DEFAULT_VERSION"]):
+ validate_only = False
+ errors = None
+ if version == 'v2':
+ if request.data:
+ file_object = request.data['file']
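+                # libmagic sniffs the first 2KB of the upload to confirm the
+                # file is text/CSV before any rows are parsed.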
+ mime_type = magic.from_buffer(file_object.read(2048))
+ if "ASCII text" in mime_type or "UTF-8 text" in mime_type or "UTF-8 Unicode text" in mime_type or "CSV text" in mime_type:
+ file_object.seek(0)
+ csv_validation_errors = validate_csv(file_object)
+ if len(csv_validation_errors) == 0:
+ file_object.seek(0)
+ params = request.GET.dict()
+ if "validate" in params:
+ validate_only = True
+ process_csv_error, msg = process_csv(file_object, version, validate_only)
+ if process_csv_error:
+ errors = Errors([process_csv_error])
+ else:
+                        errors = Errors(csv_validation_errors)
+ else:
+ errors = Errors(["File upload must be CSV. File type '{}' is not supported".format(mime_type)])
+ else:
+ errors = Errors(["Could not processs request. No data included in request."])
+ else:
+ errors = Errors(["Version {} does not support creating records".format(version)])
+ if errors is not None:
+ return Response(
+ ErrorsSerializer(errors).data, status=status.HTTP_400_BAD_REQUEST
+ )
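+        # In validate-only mode process_csv returns the path to a CSV report,
+        # which is sent back as an attachment instead of applying any changes.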
+ if validate_only:
+ with open(msg) as file:
+ response = HttpResponse(file, content_type='text/csv')
+ response['Content-Disposition'] = 'attachment; filename=reports.csv'
+ return response
+
+ return Response(
+ msg,
+ status=status.HTTP_201_CREATED
+ )
\ No newline at end of file
diff --git a/rorapi/management/commands/indexror.py b/rorapi/management/commands/indexror.py
index 59c43b4..b6d643e 100644
--- a/rorapi/management/commands/indexror.py
+++ b/rorapi/management/commands/indexror.py
@@ -13,7 +13,7 @@
from django.core.management.base import BaseCommand
from elasticsearch import TransportError
-def get_nested_names(org):
+def get_nested_names_v1(org):
yield org['name']
for label in org['labels']:
yield label['label']
@@ -22,8 +22,11 @@ def get_nested_names(org):
for acronym in org['acronyms']:
yield acronym
+def get_nested_names_v2(org):
+ for name in org['names']:
+ yield name['value']
-def get_nested_ids(org):
+def get_nested_ids_v1(org):
yield org['id']
yield re.sub('https://', '', org['id'])
yield re.sub('https://ror.org/', '', org['id'])
@@ -34,6 +37,14 @@ def get_nested_ids(org):
for eid in ext_id['all']:
yield eid
+def get_nested_ids_v2(org):
+ yield org['id']
+ yield re.sub('https://', '', org['id'])
+ yield re.sub('https://ror.org/', '', org['id'])
+ for ext_id in org['external_ids']:
+ for eid in ext_id['all']:
+ yield eid
+
def prepare_files(path, local_file):
data = []
err = {}
@@ -88,7 +99,7 @@ def get_data():
return contents, err
-def process_files(dir):
+def process_files(dir, version):
err = []
if dir:
path = os.path.join(DATA['WORKING_DIR'], dir)
@@ -104,7 +115,7 @@ def process_files(dir):
if path and file and not(e):
data, e = prepare_files(path, file)
if not(e):
- index_error = index(data)
+ index_error = index(data, version)
err.append(index_error)
else:
err.append(e)
@@ -116,14 +127,17 @@ def process_files(dir):
if err:
msg = {"status": "ERROR", "msg": err}
else:
- msg = {"status": "OK", "msg": f"{dir} indexed"}
+ msg = {"status": "OK", "msg": f"{dir} indexed using version {version}"}
return msg
-def index(dataset):
+def index(dataset, version):
err = {}
- index = ES_VARS['INDEX_V1']
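+    # Choose the Elasticsearch index matching the requested schema version.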
+ if version == 'v2':
+ index = ES_VARS['INDEX_V2']
+ else:
+ index = ES_VARS['INDEX_V1']
backup_index = '{}-tmp'.format(index)
ES7.reindex(body={
'source': {
@@ -144,12 +158,20 @@ def index(dataset):
'_id': org['id']
}
})
- org['names_ids'] = [{
- 'name': n
- } for n in get_nested_names(org)]
- org['names_ids'] += [{
- 'id': n
- } for n in get_nested_ids(org)]
+ if 'v2' in index:
+ org['names_ids'] = [{
+ 'name': n
+ } for n in get_nested_names_v2(org)]
+ org['names_ids'] += [{
+ 'id': n
+ } for n in get_nested_ids_v2(org)]
+ else:
+ org['names_ids'] = [{
+ 'name': n
+ } for n in get_nested_names_v1(org)]
+ org['names_ids'] += [{
+ 'id': n
+ } for n in get_nested_ids_v1(org)]
body.append(org)
ES7.bulk(body)
except TransportError:
@@ -171,9 +193,11 @@ class Command(BaseCommand):
def add_arguments(self, parser):
parser.add_argument('dir', type=str, help='add directory name for S3 bucket to be processed')
+ parser.add_argument('version', type=str, help='schema version of files to be processed')
def handle(self,*args, **options):
dir = options['dir']
- process_files(dir)
+ version = options['version']
+ process_files(dir, version)
diff --git a/rorapi/settings.py b/rorapi/settings.py
index 81476d7..a8f3504 100644
--- a/rorapi/settings.py
+++ b/rorapi/settings.py
@@ -39,7 +39,8 @@
'SECRET_KEY', '0y0zn=hnz99$+c6lejml@chch54s2y2@-z##i$pstn62doft_g')
# SECURITY WARNING: don't run with debug turned on in production!
-DEBUG = os.environ.get('PASSENGER_APP_ENV', 'development') == 'development'
+#DEBUG = os.environ.get('PASSENGER_APP_ENV', 'development') == 'development'
+DEBUG = False
ALLOWED_HOSTS = ['*']
@@ -253,6 +254,7 @@
DATA = {}
DATA['DATA_STORE'] = os.environ.get('DATA_STORE', None)
+DATA['PUBLIC_STORE'] = os.environ.get('PUBLIC_STORE', None)
DATA['WORKING_DIR'] = os.path.join(BASE_DIR, 'rorapi', 'data', '')
if DATA['DATA_STORE']:
diff --git a/rorapi/tests/tests_unit/data/test_data_create_valid.json b/rorapi/tests/tests_unit/data/test_data_create_valid.json
new file mode 100644
index 0000000..533d944
--- /dev/null
+++ b/rorapi/tests/tests_unit/data/test_data_create_valid.json
@@ -0,0 +1,68 @@
+{
+ "locations": [
+ {
+ "geonames_id": 2661552,
+ "geonames_details": {
+ "country_code": "CH",
+ "country_name": "Switzerland",
+ "lat": 46.94809,
+ "lng": 7.44744,
+ "name": "Bern"
+ }
+ }
+ ],
+ "established": null,
+ "external_ids": [
+ {
+ "type": "grid",
+ "all": [
+ "grid.426225.5"
+ ],
+ "preferred": "grid.426225.5"
+ }
+ ],
+ "id": "https://ror.org/00wz65j53",
+ "domains": ["wisc.edu"],
+ "links": [
+ {
+ "type": "website",
+ "value": "https://www.jdsu.com"
+ }
+ ],
+ "names": [
+ {
+ "value": "JDSU (Switzerland)",
+ "types": [
+ "ror_display",
+ "label"
+ ],
+ "lang": null
+ }
+ ],
+ "relationships": [
+ {
+ "label": "JDSU (United States)",
+ "type": "parent",
+ "id": "https://ror.org/01a5v8x09"
+ },
+ {
+ "label": "Viavi Solutions (United States)",
+ "type": "successor",
+ "id": "https://ror.org/059a9e323"
+ }
+ ],
+ "status": "inactive",
+ "types": [
+ "company"
+ ],
+ "admin": {
+ "created": {
+ "date": "2023-07-28",
+ "schema_version": "1.0"
+ },
+ "last_modified": {
+ "date": "2023-07-28",
+ "schema_version": "2.0"
+ }
+ }
+}
\ No newline at end of file
diff --git a/rorapi/tests/tests_unit/data/test_data_new_record_invalid_v2.json b/rorapi/tests/tests_unit/data/test_data_new_record_invalid_v2.json
new file mode 100644
index 0000000..61f3f2a
--- /dev/null
+++ b/rorapi/tests/tests_unit/data/test_data_new_record_invalid_v2.json
@@ -0,0 +1,29 @@
+{
+ "locations": [
+ {
+ "geonames_id": 2661552,
+ "geonames_details": {
+ "country_code": "CH",
+ "country_name": "Switzerland",
+ "lat": 46.94809,
+ "lng": 7.44744,
+ "name": "Bern"
+ }
+ }
+ ],
+ "names": [
+ {
+ "value": "JDSU (Switzerland)",
+ "types": [
+ "ror_display",
+ "label"
+ ],
+ "lang": null
+ }
+ ],
+ "status": "active",
+ "types": [
+ "company"
+ ],
+ "foo": "bar"
+}
\ No newline at end of file
diff --git a/rorapi/tests/tests_unit/data/test_data_new_record_valid_v2.json b/rorapi/tests/tests_unit/data/test_data_new_record_valid_v2.json
new file mode 100644
index 0000000..a349173
--- /dev/null
+++ b/rorapi/tests/tests_unit/data/test_data_new_record_valid_v2.json
@@ -0,0 +1,28 @@
+{
+ "locations": [
+ {
+ "geonames_id": 2661552,
+ "geonames_details": {
+ "country_code": "CH",
+ "country_name": "Switzerland",
+ "lat": 46.94809,
+ "lng": 7.44744,
+ "name": "Bern"
+ }
+ }
+ ],
+ "names": [
+ {
+ "value": "JDSU (Switzerland)",
+ "types": [
+ "ror_display",
+ "label"
+ ],
+ "lang": null
+ }
+ ],
+ "status": "active",
+ "types": [
+ "company"
+ ]
+}
\ No newline at end of file
diff --git a/rorapi/tests/tests_unit/data/test_update_valid.json b/rorapi/tests/tests_unit/data/test_update_valid.json
new file mode 100644
index 0000000..533d944
--- /dev/null
+++ b/rorapi/tests/tests_unit/data/test_update_valid.json
@@ -0,0 +1,68 @@
+{
+ "locations": [
+ {
+ "geonames_id": 2661552,
+ "geonames_details": {
+ "country_code": "CH",
+ "country_name": "Switzerland",
+ "lat": 46.94809,
+ "lng": 7.44744,
+ "name": "Bern"
+ }
+ }
+ ],
+ "established": null,
+ "external_ids": [
+ {
+ "type": "grid",
+ "all": [
+ "grid.426225.5"
+ ],
+ "preferred": "grid.426225.5"
+ }
+ ],
+ "id": "https://ror.org/00wz65j53",
+ "domains": ["wisc.edu"],
+ "links": [
+ {
+ "type": "website",
+ "value": "https://www.jdsu.com"
+ }
+ ],
+ "names": [
+ {
+ "value": "JDSU (Switzerland)",
+ "types": [
+ "ror_display",
+ "label"
+ ],
+ "lang": null
+ }
+ ],
+ "relationships": [
+ {
+ "label": "JDSU (United States)",
+ "type": "parent",
+ "id": "https://ror.org/01a5v8x09"
+ },
+ {
+ "label": "Viavi Solutions (United States)",
+ "type": "successor",
+ "id": "https://ror.org/059a9e323"
+ }
+ ],
+ "status": "inactive",
+ "types": [
+ "company"
+ ],
+ "admin": {
+ "created": {
+ "date": "2023-07-28",
+ "schema_version": "1.0"
+ },
+ "last_modified": {
+ "date": "2023-07-28",
+ "schema_version": "2.0"
+ }
+ }
+}
\ No newline at end of file
diff --git a/rorapi/tests/tests_unit/data/test_upload_csv.csv b/rorapi/tests/tests_unit/data/test_upload_csv.csv
new file mode 100644
index 0000000..494420f
--- /dev/null
+++ b/rorapi/tests/tests_unit/data/test_upload_csv.csv
@@ -0,0 +1,3 @@
+html_url,id,names.types.ror_display,status,types,names.types.alias,names.types.label,names.types.acronym,links.type.website,links.type.wikipedia,domains,established,external_ids.type.fundref.all,external_ids.type.fundref.preferred,external_ids.type.grid.all,external_ids.type.grid.preferred,external_ids.type.isni.all,external_ids.type.isni.preferred,external_ids.type.wikidata.all,external_ids.type.wikidata.preferred,city,country,locations.geonames_id
+https://github.com/ror-community/ror-updates/issues/9185,,Jizzakh branch of the National University of Uzbekistan named after Mirzo Ulugbek,active,Education,Jizzakh branch of the National University of Uzbekistan; Mirzo Ulug`bek nomidagi O`zbekiston milliy universiteti Jizzax filiali; Джизакский филиал Национального университета Узбекистана имени Мирзо Улугбека,Mirzo Ulug`bek nomidagi O`zbekiston milliy universiteti Jizzax filiali*Uzbek,,https://jbnuu.uz,https://uz.wikipedia.org/wiki/O%CA%BBzbekiston_milliy_universitetining_Jizzax_filiali,,2019,,,,,,,Q72342707,Q72342707,Jizzakh,Uzbekistan,1513886
+https://github.com/ror-community/ror-updates/issues/9389,,Znanstveno-raziskovalno središče Koper,active,Facility; Government,SRC Koper; ZRS Koper;,Science and Research Centre of Koper*English; Centro di ricerche scientifiche Capodistria*Italian,,https://www.zrs-kp.si;,,,,,,,,0000 0004 0398 0403,0000 0004 0398 0403,Q49569044,Q49569044,Koper,Slovenia,3197753
\ No newline at end of file
diff --git a/rorapi/tests/tests_unit/tests_models_v2.py b/rorapi/tests/tests_unit/tests_models_v2.py
index b798074..b0f9958 100644
--- a/rorapi/tests/tests_unit/tests_models_v2.py
+++ b/rorapi/tests/tests_unit/tests_models_v2.py
@@ -67,7 +67,8 @@ def test_attributes_exist(self):
organization = Organization(AttrDict(data))
self.assertEqual(organization.id, data["id"])
- self.assertEqual(organization.types, data["types"])
+        for org_type in organization.types:
+            self.assertIn(org_type, data["types"])
self.assertEqual(organization.established, data["established"])
self.assertEqual(
organization.locations[0].geonames_details.lat,
@@ -99,21 +100,18 @@ def test_attributes_exist(self):
self.assertEqual(len(organization.names), 6)
for i, name in enumerate(organization.names):
- self.assertEqual(organization.names[i].value, data["names"][i]["value"])
- self.assertEqual(organization.names[i].types, data["names"][i]["types"])
- self.assertEqual(organization.names[i].lang, data["names"][i]["lang"])
+ matched_names = [n for n in data["names"] if \
+ n['value']==organization.names[i].value and \
+ n['types']==organization.names[i].types and \
+ n['lang']==organization.names[i].lang]
+ self.assertTrue(len(matched_names) == 1)
for i, ext_id in enumerate(organization.external_ids):
- self.assertEqual(
- organization.external_ids[i].all, data["external_ids"][i]["all"]
- )
- self.assertEqual(
- organization.external_ids[i].preferred,
- data["external_ids"][i]["preferred"],
- )
- self.assertEqual(
- organization.external_ids[i].type, data["external_ids"][i]["type"]
- )
+ matched_ids = [e for e in data["external_ids"] if \
+ e['all']==organization.external_ids[i].all and \
+ e['preferred']==organization.external_ids[i].preferred and \
+ e['type']==organization.external_ids[i].type]
+ self.assertTrue(len(matched_ids) == 1)
class MatchedOrganizationTestCase(SimpleTestCase):
@@ -153,15 +151,5 @@ def test_attributes_exist(self):
self.assertEqual(organization.chosen, data["chosen"])
self.assertEqual(organization.organization.id, data["organization"]["id"])
for i, name in enumerate(organization.organization.names):
- self.assertEqual(
- organization.organization.names[i].value,
- data["organization"]["names"][i]["value"],
- )
- self.assertEqual(
- organization.organization.names[i].types,
- data["organization"]["names"][i]["types"],
- )
- self.assertEqual(
- organization.organization.names[i].lang,
- data["organization"]["names"][i]["lang"],
- )
+ matched_names = [n for n in data["organization"]["names"] if n['value']==organization.organization.names[i].value and n['types']==organization.organization.names[i].types and organization.organization.names[i].lang]
+ self.assertTrue(len(matched_names) == 1)
diff --git a/rorapi/tests/tests_unit/tests_queries_v2.py b/rorapi/tests/tests_unit/tests_queries_v2.py
index 9c600e0..b3bca45 100644
--- a/rorapi/tests/tests_unit/tests_queries_v2.py
+++ b/rorapi/tests/tests_unit/tests_queries_v2.py
@@ -537,12 +537,11 @@ def test_search_organizations(self, search_mock):
self.test_data['hits']['hits']):
self.assertEquals(ret.id, exp['_source']['id'])
for i, name in enumerate(ret.names):
- self.assertEqual(ret.names[i].value,
- exp['_source']['names'][i]['value'])
- self.assertEqual(ret.names[i].types,
- exp['_source']['names'][i]['types'])
- self.assertEqual(ret.names[i].lang,
- exp['_source']['names'][i]['lang'])
+ matched_names = [n for n in exp['_source']['names'] if \
+ n['value']==ret.names[i].value and \
+ n['types']==ret.names[i].types and \
+ n['lang']==ret.names[i].lang]
+ self.assertTrue(len(matched_names) == 1)
self.assertEquals(
len(organizations.meta.types),
len(self.test_data['aggregations']['types']['buckets']))
@@ -612,12 +611,11 @@ def test_retrieve_organization(self, search_mock):
expected = self.test_data['hits']['hits'][0]['_source']
self.assertEquals(organization.id, expected['id'])
for i, name in enumerate(organization.names):
- self.assertEqual(organization.names[i].value,
- expected['names'][i]['value'])
- self.assertEqual(organization.names[i].types,
- expected['names'][i]['types'])
- self.assertEqual(organization.names[i].lang,
- expected['names'][i]['lang'])
+ matched_names = [n for n in expected["names"] if \
+ n['value']==organization.names[i].value and \
+ n['types']==organization.names[i].types and \
+ n['lang']==organization.names[i].lang]
+ self.assertTrue(len(matched_names) == 1)
@mock.patch('elasticsearch_dsl.Search.execute')
def test_retrieve_non_existing_organization(self, search_mock):
diff --git a/rorapi/tests/tests_unit/tests_views_v1.py b/rorapi/tests/tests_unit/tests_views_v1.py
index 9a2490f..07ba86b 100644
--- a/rorapi/tests/tests_unit/tests_views_v1.py
+++ b/rorapi/tests/tests_unit/tests_views_v1.py
@@ -218,7 +218,7 @@ def setUp(self):
def test_index_ror_success(self, index_mock, permission_mock):
index_mock.return_value = self.success_msg
permission_mock.return_value = True
- response = self.client.get('/indexdata/foo')
+ response = self.client.get('/v1/indexdata/foo')
self.assertEquals(response.status_code, 200)
@mock.patch('rorapi.common.views.OurTokenPermission.has_permission')
@@ -226,13 +226,13 @@ def test_index_ror_success(self, index_mock, permission_mock):
def test_index_ror_fail_error(self, index_mock, permission_mock):
index_mock.return_value = self.error_msg
permission_mock.return_value = True
- response = self.client.get('/indexdata/foo')
+ response = self.client.get('/v1/indexdata/foo')
self.assertEquals(response.status_code, 400)
@mock.patch('rorapi.common.views.OurTokenPermission.has_permission')
def test_index_ror_fail_no_permission(self, permission_mock):
permission_mock.return_value = False
- response = self.client.get('/indexdata/foo')
+ response = self.client.get('/v1/indexdata/foo')
self.assertEquals(response.status_code, 403)
class HeartbeatViewTestCase(SimpleTestCase):
diff --git a/rorapi/tests/tests_unit/tests_views_v2.py b/rorapi/tests/tests_unit/tests_views_v2.py
index 5a0baf9..a36325e 100644
--- a/rorapi/tests/tests_unit/tests_views_v2.py
+++ b/rorapi/tests/tests_unit/tests_views_v2.py
@@ -6,6 +6,7 @@
from rest_framework.test import APIRequestFactory
from rorapi.common import views
+from rorapi.v2.models import Organization as OrganizationV2
from .utils import IterableAttrDict
@@ -107,7 +108,25 @@ def test_retrieve_organization(self, search_mock):
organization = json.loads(response.content.decode('utf-8'))
# go through every attribute and check to see that they are equal
self.assertEquals(response.status_code, 200)
- self.assertEquals(organization, self.test_data['hits']['hits'][0]['_source'])
+ self.assertEquals(organization['admin'], self.test_data['hits']['hits'][0]['_source']['admin'])
+ for d in organization['domains']:
+ self.assertIn(d, self.test_data['hits']['hits'][0]['_source']['domains'])
+ self.assertEquals(organization['established'], self.test_data['hits']['hits'][0]['_source']['established'])
+ for e in organization['external_ids']:
+ self.assertIn(e, self.test_data['hits']['hits'][0]['_source']['external_ids'])
+ self.assertEquals(organization['id'], self.test_data['hits']['hits'][0]['_source']['id'])
+ for l in organization['links']:
+ self.assertIn(l, self.test_data['hits']['hits'][0]['_source']['links'])
+ for l in organization['locations']:
+ self.assertIn(l, self.test_data['hits']['hits'][0]['_source']['locations'])
+ for n in organization['names']:
+ self.assertIn(n, self.test_data['hits']['hits'][0]['_source']['names'])
+ for r in organization['relationships']:
+ self.assertIn(r, self.test_data['hits']['hits'][0]['_source']['relationships'])
+ self.assertEquals(organization['status'], self.test_data['hits']['hits'][0]['_source']['status'])
+ for t in organization['types']:
+ self.assertIn(t, self.test_data['hits']['hits'][0]['_source']['types'])
+
@mock.patch('elasticsearch_dsl.Search.execute')
def test_retrieve_non_existing_organization(self, search_mock):
@@ -143,6 +162,7 @@ def test_retrieve_invalid_id(self, search_mock):
self.assertEquals(len(organization['errors']), 1)
self.assertTrue(any(['not a valid' in e for e in organization['errors']]))
+
class GenerateIdViewTestCase(SimpleTestCase):
def setUp(self):
with open(
@@ -214,7 +260,7 @@ def setUp(self):
def test_index_ror_success(self, index_mock, permission_mock):
index_mock.return_value = self.success_msg
permission_mock.return_value = True
- response = self.client.get('/indexdata/foo')
+ response = self.client.get('/v2/indexdata/foo')
self.assertEquals(response.status_code, 200)
@mock.patch('rorapi.common.views.OurTokenPermission.has_permission')
@@ -222,13 +268,13 @@ def test_index_ror_success(self, index_mock, permission_mock):
def test_index_ror_fail_error(self, index_mock, permission_mock):
index_mock.return_value = self.error_msg
permission_mock.return_value = True
- response = self.client.get('/indexdata/foo')
+ response = self.client.get('/v2/indexdata/foo')
self.assertEquals(response.status_code, 400)
@mock.patch('rorapi.common.views.OurTokenPermission.has_permission')
def test_index_ror_fail_no_permission(self, permission_mock):
permission_mock.return_value = False
- response = self.client.get('/indexdata/foo')
+ response = self.client.get('/v2/indexdata/foo')
self.assertEquals(response.status_code, 403)
class HeartbeatViewTestCase(SimpleTestCase):
@@ -245,6 +291,56 @@ def test_heartbeat_success(self, search_mock):
response = self.client.get('/v2/heartbeat')
self.assertEquals(response.status_code, 200)
+class BulkUpdateViewTestCase(SimpleTestCase):
+ def setUp(self):
+ self.csv_errors_empty = []
+ self.csv_errors_error = ['error']
+        self.process_csv_msg = {"filename":"filename.zip", "rows processed":1,"created":0,"updated":0,"skipped":1}
+ self.maxDiff = None
+
+ @mock.patch('rorapi.common.views.OurTokenPermission.has_permission')
+ @mock.patch('rorapi.common.views.validate_csv')
+ @mock.patch('rorapi.common.views.process_csv')
+ def test_bulkupdate_success(self, process_csv_mock, validate_csv_mock, permission_mock):
+
+ permission_mock.return_value = True
+ validate_csv_mock.return_value = self.csv_errors_empty
+ process_csv_mock.return_value = None, self.process_csv_msg
+ with open(os.path.join(os.path.dirname(__file__),
+ 'data/test_upload_csv.csv'), 'rb') as f:
+ response = self.client.post('/v2/bulkupdate', {"file":f})
+ self.assertEquals(response.status_code, 201)
+
+ @mock.patch('rorapi.common.views.OurTokenPermission.has_permission')
+ @mock.patch('rorapi.common.views.validate_csv')
+ def test_bulkupdate_fail_error(self, validate_csv_mock, permission_mock):
+ permission_mock.return_value = True
+ validate_csv_mock.return_value = self.csv_errors_error
+ response = self.client.post('/v2/bulkupdate')
+ self.assertEquals(response.status_code, 400)
+
+ @mock.patch('rorapi.common.views.OurTokenPermission.has_permission')
+ def test_bulkupdate_fail_no_permission(self, permission_mock):
+ permission_mock.return_value = False
+ response = self.client.post('/v2/bulkupdate')
+ self.assertEquals(response.status_code, 403)
+
+class CreateOrganizationViewTestCase(SimpleTestCase):
+ # TODO: complete tests. For now just test that endpoint can't be accessed without creds.
+ @mock.patch('rorapi.common.views.OurTokenPermission.has_permission')
+ def test_create_record_fail_no_permission(self, permission_mock):
+ permission_mock.return_value = False
+ response = self.client.post('/v2/organizations')
+ self.assertEquals(response.status_code, 403)
+
+class UpdateOrganizationViewTestCase(SimpleTestCase):
+ # TODO: complete tests. For now just test that endpoint can't be accessed without creds.
+ @mock.patch('rorapi.common.views.OurTokenPermission.has_permission')
+    def test_update_record_fail_no_permission(self, permission_mock):
+ permission_mock.return_value = False
+ response = self.client.put('/v2/organizations/foo')
+ self.assertEquals(response.status_code, 403)
+
class IndexRorDumpViewTestCase(SimpleTestCase):
def setUp(self):
self.success_msg = "SUCCESS: ROR dataset vX.XX-XXXX-XX-XX-ror-data indexed in version X. Using test repo: X"
diff --git a/rorapi/v2/models.py b/rorapi/v2/models.py
index fd13fce..3a13e19 100644
--- a/rorapi/v2/models.py
+++ b/rorapi/v2/models.py
@@ -48,18 +48,25 @@ def __init__(self, data):
if "_source" in data:
data = data["_source"]
super(Organization, self).__init__(
- data, ["domains", "established", "id", "types", "status"]
+ data, ["established", "id", "status"]
)
self.admin = Admin(data.admin)
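+        # Sort list fields so serialized v2 records are returned in a
+        # deterministic, alphabetized order, consistent with API behavior.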
+ self.domains = sorted(data.domains)
+ sorted_ext_ids = sorted(data.external_ids, key=lambda x: x['type'])
self.external_ids = [
- Entity(e, ["type", "preferred", "all"]) for e in data.external_ids
+ Entity(e, ["type", "preferred", "all"]) for e in sorted_ext_ids
]
- self.links = [Entity(l, ["value", "type"]) for l in data.links]
- self.locations = [Location(l) for l in data.locations]
- self.names = [Entity(n, ["value", "lang", "types"]) for n in data.names]
+ sorted_links = sorted(data.links, key=lambda x: x['type'])
+ self.links = [Entity(l, ["value", "type"]) for l in sorted_links]
+ sorted_locations = sorted(data.locations, key=lambda x: x['geonames_id'])
+ self.locations = [Location(l) for l in sorted_locations]
+ sorted_names = sorted(data.names, key=lambda x: x['value'])
+ self.names = [Entity(n, ["value", "lang", "types"]) for n in sorted_names]
+ sorted_rels = sorted(data.relationships, key=lambda x: x['type'])
self.relationships = [
- Entity(r, ["type", "label", "id"]) for r in data.relationships
+ Entity(r, ["type", "label", "id"]) for r in sorted_rels
]
+ self.types = sorted(data.types)
class ListResult:
diff --git a/rorapi/v2/record_constants.py b/rorapi/v2/record_constants.py
new file mode 100644
index 0000000..e962615
--- /dev/null
+++ b/rorapi/v2/record_constants.py
@@ -0,0 +1,67 @@
+V2_ADMIN = {
+ "created": {
+ "date": "",
+ "schema_version": "2.0"
+ },
+ "last_modified": {
+ "date": "",
+ "schema_version": "2.0"
+ }
+}
+
+V2_LAST_MOD = {
+ "date": "",
+ "schema_version": "2.0"
+}
+
+V2_OPTIONAL_FIELD_DEFAULTS = {
+ "domains": [],
+ "established": None,
+ "external_ids": [],
+ "links": [],
+ "relationships": []
+}
+
+V2_TEMPLATE = {
+ "locations": [],
+ "established": None,
+ "external_ids": [],
+ "id": "",
+ "domains": [],
+ "links": [],
+ "names": [],
+ "relationships": [],
+ "status": "",
+ "types": [],
+ "admin": {}
+}
+
+V2_EXTERNAL_ID_TYPES = {
+    "FUNDREF": "fundref",
+    "GRID": "grid",
+    "ISNI": "isni",
+    "WIKIDATA": "wikidata"
+}
+
+V2_LINK_TYPES = {
+    "WEBSITE": "website",
+    "WIKIPEDIA": "wikipedia"
+}
+
+V2_NAME_TYPES = {
+    "ACRONYM": "acronym",
+    "ALIAS": "alias",
+    "LABEL": "label",
+    "ROR_DISPLAY": "ror_display"
+}
+
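+# Sort key for each list field when ordering record values; None means the
+# values themselves are sorted directly.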
+V2_SORT_KEYS = {
+ "domains": None,
+ "external_ids": "type",
+ "links": "type",
+ "locations": "geonames_id",
+ "names": "value",
+ "relationships": "type",
+ "types": None
+}
+
diff --git a/rorapi/v2/record_template.json b/rorapi/v2/record_template.json
new file mode 100644
index 0000000..906a5b7
--- /dev/null
+++ b/rorapi/v2/record_template.json
@@ -0,0 +1,13 @@
+{
+ "locations": [],
+ "established": null,
+ "external_ids": [],
+ "id": "",
+ "domains": [],
+ "links": [],
+ "names": [],
+ "relationships": [],
+ "status": "",
+ "types": [],
+ "admin": {}
+}
\ No newline at end of file