From 034afafcd352fea25b0c15c40b4d3dd60e6361a8 Mon Sep 17 00:00:00 2001 From: Jeremy Adams Date: Mon, 5 Jul 2021 09:34:10 -0400 Subject: [PATCH] move 'More Background on Compact Identifiers" subdoc to its own folder, build via same process as main spec document --- .spec-docs.json | 17 +- .travis.yml | 2 +- openapi/tags/Auth.md | 18 ++ openapi/tags/CompactIdentifierBasedURIs.md | 4 +- openapi/tags/DrsApiPrinciples.md | 10 +- openapi/tags/Motivation.md | 6 +- .../openapi.yaml | 29 ++++ .../tags/About.md | 1 + .../BackgroundOnCompactIdentiferBasedURIs.md | 32 ++++ .../tags/ExampleExistingProvider.md | 43 +++++ .../tags/ExampleRegisterIdentifier.md | 49 ++++++ .../tags/RegisteringOnMetaResolver.md | 9 + {sources => public}/img/figure1.png | Bin {sources => public}/img/figure2.png | Bin {sources => public}/img/figure3.png | Bin {sources => public}/img/prefix_register_1.png | Bin {sources => public}/img/prefix_register_2.png | Bin .../appendix_background_notes_on_drs_uris.md | 7 - .../appendix_compact_identifier_based_uris.md | 73 --------- sources/md/appendix_hostname_based_uris.md | 7 - sources/md/appendix_motivation.md | 32 ---- sources/md/front_matter.md | 154 ------------------ .../more_background_on_compact_identifiers.md | 148 ----------------- 23 files changed, 207 insertions(+), 434 deletions(-) create mode 100644 pages/more-background-on-compact-identifiers/openapi.yaml create mode 100644 pages/more-background-on-compact-identifiers/tags/About.md create mode 100644 pages/more-background-on-compact-identifiers/tags/BackgroundOnCompactIdentiferBasedURIs.md create mode 100644 pages/more-background-on-compact-identifiers/tags/ExampleExistingProvider.md create mode 100644 pages/more-background-on-compact-identifiers/tags/ExampleRegisterIdentifier.md create mode 100644 pages/more-background-on-compact-identifiers/tags/RegisteringOnMetaResolver.md rename {sources => public}/img/figure1.png (100%) rename {sources => public}/img/figure2.png (100%) rename {sources => 
public}/img/figure3.png (100%) rename {sources => public}/img/prefix_register_1.png (100%) rename {sources => public}/img/prefix_register_2.png (100%) delete mode 100644 sources/md/appendix_background_notes_on_drs_uris.md delete mode 100644 sources/md/appendix_compact_identifier_based_uris.md delete mode 100644 sources/md/appendix_hostname_based_uris.md delete mode 100644 sources/md/appendix_motivation.md delete mode 100644 sources/md/front_matter.md delete mode 100644 sources/md/more_background_on_compact_identifiers.md diff --git a/.spec-docs.json b/.spec-docs.json index 576022128..54ec52f63 100644 --- a/.spec-docs.json +++ b/.spec-docs.json @@ -1,7 +1,20 @@ { - "apiSpecPath": "openapi/data_repository_service.openapi.yaml", "docsRoot": "docs", "defaultBranch": "master", "branchPathBase": "preview", - "redocTheme": "ga4gh" + "redocTheme": "ga4gh", + "buildPages": [ + { + "apiSpecPath": "openapi/data_repository_service.openapi.yaml", + "htmlOutfile": "index.html", + "yamlOutfile": "openapi.yaml", + "jsonOutfile": "openapi.json" + }, + { + "apiSpecPath": "pages/more-background-on-compact-identifiers/openapi.yaml", + "htmlOutfile": "more-background-on-compact-identifiers.html", + "yamlOutfile": "more-background-on-compact-identifiers.yaml", + "jsonOutfile": "more-background-on-compact-identifiers.json" + } + ] } diff --git a/.travis.yml b/.travis.yml index dcc83ff7e..b0ac9305e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -20,7 +20,7 @@ jobs: - "12" before_script: - npm install -g @redocly/openapi-cli && npm install -g redoc-cli - - npm install -g gh-openapi-docs + - npm install -g @ga4gh/gh-openapi-docs@0.2.2-rc3 script: - gh-openapi-docs deploy: diff --git a/openapi/tags/Auth.md b/openapi/tags/Auth.md index ecc588e7b..bd4925f31 100644 --- a/openapi/tags/Auth.md +++ b/openapi/tags/Auth.md @@ -22,3 +22,21 @@ The DRS API allows implementers to support a variety of different content access * caller fetches the object bytes from the `url` (passing auth info from the 
specified headers, if any) DRS implementers should ensure their solutions restrict access to targets as much as possible, detect attempts to exploit through log monitoring, and they are prepared to take action if an exploit in their DRS implementation is detected. + +## Authentication + +### BasicAuth + +A valid authorization token must be passed in the 'Authorization' header, e.g. "Basic ${token_string}" + +| Security Scheme Type | HTTP | +|----------------------|------| +| **HTTP Authorization Scheme** | basic | + +### BearerAuth + +A valid authorization token must be passed in the 'Authorization' header, e.g. "Bearer ${token_string}" + +| Security Scheme Type | HTTP | +|----------------------|------| +| **HTTP Authorization Scheme** | bearer | diff --git a/openapi/tags/CompactIdentifierBasedURIs.md b/openapi/tags/CompactIdentifierBasedURIs.md index 4ca72bef5..3fc2ce0f2 100644 --- a/openapi/tags/CompactIdentifierBasedURIs.md +++ b/openapi/tags/CompactIdentifierBasedURIs.md @@ -4,7 +4,7 @@ The examples below show the current API interactions with [n2t.net](https://n2t. ## Registering a DRS Server on a Meta-Resolver -See the documentation on the [n2t.net](https://n2t.net/e/compact_ids.html) and [identifiers.org](https://docs.identifiers.org/) meta-resolvers for adding your own compact identifier type and registering your DRS server as a resolver. You can register new prefixes (or mirrors by adding resource provider codes) for free using a simple online form. For more information see [More Background on Compact Identifiers](/data-repository-service-schemas/sources/md/more_background_on_compact_identifiers). +See the documentation on the [n2t.net](https://n2t.net/e/compact_ids.html) and [identifiers.org](https://docs.identifiers.org/) meta-resolvers for adding your own compact identifier type and registering your DRS server as a resolver. You can register new prefixes (or mirrors by adding resource provider codes) for free using a simple online form. 
For more information see [More Background on Compact Identifiers](./more-background-on-compact-identifiers.html). ## Calling Meta-Resolver APIs for Compact Identifier-Based DRS URIs @@ -70,4 +70,4 @@ The compact identifier format used by identifiers.org/n2t.net does not percent-e ## Additional Examples -For additional examples, see the document [More Background on Compact Identifiers](/data-repository-service-schemas/sources/md/more_background_on_compact_identifiers). +For additional examples, see the document [More Background on Compact Identifiers](./more-background-on-compact-identifiers.html). diff --git a/openapi/tags/DrsApiPrinciples.md b/openapi/tags/DrsApiPrinciples.md index bb3cb7714..5788f3937 100644 --- a/openapi/tags/DrsApiPrinciples.md +++ b/openapi/tags/DrsApiPrinciples.md @@ -15,7 +15,7 @@ For convenience, including when passing content references to a [WES server](htt There are two styles of DRS URIs, Hostname-based and Compact Identifier-based, both using the `drs://` URI scheme. DRS servers may choose either style when exposing references to their content;. DRS clients MUST support resolving both styles. Tip: -> See [Appendix: Background Notes on DRS URIs](#tag/Appendix:-Background-Notes-on-DRS-URIs) for more information on our design motivations for DRS URIs. +> See [Appendix: Background Notes on DRS URIs](#tag/Background-Notes-on-DRS-URIs) for more information on our design motivations for DRS URIs. ### Hostname-based DRS URIs @@ -42,13 +42,13 @@ GET https://drs.example.org/ga4gh/drs/v1/objects/314159 The protocol is always https and the port is always the standard 443 SSL port. It is invalid to include a different port in a DRS hostname-based URI. Tip: -> See the [Appendix: Hostname-Based URIs](#tag/Appendix:-Hostname-Based-URIs) for information on how hostname-based DRS URI resolution to URLs is likely to change in the future, when the DRS v2 major release happens. 
+> See the [Appendix: Hostname-Based URIs](#tag/Hostname-Based-URIs) for information on how hostname-based DRS URI resolution to URLs is likely to change in the future, when the DRS v2 major release happens. ### Compact Identifier-based DRS URIs Compact Identifier-based DRS URIs use resolver registry services (specifically, [identifiers.org](https://identifiers.org/) and [n2t.net (Name-To-Thing)](https://n2t.net/)) to provide a layer of indirection between the DRS URI and the DRS server name — the actual DNS name of the DRS server isn’t present in the URI. This approach is based on the Joint Declaration of Data Citation Principles as detailed by [Wimalaratne et al (2018)](https://www.nature.com/articles/sdata201829). -For more information, see the document [More Background on Compact Identifiers](/data-repository-service-schemas/sources/md/more_background_on_compact_identifiers). +For more information, see the document [More Background on Compact Identifiers](./more-background-on-compact-identifiers.html). Compact Identifiers take the form: @@ -59,7 +59,7 @@ drs://[provider_code/]namespace:accession Together, provider code and the namespace are referred to as the `prefix`. The provider code is optional and is used by identifiers.org/n2t.net for compact identifier resolver mirrors. Both the `provider_code` and `namespace` disallow spaces or punctuation, only lowercase alphanumerical characters, underscores and dots are allowed (e.g. [A-Za-z0-9._]). Tip: -> See the [Appendix: Compact Identifier-Based URIs](#tag/Appendix:-Compact-Identifier-Based-URIs) for more background on Compact Identifiers and resolver registry services like identifiers.org/n2t.net (aka meta-resolvers), how to register prefixes, possible caching strategies, and security considerations. 
+> See the [Appendix: Compact Identifier-Based URIs](#tag/Compact-Identifier-Based-URIs) for more background on Compact Identifiers and resolver registry services like identifiers.org/n2t.net (aka meta-resolvers), how to register prefixes, possible caching strategies, and security considerations. #### For DRS Servers @@ -101,7 +101,7 @@ DRS servers can choose to issue either hostname-based or compact identifier-base |-------------------|----------------|--------------------------| | URI Durability | URIs are valid for as long as the server operator maintains ownership of the published DNS address. (They can of course point that address at different physical serving infrastructure as often as they’d like.) | URIs are valid for as long as the server operator maintains ownership of the published compact identifier resolver namespace. (They also depend on the meta-resolvers like identifiers.org/n2t.net remaining operational, which is intended to be essentially forever.) | | Client Efficiency | URIs require minimal client logic, and no network requests, to resolve. | URIs require small client logic, and 1-2 cacheable network requests, to resolve. | -| Security | Servers have full control over their own security practices. | Server operators, in addition to maintaining their own security practices, should confirm they are comfortable with the resolver registry security practices, including protection against denial of service and namespace-hijacking attacks. (See the [Appendix: Compact Identifier-Based URIs](#tag/Appendix:-Compact-Identifier-Based-URIs) for more information on resolver registry security.) | +| Security | Servers have full control over their own security practices. | Server operators, in addition to maintaining their own security practices, should confirm they are comfortable with the resolver registry security practices, including protection against denial of service and namespace-hijacking attacks. 
(See the [Appendix: Compact Identifier-Based URIs](#tag/Compact-Identifier-Based-URIs) for more information on resolver registry security.) | ## DRS Datatypes diff --git a/openapi/tags/Motivation.md b/openapi/tags/Motivation.md index de9dc39e1..27735db9f 100644 --- a/openapi/tags/Motivation.md +++ b/openapi/tags/Motivation.md @@ -4,7 +4,7 @@ Data sharing requires portable data, consistent with the FAIR data principles (findable, accessible, interoperable, reusable). Today’s researchers and clinicians are surrounded by potentially useful data, but often need bespoke tools and processes to work with each dataset. Today’s data publishers don’t have a reliable way to make their data useful to all (and only) the people they choose. And today’s data controllers are tasked with implementing standard controls of non-standard mechanisms for data access. - + Figure 1: there’s an ocean of data, with many different tools to drink from it, but no guarantee that any tool will work with any subset of the data @@ -18,7 +18,7 @@ We need a standard way for data producers to make their data available to data consumers, that supports the control needs of the former and the access needs of the latter. And we need it to be interoperable, so anyone who builds access tools and systems can be confident they’ll work with all the data out there, and anyone who publishes data can be confident it will work with all the tools out there. 
- + Figure 2: by defining a standard Data Repository API, and adapting tools to use it, every data publisher can now make their data useful to every data consumer @@ -49,7 +49,7 @@ - + Figure 3: a standard Data Repository API enables an ecosystem of data producers and consumers diff --git a/pages/more-background-on-compact-identifiers/openapi.yaml b/pages/more-background-on-compact-identifiers/openapi.yaml new file mode 100644 index 000000000..5f053eed0 --- /dev/null +++ b/pages/more-background-on-compact-identifiers/openapi.yaml @@ -0,0 +1,29 @@ +openapi: 3.0.3 +info: + title: More Background on Compact Identifiers + version: 1.1.0 + x-logo: + url: 'https://www.ga4gh.org/wp-content/themes/ga4gh-theme/gfx/GA-logo-horizontal-tag-RGB.svg' + termsOfService: 'https://www.ga4gh.org/terms-and-conditions/' + contact: + name: GA4GH Cloud Work Stream + email: ga4gh-cloud@ga4gh.org + license: + name: Apache 2.0 + url: 'https://raw.githubusercontent.com/ga4gh/data-repository-service-schemas/master/LICENSE' +tags: + - name: About + description: + $ref: ./tags/About.md + - name: Background on Compact Identifier-Based URIs + description: + $ref: ./tags/BackgroundOnCompactIdentiferBasedURIs.md + - name: Registering a DRS Server on a Meta-Resolver + description: + $ref: ./tags/RegisteringOnMetaResolver.md + - name: Example DRS Client Compact Identifier-Based URI Resolution Process - Existing Compact Identifier Provider + description: + $ref: ./tags/ExampleExistingProvider.md + - name: Example DRS Client Compact Identifier-Based URI Resolution Process - Registering a new Compact Identifier for Your DRS Server + description: + $ref: ./tags/ExampleRegisterIdentifier.md diff --git a/pages/more-background-on-compact-identifiers/tags/About.md b/pages/more-background-on-compact-identifiers/tags/About.md new file mode 100644 index 000000000..5099f8955 --- /dev/null +++ b/pages/more-background-on-compact-identifiers/tags/About.md @@ -0,0 +1 @@ +This document contains more examples of 
resolving compact identifier-based DRS URIs than we could fit in the DRS specification or appendix. It’s provided here for your reference as a supplement to the specification. diff --git a/pages/more-background-on-compact-identifiers/tags/BackgroundOnCompactIdentiferBasedURIs.md b/pages/more-background-on-compact-identifiers/tags/BackgroundOnCompactIdentiferBasedURIs.md new file mode 100644 index 000000000..4dfcf1838 --- /dev/null +++ b/pages/more-background-on-compact-identifiers/tags/BackgroundOnCompactIdentiferBasedURIs.md @@ -0,0 +1,32 @@ +Compact identifiers refer to locally-unique persistent identifiers that have been namespaced to provide global uniqueness. See ["Uniform resolution of compact identifiers for biomedical data"](https://www.biorxiv.org/content/10.1101/101279v3) for an excellent introduction to this topic. By using compact identifiers in DRS URIs, along with a resolver registry (identifiers.org/n2t.net), systems can identify the current resolver when they need to translate a DRS URI into a fetchable URL. This allows a project to issue compact identifiers in DRS URIs and not be concerned if the project name or DRS hostname changes in the future; the current resolver can always be found through the identifiers.org/n2t.net registries. Together the identifiers.org/n2t.net systems support the resolver lookup for over 700 compact identifier formats used in the research community, making it possible for a DRS server to use any of these as DRS IDs (or to register a new compact identifier type and resolver service of their own). + +We use a DRS URI scheme rather than [Compact URIs (CURIEs)](https://en.wikipedia.org/wiki/CURIE) directly since we feel that systems consuming DRS objects will be able to better differentiate a DRS URI. 
CURIEs are widely used in the research community and we feel the fact that they can point to a wide variety of entities (HTML documents, PDFs, identities in data models, etc) makes it more difficult for systems to unambiguously identify entities as DRS objects. + +Still, to make compact identifiers work in DRS URIs we leverage the CURIE format used by identifiers.org/n2t.net. Compact identifiers have the form: + +``` +prefix:accession +``` + +The prefix can be divided into a `provider_code` (optional) and `namespace`. The `accession` here is an Ark, DOI, Data GUID, or another issuer’s local ID for the object being pointed to: + +``` +[provider_code/]namespace:accession +``` + +Both the `provider_code` and `namespace` disallow spaces or punctuation, only lowercase alphanumerical characters, underscores and dots are allowed. + +[Examples](https://n2t.net/e/compact_ids.html) include (from n2t.net): + +``` +PDB:2gc4 +Taxon:9606 +DOI:10.5281/ZENODO.1289856 +ark:/47881/m6g15z54 +IGSN:SSH000SUA +``` + +Tip: +> DRS URIs using compact identifiers with resolvers registered in identifiers.org/n2t.net can be distinguished from the hostname-based DRS URIs below based on the required ":" which is not allowed in hostname-based URI. + +See the documentation on [n2t.net](https://n2t.net/e/compact_ids.html) and [identifiers.org](https://docs.identifiers.org/) for much more information on the compact identifiers used there and details about the resolution process. diff --git a/pages/more-background-on-compact-identifiers/tags/ExampleExistingProvider.md b/pages/more-background-on-compact-identifiers/tags/ExampleExistingProvider.md new file mode 100644 index 000000000..bf269bef0 --- /dev/null +++ b/pages/more-background-on-compact-identifiers/tags/ExampleExistingProvider.md @@ -0,0 +1,43 @@ +A DRS client identifies the DRS URI compact identifier components using the first occurrence of "/" (optional) and ":" characters. 
These are not allowed inside the provider_code (optional) or the namespace. The ":" character is not allowed in a Hostname-based DRS URI, providing a convenient mechanism to differentiate them. Once the provider_code (optional) and namespace are extracted from a DRS compact identifier-based URI, a client can use services on identifiers.org to identify available resolvers. + +*Let’s look at a specific example DRS compact identifier-based URI that uses DOIs, a popular compact identifier, and walk through the process that a client would use to resolve it. Keep in mind, the resolution process is the same from the client perspective if a given DRS server is using an existing compact identifier type (DOIs, ARKs, Data GUIDs) or creating their own compact identifier type for their DRS server and registering it on identifiers.org/n2t.net.* + +Starting with the DRS URI: + +``` +drs://doi:10.5072/FK2805660V +``` + +with a namespace of "doi", the following GET request will return information about the namespace: + +``` +GET https://registry.api.identifiers.org/restApi/namespaces/search/findByPrefix?prefix=doi +``` + +This information then points to resolvers for the "doi" namespace. This "doi" namespace was assigned a namespace ID of 75 by identifiers.org. This "id" has nothing to do with compact identifier accessions (which are used in the URL pattern as `{$id}` below) or DRS IDs. This namespace ID (75 below) is purely an identifiers.org internal ID for use with their APIs: + +``` +GET https://registry.api.identifiers.org/restApi/resources/search/findAllByNamespaceId?id=75 +``` + +This returns enough information to, ultimately, identify one or more resolvers and each have a URL pattern that, for DRS-supporting systems, provides a URL template for making a successful DRS GET request. 
For example, the DOI urlPattern is: + +``` +urlPattern: "https://doi.org/{$id}" +``` + +And the `{$id}` here refers to the accession from the compact identifier (in this example the accession is `10.5072/FK2805660V`). If applicable, a provider code can be supplied in the above requests to specify a particular mirror if there are multiple resolvers for this namespace. In the case of DOIs, you only get a single resolver. + +Given this information you now know you can make a GET on the URL: + +``` +GET https://doi.org/10.5072/FK2805660V +``` + +*The URL above is valid for a DOI object but it is not actually a DRS server! Instead, it redirects to a DRS server through a series of HTTPS redirects. This is likely to be common when working with existing compact identifiers like DOIs or ARKs. Regardless, the redirect should eventually lead to a DRS URL that percent-encodes the accession as a DRS ID in a DRS object API call. For a **hypothetical** example, here’s what a redirect to a DRS API URL might ultimately look like. A client doesn’t have to do anything other than follow the HTTPS redirects. The link between the DOI resolver on doi.org and the DRS server URL below is the result of the DRS server registering their data objects with a DOI issuer.* + +``` +GET https://drs.example.org/ga4gh/drs/v1/objects/10.5072%2FFK2805660V +``` + +IDs in DRS hostname-based URIs/URLs are always percent-encoded to eliminate ambiguity even though the DRS compact identifier-based URIs and the identifiers.org API do not percent-encode accessions. This was done in order to 1) follow the CURIE conventions of identifiers.org/n2t.net for compact identifier-based DRS URIs and 2) to aid in readability for users who understand they are working with compact identifiers. **The general rule of thumb, when using a compact identifier accession as a DRS ID in a DRS API call, make sure to percent-encode it. 
An easy way for a DRS client to handle this is to get the initial DRS object JSON response from whatever redirects the compact identifier resolves to, then look for the** `self_uri` **in the JSON, which will give you the correctly percent-encoded DRS ID for subsequent DRS API calls such as the** `access` **method.** diff --git a/pages/more-background-on-compact-identifiers/tags/ExampleRegisterIdentifier.md b/pages/more-background-on-compact-identifiers/tags/ExampleRegisterIdentifier.md new file mode 100644 index 000000000..f08eaa4e8 --- /dev/null +++ b/pages/more-background-on-compact-identifiers/tags/ExampleRegisterIdentifier.md @@ -0,0 +1,49 @@ +See the documentation on [n2t.net](https://n2t.net/e/compact_ids.html) and [identifiers.org](https://docs.identifiers.org/) for adding your own compact identifier type and registering your DRS server as a resolver. We document this in more detail in the [main specification document](./index.html). + +Now the question is how does a client resolve your newly registered compact identifier for your DRS server? *It turns out, whether specific to a DRS implementation or using existing compact identifiers like ARKs or DOIs, the DRS client resolution process for compact identifier-based URIs is exactly the same.* We briefly run through the process below for a new compact identifier as an example but, again, a client will not need to do anything different from the resolution process documented in "DRS Client Compact Identifier-Based URI Resolution Process - Existing Compact Identifier Provider". + +Now we can issue DRS URIs for our data objects like: + +``` +drs://mydrsprefix:12345 +``` + +This is a little simpler than working with DOIs or other existing compact identifier issuers out there since we can create our own IDs and not have to allocate them through a third-party service (see "Issuing Existing Compact Identifiers for Use with Your DRS Server" below). 
+ +With a namespace of "mydrsprefix", the following GET request will return information about the namespace: + +``` +GET https://registry.api.identifiers.org/restApi/namespaces/search/findByPrefix?prefix=mydrsprefix +``` + +*Of course, this is a hypothetical example so the actual API call won’t work but you can see the GET request is identical to "DRS Client Compact Identifier-Based URI Resolution Process - Existing Compact Identifier Provider".* + +This information then points to resolvers for the "mydrsprefix" namespace. Hypothetically, this "mydrsprefix" namespace was assigned a namespace ID of 1829 by identifiers.org. This "id" has nothing to do with compact identifier accessions (which are used in the URL pattern as `{$id}` below) or DRS IDs. This namespace ID (1829 below) is purely an identifiers.org internal ID for use with their APIs: + +``` +GET https://registry.api.identifiers.org/restApi/resources/search/findAllByNamespaceId?id=1829 +``` + +*Like the previous GET request this URL won’t work but you can see the GET request is identical to "DRS Client Compact Identifier-Based URI Resolution Process - Existing Compact Identifier Provider".* + +This returns enough information to, ultimately, identify one or more resolvers and each have a URL pattern that, for DRS-supporting systems, provides a URL template for making a successful DRS GET request. For example, the "mydrsprefix" urlPattern is: + +``` +urlPattern: "https://mydrs.server.org/ga4gh/drs/v1/objects/{$id}" +``` + +And the `{$id}` here refers to the accession from the compact identifier (in this example the accession is `12345`). If applicable, a provider code can be supplied in the above requests to specify a particular mirror if there are multiple resolvers for this namespace. 
+ +Given this information you now know you can make a GET on the URL: + +``` +GET https://mydrs.server.org/ga4gh/drs/v1/objects/12345 +``` + +So, compared to using a third party service like DOIs and ARKs, this would be a direct pointer to a DRS server. However, just as with "DRS Client Compact Identifier-Based URI Resolution Process - Existing Compact Identifier Provider", the client should always be prepared to follow HTTPS redirects. + +*To summarize, a client resolving a custom compact identifier registered for a single DRS server is actually the same as resolving using a third-party compact identifier service like ARKs or DOIs with a DRS server, just make sure to follow redirects in all cases.* + +**Note: Issuing Existing Compact Identifiers for Use with Your DRS Server** + +See the documentation on [n2t.net](https://n2t.net/e/compact_ids.html) and [identifiers.org](https://docs.identifiers.org/) for information about all the compact identifiers that are supported. You can choose to use an existing compact identifier provider for your DRS server, as we did in the example above using DOIs ("DRS Client Compact Identifier-Based URI Resolution Process - Existing Compact Identifier Provider"). Just keep in mind, each provider will have their own approach for generating compact identifiers and associating them with a DRS data object URL. Some compact identifier providers, like DOIs, provide a method whereby you can register in their network and get your own prefix, allowing you to mint your own accessions. Other services, like the University of California’s [EZID](https://ezid.cdlib.org/) service, provide accounts and a mechanism to mint accessions centrally for each of your data objects. For experimentation we recommend you take a look at the EZID website that allows you to create DOIs and ARKs and associate them with your data object URLs on your DRS server for testing purposes. 
diff --git a/pages/more-background-on-compact-identifiers/tags/RegisteringOnMetaResolver.md b/pages/more-background-on-compact-identifiers/tags/RegisteringOnMetaResolver.md new file mode 100644 index 000000000..9ba58b635 --- /dev/null +++ b/pages/more-background-on-compact-identifiers/tags/RegisteringOnMetaResolver.md @@ -0,0 +1,9 @@ +See the documentation on the [n2t.net](https://n2t.net/e/compact_ids.html) and [identifiers.org](https://docs.identifiers.org/) meta-resolvers for adding your own compact identifier type and registering your DRS server as a resolver. You can register new prefixes (or mirrors by adding resource provider codes) for free using a simple online form. + +Keep in mind, while anyone can register prefixes, the identifiers.org/n2t.net sites do basic hand curation to verify new prefix and resource (provider code) requests. See those sites for more details on their security practices. For more information see "Security with Compact Identifiers" in the [main specification document](./index.html). + +Starting with the prefix for our new compact identifier, let’s register the namespace `mydrsprefix` on identifiers.org/n2t.net and use 5-digit numeric IDs as our accessions. We will then link this to the DRS server at https://mydrs.server.org/ga4gh/drs/v1/ by filling in the provider details. 
Here’s what the registration for our new namespace looks like on [identifiers.org](https://registry.identifiers.org/prefixregistrationrequest): + +![Prefix Register 1](/data-repository-service-schemas/public/img/prefix_register_1.png) + +![Prefix Register 2](/data-repository-service-schemas/public/img/prefix_register_2.png) diff --git a/sources/img/figure1.png b/public/img/figure1.png similarity index 100% rename from sources/img/figure1.png rename to public/img/figure1.png diff --git a/sources/img/figure2.png b/public/img/figure2.png similarity index 100% rename from sources/img/figure2.png rename to public/img/figure2.png diff --git a/sources/img/figure3.png b/public/img/figure3.png similarity index 100% rename from sources/img/figure3.png rename to public/img/figure3.png diff --git a/sources/img/prefix_register_1.png b/public/img/prefix_register_1.png similarity index 100% rename from sources/img/prefix_register_1.png rename to public/img/prefix_register_1.png diff --git a/sources/img/prefix_register_2.png b/public/img/prefix_register_2.png similarity index 100% rename from sources/img/prefix_register_2.png rename to public/img/prefix_register_2.png diff --git a/sources/md/appendix_background_notes_on_drs_uris.md b/sources/md/appendix_background_notes_on_drs_uris.md deleted file mode 100644 index 416e0c8ac..000000000 --- a/sources/md/appendix_background_notes_on_drs_uris.md +++ /dev/null @@ -1,7 +0,0 @@ -## Design Motivation - -DRS URIs are aligned with the [FAIR data principles](https://www.nature.com/articles/sdata201618) and the [Joint Declaration of Data Citation Principles](https://www.nature.com/articles/sdata20182) — both hostname-based and compact identifier-based URIs provide globally unique, machine-resolvable, persistent identifiers for data. 
- -* We require all URIs to begin with `drs://` as a signal to humans and systems consuming these URIs that the response they will ultimately receive, after transforming the URI to a fetchable URL, will be a DRS JSON packet. This signal differentiates DRS URIs from the wide variety of other entities (HTML documents, PDFs, ontology notes, etc.) that can be represented by compact identifiers. -* We support hostname-based URIs because of their simplicity and efficiency for server and client implementers. -* We support compact identifier-based URIs, and the meta-resolver services of identifiers.org and n2t.net (Name-to-Thing), because of the wide adoption of compact identifiers in the research community. as detailed by [Wimalaratne et al (2018)](https://www.nature.com/articles/sdata201829) in "Uniform resolution of compact identifiers for biomedical data." diff --git a/sources/md/appendix_compact_identifier_based_uris.md b/sources/md/appendix_compact_identifier_based_uris.md deleted file mode 100644 index b5c09c0f4..000000000 --- a/sources/md/appendix_compact_identifier_based_uris.md +++ /dev/null @@ -1,73 +0,0 @@ -**Note: Identifiers.org/n2t.net API Changes** - -The examples below show the current API interactions with [n2t.net](https://n2t.net/e/compact_ids.html) and [identifiers.org](https://docs.identifiers.org/) which may change over time. Please refer to the documentation from each site for the most up-to-date information. We will make best efforts to keep the DRS specification current but DRS clients MUST maintain their ability to use either the identifiers.org or n2t.net APIs to resolve compact identifier-based DRS URIs. - -## Registering a DRS Server on a Meta-Resolver - -See the documentation on the [n2t.net](https://n2t.net/e/compact_ids.html) and [identifiers.org](https://docs.identifiers.org/) meta-resolvers for adding your own compact identifier type and registering your DRS server as a resolver. 
You can register new prefixes (or mirrors by adding resource provider codes) for free using a simple online form. For more information see [More Background on Compact Identifiers](/data-repository-service-schemas/sources/md/more_background_on_compact_identifiers). - -## Calling Meta-Resolver APIs for Compact Identifier-Based DRS URIs - -Clients resolving Compact Identifier-based URIs need to convert a prefix (e.g. “drs.42”) into an URL pattern. They can do so by calling either the identifiers.org or the n2t.net API, since the two meta-resolvers keep their mapping databases in sync. - -### Calling the identifiers.org API as a Client - -It takes two API calls to get the URL pattern. - -1. The client makes a GET request to identifiers.org to find information about the prefix: - -``` -GET https://registry.api.identifiers.org/restApi/namespaces/search/findByPrefix?prefix=drs.42 -``` - -This request returns a JSON structure including various URLs containing an embedded namespace id, such as: - -``` -"namespace" : { - "href":"https://registry.api.identifiers.org/restApi/namespaces/1234" -} -``` - -2. The client extracts the namespace id (in this example 1234), and uses it to make a second GET request to identifiers.org to find information about the namespace: - -``` -GET https://registry.api.identifiers.org/restApi/resources/search/findAllByNamespaceId?id=1234 -``` - -This request returns a JSON structure including an urlPattern field, whose value is an URL pattern containing a ${id} parameter, such as: - -``` -"urlPattern" : "https://drs.myexample.org/ga4gh/drs/v1/objects/{$id}" -``` - -### Calling the n2t.net API as a Client - -It takes one API call to get the URL pattern. - -The client makes a GET request to n2t.net to find information about the namespace. (Note the trailing colon.) 
- -``` -GET https://n2t.net/drs.42: -``` - -This request returns a text structure including a redirect field, whose value is an URL pattern containing a `$id` parameter, such as: - -``` -redirect: https://drs.myexample.org/ga4gh/drs/v1/objects/$id -``` - -## Caching with Compact Identifiers - -Identifiers.org/n2t.net compact identifier resolver records do not change frequently. This reality is useful for caching resolver records and their URL patterns for performance reasons. Builders of systems that use compact identifier-based DRS URIs should cache prefix resolver records from identifiers.org/n2t.net and occasionally refresh the records (such as every 24 hours). This approach will reduce the burden on these community services since we anticipate many DRS URIs will be regularly resolved in workflow systems. Alternatively, system builders may decide to directly mirror the registries themselves, instructions are provided on the identifiers.org/n2t.net websites. - -## Security with Compact Identifiers - -As mentioned earlier, identifiers.org/n2t.net performs some basic verification of new prefixes and provider code mirror registrations on their sites. However, builders of systems that consume and resolve DRS URIs may have certain security compliance requirements and regulations that prohibit relying on an external site for resolving compact identifiers. In this case, systems under these security and compliance constraints may wish to whitelist certain compact identifier resolvers and/or vet records from identifiers.org/n2t.net before enabling in their systems. - -## Accession Encoding to Valid DRS IDs - -The compact identifier format used by identifiers.org/n2t.net does not percent-encode reserved URI characters but, instead, relies on the first ":" character to separate prefix from accession. 
Since these accessions can contain any characters, and characters like "/" will interfere with DRS API calls, you *must* percent encode the accessions extracted from DRS compact identifier-based URIs when using as DRS IDs in subsequent DRS GET requests. An easy way for a DRS client to handle this is to get the initial DRS object JSON response from whatever redirects the compact identifier resolves to, then look for the `self_uri` in the JSON, which will give you the correctly percent-encoded DRS ID for subsequent DRS API calls such as the `access` method. - -## Additional Examples - -For additional examples, see the document [More Background on Compact Identifiers](/data-repository-service-schemas/sources/md/more_background_on_compact_identifiers). \ No newline at end of file diff --git a/sources/md/appendix_hostname_based_uris.md b/sources/md/appendix_hostname_based_uris.md deleted file mode 100644 index 096974dc3..000000000 --- a/sources/md/appendix_hostname_based_uris.md +++ /dev/null @@ -1,7 +0,0 @@ -## Encoding DRS IDs - -In hostname-based DRS URIs, the ID is always percent-encoded to ensure special characters do not interfere with subsequent DRS endpoint calls. As such, ":" is not allowed in the URI and is a convenient way of differentiating from a compact identifier-based DRS URI. Also, if a given DRS service implementation uses compact identifier accessions as their DRS IDs, they must be percent encoded before using them as DRS IDs in hostname-based DRS URIs and subsequent GET requests to a DRS service endpoint. - -## Future DRS Versions and Service Registry/Info - -In the future, as new major versions of DRS are released, a DRS server might support multiple API versions on different URL paths. 
At that point we expect to add support for [service-registry](https://github.com/ga4gh-discovery/ga4gh-service-registry) and [service-info](https://github.com/ga4gh-discovery/ga4gh-service-info) endpoints to the API, and to update the URI resolution logic to describe how to use those endpoints when translating hostname-based DRS URIs to URLs. diff --git a/sources/md/appendix_motivation.md b/sources/md/appendix_motivation.md deleted file mode 100644 index 125b27867..000000000 --- a/sources/md/appendix_motivation.md +++ /dev/null @@ -1,32 +0,0 @@ -Data sharing requires portable data, consistent with the FAIR data principles (findable, accessible, interoperable, reusable). Today’s researchers and clinicians are surrounded by potentially useful data, but often need bespoke tools and processes to work with each dataset. Today’s data publishers don’t have a reliable way to make their data useful to all (and only) the people they choose. And today’s data controllers are tasked with implementing standard controls of non-standard mechanisms for data access. - -![Figure 1](/data-repository-service-schemas/sources/img/figure1.png) -*Figure 1: there’s an ocean of data, with many different tools to drink from it, but no guarantee that any tool will work with any subset of the data* - -We need a standard way for data producers to make their data available to data consumers, that supports the control needs of the former and the access needs of the latter. And we need it to be interoperable, so anyone who builds access tools and systems can be confident they’ll work with all the data out there, and anyone who publishes data can be confident it will work with all the tools out there. 
- -![Figure 2](/data-repository-service-schemas/sources/img/figure2.png) -*Figure 2: by defining a standard Data Repository API, and adapting tools to use it, every data publisher can now make their data useful to every data consumer* - -We envision a world where: - -* there are many many **data consumers**, working in research and in care, who can use the tools of their choice to access any and all data that they have permission to see - -* there are many **data access tools** and platforms, supporting discovery, visualization, analysis, and collaboration - -* there are many **data repositories**, each with their own policies and characteristics, which can be accessed by a variety of tools - -* there are many **data publishing tools** and platforms, supporting a variety of data lifecycles and formats - -* there are many many **data producers**, generating data of all types, who can use the tools of their choice to make their data as widely available as is appropriate - -![Figure 3](/data-repository-service-schemas/sources/img/figure3.png) -*Figure 3: a standard Data Repository API enables an ecosystem of data producers and consumers* - -This spec defines a standard **Data Repository Service (DRS) API** (“the yellow box”), to enable that ecosystem of data producers and consumers. Our goal is that the only thing data consumers need to know about a data repo is *\"here’s the DRS endpoint to access it\"*, and the only thing data publishers need to know to tap into the world of consumption tools is *\"here’s how to tell it where my DRS endpoint lives\"*. - -## Federation - -The world’s biomedical data is controlled by groups with very different policies and restrictions on where their data lives and how it can be accessed. A primary purpose of DRS is to support unified access to disparate and distributed data. 
(As opposed to the alternative centralized model of "let’s just bring all the data into one single data repository”, which would be technically easier but is no more realistic than “let’s just bring all the websites into one single web host”.) - -In a DRS-enabled world, tool builders don’t have to worry about where the data their tools operate on lives — they can count on DRS to give them access. And tool users only need to know which DRS server is managing the data they need, and whether they have permission to access it; they don’t have to worry about how to physically get access to, or (worse) make a copy of the data. For example, if I have appropriate permissions, I can run a pooled analysis where I run a single tool across data managed by different DRS servers, potentially in different locations. diff --git a/sources/md/front_matter.md b/sources/md/front_matter.md deleted file mode 100644 index b40babbdd..000000000 --- a/sources/md/front_matter.md +++ /dev/null @@ -1,154 +0,0 @@ -# Introduction - -The Data Repository Service (DRS) API provides a generic interface to data repositories so data consumers, including workflow systems, can access data objects in a single, standard way regardless of where they are stored and how they are managed. The primary functionality of DRS is to map a logical ID to a means for physically retrieving the data represented by the ID. The sections below describe the characteristics of those IDs, the types of data supported, how they can be pointed to using URIs, and how clients can use these URIs to ultimately make successful DRS API requests. This document also describes the DRS API in detail and provides information on the specific endpoints, request formats, and responses. This specification is intended for developers of DRS-compatible services and of clients that will call these DRS services. 
- -The key words MUST, MUST NOT, REQUIRED, SHALL, SHALL NOT, SHOULD, SHOULD NOT, RECOMMENDED, MAY, and OPTIONAL in this document are to be interpreted as described in [RFC 2119](https://datatracker.ietf.org/doc/html/rfc2119). - -# DRS API Principles - -## DRS IDs - -Each implementation of DRS can choose its own id scheme, as long as it follows these guidelines: - -* DRS IDs are strings made up of uppercase and lowercase letters, decimal digits, hypen, period, underscore and tilde [A-Za-z0-9.-_~]. See [RFC 3986 § 2.3](https://datatracker.ietf.org/doc/html/rfc3986#section-2.3). -* DRS IDs can contain other characters, but they MUST be encoded into valid DRS IDs whenever they are used in API calls. This is because non-encoded IDs may interfere with the interpretation of the objects/{id}/access endpoint. To overcome this limitation use percent-encoding of the ID, see [RFC 3986 § 2.4](https://datatracker.ietf.org/doc/html/rfc3986#section-2.4) -* One DRS ID MUST always return the same object data (or, in the case of a collection, the same set of objects). This constraint aids with reproducibility. -* DRS implementations MAY have more than one ID that maps to the same object. -* DRS version 1.x does NOT support semantics around multiple versions of an object. (For example, there’s no notion of “get latest version” or “list all versions”.) Individual implementations MAY choose an ID scheme that includes version hints. - -## DRS URIs - -For convenience, including when passing content references to a [WES server](https://github.com/ga4gh/workflow-execution-service-schemas), we define a [URI scheme](https://en.wikipedia.org/wiki/Uniform_Resource_Identifier#Generic_syntax) for DRS-accessible content. This section documents the syntax of DRS URIs, and the rules clients follow for translating a DRS URI into a URL that they use for making the DRS API calls described in this spec. 
- -There are two styles of DRS URIs, Hostname-based and Compact Identifier-based, both using the `drs://` URI scheme. DRS servers may choose either style when exposing references to their content;. DRS clients MUST support resolving both styles. - -Tip: -> See [Appendix: Background Notes on DRS URIs](#tag/Appendix:-Background-Notes-on-DRS-URIs) for more information on our design motivations for DRS URIs. - -### Hostname-based DRS URIs - -Hostname-based DRS URIs are simpler than compact identifier-based URIs. They contain the DRS server name and the DRS ID only and can be converted directly into a fetchable URL based on a simple rule. They take the form: - -``` -drs:/// -``` - -DRS URIs of this form mean *\"you can fetch the content with DRS id \ from the DRS server at \\"*. -For example, here are the client resolution steps if the URI is: - -``` -drs://drs.example.org/314159 -``` - -1. The client parses the string to extract the hostname of “drs.example.org” and the id of “314159”. -2. The client makes a GET request to the DRS server, using the standard DRS URL syntax: - -``` -GET https://drs.example.org/ga4gh/drs/v1/objects/314159 -``` - -The protocol is always https and the port is always the standard 443 SSL port. It is invalid to include a different port in a DRS hostname-based URI. - -Tip: -> See the [Appendix: Hostname-Based URIs](#tag/Appendix:-Hostname-Based-URIs) for information on how hostname-based DRS URI resolution to URLs is likely to change in the future, when the DRS v2 major release happens. - -### Compact Identifier-based DRS URIs - -Compact Identifier-based DRS URIs use resolver registry services (specifically, [identifiers.org](https://identifiers.org/) and [n2t.net (Name-To-Thing)](https://n2t.net/)) to provide a layer of indirection between the DRS URI and the DRS server name — the actual DNS name of the DRS server isn’t present in the URI. 
This approach is based on the Joint Declaration of Data Citation Principles as detailed by [Wimalaratne et al (2018)](https://www.nature.com/articles/sdata201829). - -For more information, see the document [More Background on Compact Identifiers](/data-repository-service-schemas/sources/md/more_background_on_compact_identifiers). - -Compact Identifiers take the form: - -``` -drs://[provider_code/]namespace:accession -``` - -Together, provider code and the namespace are referred to as the `prefix`. The provider code is optional and is used by identifiers.org/n2t.net for compact identifier resolver mirrors. Both the `provider_code` and `namespace` disallow spaces or punctuation, only lowercase alphanumerical characters, underscores and dots are allowed (e.g. [A-Za-z0-9._]). - -Tip: -> See the [Appendix: Compact Identifier-Based URIs](#tag/Appendix:-Compact-Identifier-Based-URIs) for more background on Compact Identifiers and resolver registry services like identifiers.org/n2t.net (aka meta-resolvers), how to register prefixes, possible caching strategies, and security considerations. - -#### For DRS Servers - -If your DRS implementation will issue DRS URIs based *on your own* compact identifiers, you MUST first register a new prefix with identifiers.org (which is automatically mirrored to n2t.net). You will also need to include a provider resolver resource in this registration which links the prefix to your DRS server, so that DRS clients can get sufficient information to make a successful DRS GET request. For clarity, we recommend you choose a namespace beginning with `drs`. - -#### For DRS Clients - -A DRS client parses the DRS URI compact identifier components to extract the prefix and the accession, and then uses meta-resolver APIs to locate the actual DRS server. For example, here are the client resolution steps if the URI is: - -``` -drs://drs.42:314159 -``` - -1. 
The client parses the string to extract the prefix of `drs.42` and the accession of `314159`, using the first occurrence of a colon (":") character after the initial `drs://` as a delimiter. (The colon character is not allowed in a Hostname-based DRS URI, making it easy to tell them apart.) - -2. The client makes API calls to a meta-resolver to look up the URL pattern for the namespace. (See [Calling Meta-Resolver APIs for Compact Identifier-Based DRS URIs](#section/Calling-Meta-Resolver-APIs-for-Compact-Identifier-Based-DRS-URIs) for details.) The URL pattern is a string containing a `{$id}` parameter, such as: - -``` -https://drs.myexample.org/ga4gh/drs/v1/objects/{$id} -``` - -3. The client generates a DRS URL from the URL template by replacing {$id} with the accession it extracted in step 1. It then makes a GET request to the DRS server: - -``` -GET https://drs.myexample.org/ga4gh/drs/v1/objects/314159 -``` - -4. The client follows any HTTP redirects returned in step 3, in case the resolver goes through an extra layer of redirection. - -For performance reasons, DRS clients SHOULD cache the URL pattern returned in step 2, with a suggested 24 hour cache life. - -### Choosing a URI Style - -DRS servers can choose to issue either hostname-based or compact identifier-based DRS URIs, and can be confident that compliant DRS clients will support both. DRS clients must be able to accommodate both URI types. Tradeoffs that DRS server builders, and third parties who need to cite DRS objects in datasets, workflows or elsewhere, may want to consider include: - -*Table 1: Choosing a URI Style* - -| | Hostname-based | Compact Identifier-based | -|-------------------|----------------|--------------------------| -| URI Durability | URIs are valid for as long as the server operator maintains ownership of the published DNS address. (They can of course point that address at different physical serving infrastructure as often as they’d like.) 
| URIs are valid for as long as the server operator maintains ownership of the published compact identifier resolver namespace. (They also depend on the meta-resolvers like identifiers.org/n2t.net remaining operational, which is intended to be essentially forever.) | -| Client Efficiency | URIs require minimal client logic, and no network requests, to resolve. | URIs require small client logic, and 1-2 cacheable network requests, to resolve. | -| Security | Servers have full control over their own security practices. | Server operators, in addition to maintaining their own security practices, should confirm they are comfortable with the resolver registry security practices, including protection against denial of service and namespace-hijacking attacks. (See the [Appendix: Compact Identifier-Based URIs](#tag/Appendix:-Compact-Identifier-Based-URIs) for more information on resolver registry security.) | - -## DRS Datatypes - -DRS v1 supports two types of content: - -* a *blob* is like a file — it’s a single blob of bytes, represented by a `DrsObject` without a `contents` array -* a *bundle* is like a folder — it’s a collection of other DRS content (either blobs or bundles), represented by a `DrsObject` with a `contents` array - -## Read-only - -DRS v1 is a read-only API. We expect that each implementation will define its own mechanisms and interfaces (graphical and/or programmatic) for adding and updating data. - -## Standards - -The DRS API specification is written in OpenAPI and embodies a RESTful service philosophy. It uses JSON in requests and responses and standard HTTPS on port 443 for information transport. - -# Authorization & Authentication - -## Making DRS Requests - -The DRS implementation is responsible for defining and enforcing an authorization policy that determines which users are allowed to make which requests. 
GA4GH recommends that DRS implementations use an OAuth 2.0 [bearer token](https://oauth.net/2/bearer-tokens/), although they can choose other mechanisms if appropriate. - -## Fetching DRS Objects - -The DRS API allows implementers to support a variety of different content access policies, depending on what `AccessMethod` records they return: - -* public content: - * server provides an `access_url` with a `url` and no `headers` - * caller fetches the object bytes without providing any auth info -* private content that requires the caller to have out-of-band auth knowledge (e.g. service account credentials): - * server provides an `access_url` with a `url` and no `headers` - * caller fetches the object bytes, passing the auth info they obtained out-of-band -* private content that requires the caller to pass an Authorization token: - * server provides an `access_url` with a `url` and `headers` - * caller fetches the object bytes, passing auth info via the specified header(s) -* private content that uses an expensive-to-generate auth mechanism (e.g. a signed URL): - * server provides an `access_id` - * caller passes the `access_id` to the `/access` endpoint - * server provides an `access_url` with the generated mechanism (e.g. a signed URL in the `url` field) - * caller fetches the object bytes from the `url` (passing auth info from the specified headers, if any) - -DRS implementers should ensure their solutions restrict access to targets as much as possible, detect attempts to exploit through log monitoring, and they are prepared to take action if an exploit in their DRS implementation is detected. 
diff --git a/sources/md/more_background_on_compact_identifiers.md b/sources/md/more_background_on_compact_identifiers.md deleted file mode 100644 index 85d9c9d1c..000000000 --- a/sources/md/more_background_on_compact_identifiers.md +++ /dev/null @@ -1,148 +0,0 @@ -## About - -This document contains more examples of resolving compact identifier-based DRS URIs than we could fit in the DRS specification or appendix. It’s provided here for your reference as a supplement to the specification. - -## Background on Compact Identifier-Based URIs - -Compact identifiers refer to locally-unique persistent identifiers that have been namespaced to provide global uniqueness. See ["Uniform resolution of compact identifiers for biomedical data"](https://www.biorxiv.org/content/10.1101/101279v3) for an excellent introduction to this topic. By using compact identifiers in DRS URIs, along with a resolver registry (identifiers.org/n2t.net), systems can identify the current resolver when they need to translate a DRS URI into a fetchable URL. This allows a project to issue compact identifiers in DRS URIs and not be concerned if the project name or DRS hostname changes in the future, the current resolver can always be found through the identifiers.org/n2t.net registries. Together the identifiers.org/n2t.net systems support the resolver lookup for over 700 compact identifiers formats used in the research community, making it possible for a DRS server to use any of these as DRS IDs (or to register a new compact identifier type and resolver service of their own). - -We use a DRS URI scheme rather than [Compact URIs (CURIEs)](https://en.wikipedia.org/wiki/CURIE) directly since we feel that systems consuming DRS objects will be able to better differentiate a DRS URI. 
CURIEs are widely used in the research community and we feel the fact that they can point to a wide variety of entities (HTML documents, PDFs, identities in data models, etc) makes it more difficult for systems to unambiguously identify entities as DRS objects. - -Still, to make compact identifiers work in DRS URIs we leverage the CURIE format used by identifiers.org/n2t.net. Compact identifiers have the form: - -``` -prefix:accession -``` - -The prefix can be divided into a `provider_code` (optional) and `namespace`. The `accession` here is an Ark, DOI, Data GUID, or another issuers’s local ID for the object being pointed to: - -``` -[provider_code/]namespace:accession -``` - -Both the `provider_code` and `namespace` disallow spaces or punctuation, only lowercase alphanumerical characters, underscores and dots are allowed. - -[Examples](https://n2t.net/e/compact_ids.html) include (from n2t.net): - -``` -PDB:2gc4 -Taxon:9606 -DOI:10.5281/ZENODO.1289856 -ark:/47881/m6g15z54 -IGSN:SSH000SUA -``` - -Tip: -> DRS URIs using compact identifiers with resolvers registered in identifiers.org/n2t.net can be distinguished from the hostname-based DRS URIs below based on the required ":" which is not allowed in hostname-based URI. - -See the documentation on [n2t.net](https://n2t.net/e/compact_ids.html) and [identifiers.org](https://docs.identifiers.org/) for much more information on the compact identifiers used there and details about the resolution process. - -## Registering a DRS Server on a Meta-Resolver - -See the documentation on the [n2t.net](https://n2t.net/e/compact_ids.html) and [identifiers.org](https://docs.identifiers.org/) meta-resolvers for adding your own compact identifier type and registering your DRS server as a resolver. You can register new prefixes (or mirrors by adding resource provider codes) for free using a simple online form. 
- -Keep in mind, while anyone can register prefixes, the identifiers.org/n2t.net sites do basic hand curation to verify new prefix and resource (provider code) requests. See those sites for more details on their security practices. For more information see - -Starting with the prefix for our new compact identifier, let’s register the namespace `mydrsprefix` on identifiers.org/n2t.net and use 5-digit numeric IDs as our accessions. We will then link this to the DRS server at https://mydrs.server.org/ga4gh/drs/v1/ by filling in the provider details. Here’s what that the registration for our new namespace looks like on [identifiers.org](https://registry.identifiers.org/prefixregistrationrequest): - -![Prefix Register 1](/data-repository-service-schemas/sources/img/prefix_register_1.png) - -![Prefix Register 2](/data-repository-service-schemas/sources/img/prefix_register_2.png) - -## Example DRS Client Compact Identifier-Based URI Resolution Process - Existing Compact Identifier Provider - -A DRS client identifies the a DRS URI compact identifier components using the first occurance of "/" (optional) and ":" characters. These are not allowed inside the provider_code (optional) or the namespace. The ":" character is not allowed in a Hostname-based DRS URI, providing a convenient mechanism to differentiate them. Once the provider_code (optional) and namespace are extracted from a DRS compact identifier-based URI, a client can use services on identifiers.org to identify available resolvers. - -*Let’s look at a specific example DRS compact identifier-based URI that uses DOIs, a popular compact identifier, and walk through the process that a client would use to resolve it. 
Keep in mind, the resolution process is the same from the client perspective if a given DRS server is using an existing compact identifier type (DOIs, ARKs, Data GUIDs) or creating their own compact identifier type for their DRS server and registering it on identifiers.org/n2t.net.* - -Starting with the DRS URI: - -``` -drs://doi:10.5072/FK2805660V -``` - -with a namespace of "doi", the following GET request will return information about the namespace: - -``` -GET https://registry.api.identifiers.org/restApi/namespaces/search/findByPrefix?prefix=doi -``` - -This information then points to resolvers for the "doi" namespace. This "doi" namespace was assigned a namespace ID of 75 by identifiers.org. This "id" has nothing to do with compact identifier accessions (which are used in the URL pattern as `{$id}` below) or DRS IDs. This namespace ID (75 below) is purely an identifiers.org internal ID for use with their APIs: - -``` -GET https://registry.api.identifiers.org/restApi/resources/search/findAllByNamespaceId?id=75 -``` - -This returns enough information to, ultimately, identify one or more resolvers and each have a URL pattern that, for DRS-supporting systems, provides a URL template for making a successful DRS GET request. For example, the DOI urlPattern is: - -``` -urlPattern: "https://doi.org/{$id}" -``` - -And the `{$id}` here refers to the accession from the compact identifier (in this example the accession is `10.5072/FK2805660V`). If applicable, a provide code can be supplied in the above requests to specify a particular mirror if there are multiple resolvers for this namespace. In the case of DOIs, you only get a single resolver. - -Given this information you now know you can make a GET on the URL: - -``` -GET https://doi.org/10.5072/FK2805660V -``` - -*The URL above is valid for a DOI object but it is not actually a DRS server! Instead, it redirects to a DRS server through a series of HTTPS redirects. 
This is likely to be common when working with existing compact identifiers like DOIs or ARKs. Regardless, the redirect should eventually lead to a DRS URL that percent-encodes the accession as a DRS ID in a DRS object API call. For a **hypothetical** example, here’s what a redirect to a DRS API URL might ultimately look. A client doesn’t have to do anything other than follow the HTTPS redirects. The link between the DOI resolver on doi.org and the DRS server URL below is the result of the DRS server registering their data objects with a DOI issuer.* - -``` -GET https://drs.example.org/ga4gh/drs/v1/objects/10.5072%2FFK2805660V -``` - -IDs in DRS hostname-based URIs/URLs are always percent-encoded to eliminate ambiguity even though the DRS compact identifier-based URIs and the identifier.orgs API do not percent-encode accessions. This was done in order to 1) follow the CURIE conventions of identifiers.org/n2t.net for compact identifier-based DRS URIs and 2) to aid in readability for users who understand they are working with compact identifiers. **The general rule of thumb, when using a compact identifier accession as a DRS ID in a DRS API call, make sure to percent-encode it. An easy way for a DRS client to handle this is to get the initial DRS object JSON response from whatever redirects the compact identifier resolves to, then look for the** `self_uri` **in the JSON, which will give you the correctly percent-encoded DRS ID for subsequent DRS API calls such as the** `access` **method.** - -## Example DRS Client Compact Identifier-Based URI Resolution Process - Registering a new Compact Identifier for Your DRS Server - -See the documentation on [n2t.net](https://n2t.net/e/compact_ids.html) and [identifiers.org](https://docs.identifiers.org/) for adding your own compact identifier type and registering your DRS server as a resolver. We document this in more detail in the [main specification document](#). 
- -Now the question is how does a client resolve your newly registered compact identifier for your DRS server? *It turns out, whether specific to a DRS implementation or using existing compact identifiers like ARKs or DOIs, the DRS client resolution process for compact identifier-based URIs is exactly the same.* We briefly run through process below for a new compact identifier as an example but, again, a client will not need to do anything different from the resolution process documented in "DRS Client Compact Identifier-Based URI Resolution Process - Existing Compact Identifier Provider". - -Now we can issue DRS URI for our data objects like: - -``` -drs://mydrsprefix:12345 -``` - -This is a little simpler than working with DOIs or other existing compact identifier issuers out there since we can create our own IDs and not have to allocate them through a third-party service (see "Issuing Existing Compact Identifiers for Use with Your DRS Server" below). - -With a namespace of "mydrsprefix", the following GET request will return information about the namespace: - -``` -GET https://registry.api.identifiers.org/restApi/namespaces/search/findByPrefix?prefix=mydrsprefix -``` - -*Of course, this is a hypothetical example so the actual API call won’t work but you can see the GET request is identical to "DRS Client Compact Identifier-Based URI Resolution Process - Existing Compact Identifier Provider".* - -This information then points to resolvers for the "mydrsprefix" namespace. Hypothetically, this "mydrsprefix" namespace was assigned a namespace ID of 1829 by identifiers.org. This "id" has nothing to do with compact identifier accessions (which are used in the URL pattern as `{$id}` below) or DRS IDs. 
This namespace ID (1829 below) is purely an identifiers.org internal ID for use with their APIs: - -``` -GET https://registry.api.identifiers.org/restApi/resources/search/findAllByNamespaceId?id=1829 -``` - -*Like the previous GET request, this URL won’t work but you can see the GET request is identical to "DRS Client Compact Identifier-Based URI Resolution Process - Existing Compact Identifier Provider".* - -This returns enough information to, ultimately, identify one or more resolvers and each has a URL pattern that, for DRS-supporting systems, provides a URL template for making a successful DRS GET request. For example, the "mydrsprefix" urlPattern is: - -``` -urlPattern: "https://mydrs.server.org/ga4gh/drs/v1/objects/{$id}" -``` - -And the `{$id}` here refers to the accession from the compact identifier (in this example the accession is `12345`). If applicable, a provider code can be supplied in the above requests to specify a particular mirror if there are multiple resolvers for this namespace. - -Given this information you now know you can make a GET on the URL: - -``` -GET https://mydrs.server.org/ga4gh/drs/v1/objects/12345 -``` - -So, compared to using a third party service like DOIs and ARKs, this would be a direct pointer to a DRS server. However, just as with "DRS Client Compact Identifier-Based URI Resolution Process - Existing Compact Identifier Provider", the client should always be prepared to follow HTTPS redirects. - -*To summarize, a client resolving a custom compact identifier registered for a single DRS server is actually the same as resolving using a third-party compact identifier service like ARKs or DOIs with a DRS server, just make sure to follow redirects in all cases.* - -**Note: Issuing Existing Compact Identifiers for Use with Your DRS Server** - -See the documentation on [n2t.net](https://n2t.net/e/compact_ids.html) and [identifiers.org](https://docs.identifiers.org/) for information about all the compact identifiers that are supported. 
You can choose to use an existing compact identifier provider for your DRS server, as we did in the example above using DOIs ("DRS Client Compact Identifier-Based URI Resolution Process - Existing Compact Identifier Provider"). Just keep in mind, each provider will have their own approach for generating compact identifiers and associating them with a DRS data object URL. Some compact identifier providers, like DOIs, provide a method whereby you can register in their network and get your own prefix, allowing you to mint your own accessions. Other services, like the University of California’s [EZID](https://ezid.cdlib.org/) service, provide accounts and a mechanism to mint accessions centrally for each of your data objects. For experimentation we recommend you take a look at the EZID website that allows you to create DOIs and ARKs and associate them with your data object URLs on your DRS server for testing purposes. \ No newline at end of file