Skip to content

Commit 81851a4

Browse files
Merge pull request #188 from uc-cdis/feat/discovery_objects
initial discovery objects implementation
2 parents 9b35555 + f974907 commit 81851a4

File tree

15 files changed

+942
-92
lines changed

15 files changed

+942
-92
lines changed
3 Bytes
Binary file not shown.
5 Bytes
Binary file not shown.
0 Bytes
Binary file not shown.

docs/_build/html/_modules/gen3/auth.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,7 @@ <h1>Source code for gen3.auth</h1><div class="highlight"><pre>
216216
<span class="n">client_credentials</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
217217
<span class="n">client_scopes</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
218218
<span class="p">):</span>
219-
<span class="n">logging</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">&quot;Initatializing auth..&quot;</span><span class="p">)</span>
219+
<span class="n">logging</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">&quot;Initializing auth..&quot;</span><span class="p">)</span>
220220
<span class="bp">self</span><span class="o">.</span><span class="n">endpoint</span> <span class="o">=</span> <span class="n">remove_trailing_whitespace_and_slashes_in_url</span><span class="p">(</span><span class="n">endpoint</span><span class="p">)</span>
221221
<span class="c1"># note - `_refresh_token` is not actually a JWT refresh token - it&#39;s a</span>
222222
<span class="c1"># gen3 api key with a token as the &quot;api_key&quot; property</span>

docs/_build/html/searchindex.js

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/_build/html/tools/indexing.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -380,7 +380,7 @@ <h1>Indexing Tools<a class="headerlink" href="#indexing-tools" title="Permalink
380380

381381
<dl class="py function">
382382
<dt class="sig sig-object py" id="gen3.tools.indexing.verify_manifest.async_verify_object_manifest">
383-
<em class="property"><span class="k"><span class="pre">async</span></span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">gen3.tools.indexing.verify_manifest.</span></span><span class="sig-name descname"><span class="pre">async_verify_object_manifest</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">commons_url</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_file</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_concurrent_requests=24</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_row_parsers={'acl':</span> <span class="pre">&lt;function</span> <span class="pre">_get_acl_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'authz':</span> <span class="pre">&lt;function</span> <span class="pre">_get_authz_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'file_name':</span> <span class="pre">&lt;function</span> <span class="pre">_get_file_name_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'file_size':</span> <span class="pre">&lt;function</span> <span class="pre">_get_file_size_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'guid':</span> <span class="pre">&lt;function</span> <span class="pre">_get_guid_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'md5':</span> <span class="pre">&lt;function</span> <span class="pre">_get_md5_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'urls':</span> <span class="pre">&lt;function</span> <span class="pre">_get_urls_from_row&gt;}</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_file_delimiter=None</span></span></em>, <em class="sig-param"><span class="n"><span 
class="pre">output_filename='verify-manifest-errors-1688591583.648493.log'</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/gen3/tools/indexing/verify_manifest.html#async_verify_object_manifest"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#gen3.tools.indexing.verify_manifest.async_verify_object_manifest" title="Permalink to this definition"></a></dt>
383+
<em class="property"><span class="k"><span class="pre">async</span></span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">gen3.tools.indexing.verify_manifest.</span></span><span class="sig-name descname"><span class="pre">async_verify_object_manifest</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">commons_url</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_file</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_concurrent_requests=24</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_row_parsers={'acl':</span> <span class="pre">&lt;function</span> <span class="pre">_get_acl_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'authz':</span> <span class="pre">&lt;function</span> <span class="pre">_get_authz_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'file_name':</span> <span class="pre">&lt;function</span> <span class="pre">_get_file_name_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'file_size':</span> <span class="pre">&lt;function</span> <span class="pre">_get_file_size_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'guid':</span> <span class="pre">&lt;function</span> <span class="pre">_get_guid_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'md5':</span> <span class="pre">&lt;function</span> <span class="pre">_get_md5_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'urls':</span> <span class="pre">&lt;function</span> <span class="pre">_get_urls_from_row&gt;}</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_file_delimiter=None</span></span></em>, <em class="sig-param"><span class="n"><span 
class="pre">output_filename='verify-manifest-errors-1689019167.7611673.log'</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/gen3/tools/indexing/verify_manifest.html#async_verify_object_manifest"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#gen3.tools.indexing.verify_manifest.async_verify_object_manifest" title="Permalink to this definition"></a></dt>
384384
<dd><p>Verify all file object records into a manifest csv</p>
385385
<dl class="field-list simple">
386386
<dt class="field-odd">Parameters<span class="colon">:</span></dt>

docs/_build/html/tools/metadata.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ <h1>Metadata Tools<a class="headerlink" href="#metadata-tools" title="Permalink
101101

102102
<dl class="py function">
103103
<dt class="sig sig-object py" id="gen3.tools.metadata.ingest_manifest.async_ingest_metadata_manifest">
104-
<em class="property"><span class="k"><span class="pre">async</span></span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">gen3.tools.metadata.ingest_manifest.</span></span><span class="sig-name descname"><span class="pre">async_ingest_metadata_manifest</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">commons_url</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_file</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">metadata_source</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">auth=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_concurrent_requests=24</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_row_parsers={'guid_for_row':</span> <span class="pre">&lt;function</span> <span class="pre">_get_guid_for_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'indexed_file_object_guid':</span> <span class="pre">&lt;function</span> <span class="pre">_query_for_associated_indexd_record_guid&gt;}</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_file_delimiter=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">output_filename='ingest-metadata-manifest-errors-1688591583.9268854.log'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">get_guid_from_file=True</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">metadata_type=None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/gen3/tools/metadata/ingest_manifest.html#async_ingest_metadata_manifest"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#gen3.tools.metadata.ingest_manifest.async_ingest_metadata_manifest" title="Permalink to 
this definition"></a></dt>
104+
<em class="property"><span class="k"><span class="pre">async</span></span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">gen3.tools.metadata.ingest_manifest.</span></span><span class="sig-name descname"><span class="pre">async_ingest_metadata_manifest</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">commons_url</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_file</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">metadata_source</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">auth=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_concurrent_requests=24</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_row_parsers={'guid_for_row':</span> <span class="pre">&lt;function</span> <span class="pre">_get_guid_for_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'indexed_file_object_guid':</span> <span class="pre">&lt;function</span> <span class="pre">_query_for_associated_indexd_record_guid&gt;}</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_file_delimiter=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">output_filename='ingest-metadata-manifest-errors-1689019168.1510885.log'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">get_guid_from_file=True</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">metadata_type=None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/gen3/tools/metadata/ingest_manifest.html#async_ingest_metadata_manifest"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#gen3.tools.metadata.ingest_manifest.async_ingest_metadata_manifest" title="Permalink to 
this definition"></a></dt>
105105
<dd><p>Ingest all metadata records into a manifest csv</p>
106106
<dl class="field-list simple">
107107
<dt class="field-odd">Parameters<span class="colon">:</span></dt>

docs/howto/discoveryMetadataTools.md

Lines changed: 54 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,10 @@
44

55
- [Overview](#overview)
66
- [Export Discovery Metadata into File](#export-discovery-metadata-into-file)
7-
- [Publish Discovery Metadata from File]()
7+
- [Publish Discovery Metadata from File](#publish-discovery-metadata-from-file)
88
- [DOIs in Gen3](#dois-in-gen3-discovery-metadata-and-page-for-visualizing-public-doi-metadata)
99
- [dbGaP FHIR Metadata in Gen3 Discovery](#combine-dbgap-fhir-metadata-with-current-discovery-metadata)
10+
- [Publish Discovery Metadata Objects from File](#publish-discovery-metadata-objects-from-file)
1011

1112
### Overview
1213

@@ -529,4 +530,56 @@ def main():
529530

530531
if __name__ == "__main__":
531532
main()
533+
```
534+
535+
### Publish Discovery Metadata Objects from File
536+
Gen3's SDK can be used to ingest data objects related to datasets in a Gen3 environment from a file by using the `publish_discovery_object_metadata()` function. To obtain a file of existing metadata objects, use the `output_discovery_objects()` function. By default, new objects published from a file are appended to a dataset in the Gen3 environment. If object GUIDs from the file already exist for a dataset in the Gen3 environment, those objects are updated. If the `overwrite` option is `True`, all current metadata objects related to a dataset are instead replaced. You can also use this functionality from the CLI; see `gen3 discovery objects --help`.
537+
538+
Example of usage:
539+
```python
540+
"""
541+
Example script showing reading Discovery Objects Metadata and then
542+
publishing it back, just to demonstrate the functions.
543+
544+
Before running this, ensure your ~/.gen3/credentials.json contains
545+
an API key for a Gen3 instance to interact with and/or adjust the
546+
Gen3Auth logic to provide auth in another way
547+
"""
548+
from cdislogging import get_logger
549+
550+
from gen3.tools.metadata.discovery_objects import (
551+
publish_discovery_object_metadata,
552+
output_discovery_objects,
553+
)
554+
from gen3.utils import get_or_create_event_loop_for_thread
555+
from gen3.auth import Gen3Auth
556+
557+
logging = get_logger("__name__")
558+
559+
if __name__ == "__main__":
560+
auth = Gen3Auth()
561+
loop = get_or_create_event_loop_for_thread()
562+
logging.info(f"Reading discovery objects metadata from: {auth.endpoint}...")
563+
output_filename = loop.run_until_complete(
564+
output_discovery_objects(
565+
auth,
566+
output_format="tsv",
567+
)
568+
)
569+
logging.info(f"Output discovery objects metadata: {output_filename}")
570+
571+
# Here you can modify the file by hand or in code and then publish to update
572+
# Alternatively, you can skip the read above and just provide a file with
573+
# the object metadata you want to publish
574+
575+
logging.info(
576+
f"publishing discovery object metadata to: {auth.endpoint} from file: {output_filename}"
577+
)
578+
loop.run_until_complete(
579+
publish_discovery_object_metadata(
580+
auth,
581+
output_filename,
582+
overwrite=False,
583+
)
584+
)
532585
```

gen3/auth.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,7 @@ def __init__(
185185
client_credentials=None,
186186
client_scopes=None,
187187
):
188-
logging.debug("Initatializing auth..")
188+
logging.debug("Initializing auth..")
189189
self.endpoint = remove_trailing_whitespace_and_slashes_in_url(endpoint)
190190
# note - `_refresh_token` is not actually a JWT refresh token - it's a
191191
# gen3 api key with a token as the "api_key" property

0 commit comments

Comments
 (0)