Skip to content

Commit bdd1048

Browse files
authored
Merge pull request #206 from uc-cdis/fix/drs-pull-manifest-validation
HP-1330 Fix/drs pull manifest validation
2 parents ca4db3a + ed822ec commit bdd1048

File tree

9 files changed

+10
-8
lines changed

9 files changed

+10
-8
lines changed
258 Bytes
Binary file not shown.
0 Bytes
Binary file not shown.
0 Bytes
Binary file not shown.

docs/_build/html/_modules/gen3/tools/download/drs_download.html

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ <h1>Source code for gen3.tools.download.drs_download</h1><div class="highlight">
6363
<span class="kn">import</span> <span class="nn">requests</span>
6464
<span class="kn">import</span> <span class="nn">zipfile</span>
6565
<span class="kn">from</span> <span class="nn">cdislogging</span> <span class="kn">import</span> <span class="n">get_logger</span>
66-
<span class="kn">from</span> <span class="nn">dataclasses_json</span> <span class="kn">import</span> <span class="n">dataclass_json</span><span class="p">,</span> <span class="n">LetterCase</span>
66+
<span class="kn">from</span> <span class="nn">dataclasses_json</span> <span class="kn">import</span> <span class="n">dataclass_json</span><span class="p">,</span> <span class="n">LetterCase</span><span class="p">,</span> <span class="n">Undefined</span>
6767
<span class="kn">from</span> <span class="nn">dateutil</span> <span class="kn">import</span> <span class="n">parser</span> <span class="k">as</span> <span class="n">date_parser</span>
6868
<span class="kn">from</span> <span class="nn">tqdm</span> <span class="kn">import</span> <span class="n">tqdm</span>
6969
<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="n">urlparse</span>
@@ -82,9 +82,10 @@ <h1>Source code for gen3.tools.download.drs_download</h1><div class="highlight">
8282
<span class="n">logger</span> <span class="o">=</span> <span class="n">get_logger</span><span class="p">(</span><span class="s2">&quot;__name__&quot;</span><span class="p">)</span>
8383

8484

85+
<span class="c1"># Add undefined=Undefined.EXCLUDE here because we only cares if the input manifest has the minimal required metadata fields for data download, any extra metadata fields should be ignored and should not cause a failure</span>
8586
<div class="viewcode-block" id="Manifest">
8687
<a class="viewcode-back" href="../../../../tools/drs_pull.html#gen3.tools.download.drs_download.Manifest">[docs]</a>
87-
<span class="nd">@dataclass_json</span><span class="p">(</span><span class="n">letter_case</span><span class="o">=</span><span class="n">LetterCase</span><span class="o">.</span><span class="n">SNAKE</span><span class="p">)</span>
88+
<span class="nd">@dataclass_json</span><span class="p">(</span><span class="n">letter_case</span><span class="o">=</span><span class="n">LetterCase</span><span class="o">.</span><span class="n">SNAKE</span><span class="p">,</span> <span class="n">undefined</span><span class="o">=</span><span class="n">Undefined</span><span class="o">.</span><span class="n">EXCLUDE</span><span class="p">)</span>
8889
<span class="nd">@dataclass</span>
8990
<span class="k">class</span> <span class="nc">Manifest</span><span class="p">:</span>
9091
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Data class representing a Gen3 JSON manifest typically exported from a Gen3 discovery page.</span>

docs/_build/html/searchindex.js

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/_build/html/tools/indexing.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -381,7 +381,7 @@ <h1>Indexing Tools<a class="headerlink" href="#indexing-tools" title="Link to th
381381

382382
<dl class="py function">
383383
<dt class="sig sig-object py" id="gen3.tools.indexing.verify_manifest.async_verify_object_manifest">
384-
<em class="property"><span class="k"><span class="pre">async</span></span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">gen3.tools.indexing.verify_manifest.</span></span><span class="sig-name descname"><span class="pre">async_verify_object_manifest</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">commons_url</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_file</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_concurrent_requests=24</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_row_parsers={'acl':</span> <span class="pre">&lt;function</span> <span class="pre">_get_acl_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'authz':</span> <span class="pre">&lt;function</span> <span class="pre">_get_authz_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'file_name':</span> <span class="pre">&lt;function</span> <span class="pre">_get_file_name_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'file_size':</span> <span class="pre">&lt;function</span> <span class="pre">_get_file_size_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'guid':</span> <span class="pre">&lt;function</span> <span class="pre">_get_guid_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'md5':</span> <span class="pre">&lt;function</span> <span class="pre">_get_md5_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'urls':</span> <span class="pre">&lt;function</span> <span class="pre">_get_urls_from_row&gt;}</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_file_delimiter=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">output_filename='verify-manifest-errors-1707344834.5324836.log'</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/gen3/tools/indexing/verify_manifest.html#async_verify_object_manifest"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#gen3.tools.indexing.verify_manifest.async_verify_object_manifest" title="Link to this definition"></a></dt>
384+
<em class="property"><span class="k"><span class="pre">async</span></span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">gen3.tools.indexing.verify_manifest.</span></span><span class="sig-name descname"><span class="pre">async_verify_object_manifest</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">commons_url</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_file</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_concurrent_requests=24</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_row_parsers={'acl':</span> <span class="pre">&lt;function</span> <span class="pre">_get_acl_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'authz':</span> <span class="pre">&lt;function</span> <span class="pre">_get_authz_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'file_name':</span> <span class="pre">&lt;function</span> <span class="pre">_get_file_name_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'file_size':</span> <span class="pre">&lt;function</span> <span class="pre">_get_file_size_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'guid':</span> <span class="pre">&lt;function</span> <span class="pre">_get_guid_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'md5':</span> <span class="pre">&lt;function</span> <span class="pre">_get_md5_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'urls':</span> <span class="pre">&lt;function</span> <span class="pre">_get_urls_from_row&gt;}</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_file_delimiter=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">output_filename='verify-manifest-errors-1707755397.8761852.log'</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/gen3/tools/indexing/verify_manifest.html#async_verify_object_manifest"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#gen3.tools.indexing.verify_manifest.async_verify_object_manifest" title="Link to this definition"></a></dt>
385385
<dd><p>Verify all file object records into a manifest csv</p>
386386
<dl class="field-list simple">
387387
<dt class="field-odd">Parameters<span class="colon">:</span></dt>

docs/_build/html/tools/metadata.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ <h1>Metadata Tools<a class="headerlink" href="#metadata-tools" title="Link to th
102102

103103
<dl class="py function">
104104
<dt class="sig sig-object py" id="gen3.tools.metadata.ingest_manifest.async_ingest_metadata_manifest">
105-
<em class="property"><span class="k"><span class="pre">async</span></span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">gen3.tools.metadata.ingest_manifest.</span></span><span class="sig-name descname"><span class="pre">async_ingest_metadata_manifest</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">commons_url</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_file</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">metadata_source</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">auth=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_concurrent_requests=24</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_row_parsers={'guid_for_row':</span> <span class="pre">&lt;function</span> <span class="pre">_get_guid_for_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'indexed_file_object_guid':</span> <span class="pre">&lt;function</span> <span class="pre">_query_for_associated_indexd_record_guid&gt;}</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_file_delimiter=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">output_filename='ingest-metadata-manifest-errors-1707344834.8447776.log'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">get_guid_from_file=True</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">metadata_type=None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/gen3/tools/metadata/ingest_manifest.html#async_ingest_metadata_manifest"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#gen3.tools.metadata.ingest_manifest.async_ingest_metadata_manifest" title="Link to this definition"></a></dt>
105+
<em class="property"><span class="k"><span class="pre">async</span></span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">gen3.tools.metadata.ingest_manifest.</span></span><span class="sig-name descname"><span class="pre">async_ingest_metadata_manifest</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">commons_url</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_file</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">metadata_source</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">auth=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_concurrent_requests=24</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_row_parsers={'guid_for_row':</span> <span class="pre">&lt;function</span> <span class="pre">_get_guid_for_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'indexed_file_object_guid':</span> <span class="pre">&lt;function</span> <span class="pre">_query_for_associated_indexd_record_guid&gt;}</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_file_delimiter=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">output_filename='ingest-metadata-manifest-errors-1707755398.2227244.log'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">get_guid_from_file=True</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">metadata_type=None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/gen3/tools/metadata/ingest_manifest.html#async_ingest_metadata_manifest"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#gen3.tools.metadata.ingest_manifest.async_ingest_metadata_manifest" title="Link to this definition"></a></dt>
106106
<dd><p>Ingest all metadata records into a manifest csv</p>
107107
<dl class="field-list simple">
108108
<dt class="field-odd">Parameters<span class="colon">:</span></dt>

gen3/tools/download/drs_download.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
import requests
3232
import zipfile
3333
from cdislogging import get_logger
34-
from dataclasses_json import dataclass_json, LetterCase
34+
from dataclasses_json import dataclass_json, LetterCase, Undefined
3535
from dateutil import parser as date_parser
3636
from tqdm import tqdm
3737
from urllib.parse import urlparse
@@ -50,7 +50,8 @@
5050
logger = get_logger("__name__")
5151

5252

53-
@dataclass_json(letter_case=LetterCase.SNAKE)
53+
# Add undefined=Undefined.EXCLUDE here because we only cares if the input manifest has the minimal required metadata fields for data download, any extra metadata fields should be ignored and should not cause a failure
54+
@dataclass_json(letter_case=LetterCase.SNAKE, undefined=Undefined.EXCLUDE)
5455
@dataclass
5556
class Manifest:
5657
"""Data class representing a Gen3 JSON manifest typically exported from a Gen3 discovery page.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[tool.poetry]
22
name = "gen3"
33
homepage = "https://gen3.org/"
4-
version = "4.22.2"
4+
version = "4.22.4"
55
description = "Gen3 CLI and Python SDK"
66
authors = ["Center for Translational Data Science at the University of Chicago <[email protected]>"]
77
license = "Apache-2.0"

0 commit comments

Comments
 (0)