Skip to content

Commit 026ba23

Browse files
Merge pull request #133 from uc-cdis/fix/index_verify_fix
Catch and log warning if manifest verify doesn't find index record
2 parents e03a19e + af155a9 commit 026ba23

File tree

8 files changed

+23
-5
lines changed

8 files changed

+23
-5
lines changed
214 Bytes
Binary file not shown.
0 Bytes
Binary file not shown.
-5 Bytes
Binary file not shown.

docs/_build/html/_modules/gen3/tools/indexing/verify_manifest.html

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ <h1>Source code for gen3.tools.indexing.verify_manifest</h1><div class="highligh
7373
<span class="sd"> MAX_CONCURRENT_REQUESTS (int): maximum number of desired concurrent requests across</span>
7474
<span class="sd"> processes/threads</span>
7575
<span class="sd">&quot;&quot;&quot;</span>
76+
<span class="kn">import</span> <span class="nn">aiohttp</span>
7677
<span class="kn">import</span> <span class="nn">asyncio</span>
7778
<span class="kn">import</span> <span class="nn">csv</span>
7879
<span class="kn">from</span> <span class="nn">cdislogging</span> <span class="kn">import</span> <span class="n">get_logger</span>
@@ -455,7 +456,15 @@ <h1>Source code for gen3.tools.indexing.verify_manifest</h1><div class="highligh
455456
<span class="k">if</span> <span class="s2">&quot;https&quot;</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">commons_url</span><span class="p">:</span>
456457
<span class="n">ssl</span> <span class="o">=</span> <span class="kc">False</span>
457458

458-
<span class="k">return</span> <span class="k">await</span> <span class="n">index</span><span class="o">.</span><span class="n">async_get_record</span><span class="p">(</span><span class="n">guid</span><span class="p">,</span> <span class="n">_ssl</span><span class="o">=</span><span class="n">ssl</span><span class="p">)</span>
459+
<span class="n">record</span> <span class="o">=</span> <span class="kc">None</span>
460+
461+
<span class="k">try</span><span class="p">:</span>
462+
<span class="k">return</span> <span class="k">await</span> <span class="n">index</span><span class="o">.</span><span class="n">async_get_record</span><span class="p">(</span><span class="n">guid</span><span class="p">,</span> <span class="n">_ssl</span><span class="o">=</span><span class="n">ssl</span><span class="p">)</span>
463+
464+
<span class="k">except</span> <span class="n">aiohttp</span><span class="o">.</span><span class="n">client_exceptions</span><span class="o">.</span><span class="n">ClientResponseError</span> <span class="k">as</span> <span class="n">exc</span><span class="p">:</span>
465+
<span class="n">logging</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;couldn&#39;t get record. error: </span><span class="si">{</span><span class="n">exc</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
466+
467+
<span class="k">return</span> <span class="n">record</span>
459468
</pre></div>
460469

461470
</div>

docs/_build/html/searchindex.js

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/_build/html/tools/indexing.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -340,7 +340,7 @@ <h1>Indexing Tools<a class="headerlink" href="#indexing-tools" title="Permalink
340340

341341
<dl class="py function">
342342
<dt class="sig sig-object py" id="gen3.tools.indexing.verify_manifest.async_verify_object_manifest">
343-
<em class="property"><span class="k"><span class="pre">async</span></span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">gen3.tools.indexing.verify_manifest.</span></span><span class="sig-name descname"><span class="pre">async_verify_object_manifest</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">commons_url</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_file</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_concurrent_requests=24</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_row_parsers={'acl':</span> <span class="pre">&lt;function</span> <span class="pre">_get_acl_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'authz':</span> <span class="pre">&lt;function</span> <span class="pre">_get_authz_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'file_name':</span> <span class="pre">&lt;function</span> <span class="pre">_get_file_name_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'file_size':</span> <span class="pre">&lt;function</span> <span class="pre">_get_file_size_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'guid':</span> <span class="pre">&lt;function</span> <span class="pre">_get_guid_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'md5':</span> <span class="pre">&lt;function</span> <span class="pre">_get_md5_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'urls':</span> <span class="pre">&lt;function</span> <span class="pre">_get_urls_from_row&gt;}</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_file_delimiter=None</span></span></em>, <em class="sig-param"><span class="n"><span 
class="pre">output_filename='verify-manifest-errors-1654789636.6798043.log'</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/gen3/tools/indexing/verify_manifest.html#async_verify_object_manifest"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#gen3.tools.indexing.verify_manifest.async_verify_object_manifest" title="Permalink to this definition"></a></dt>
343+
<em class="property"><span class="k"><span class="pre">async</span></span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">gen3.tools.indexing.verify_manifest.</span></span><span class="sig-name descname"><span class="pre">async_verify_object_manifest</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">commons_url</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_file</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_concurrent_requests=24</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_row_parsers={'acl':</span> <span class="pre">&lt;function</span> <span class="pre">_get_acl_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'authz':</span> <span class="pre">&lt;function</span> <span class="pre">_get_authz_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'file_name':</span> <span class="pre">&lt;function</span> <span class="pre">_get_file_name_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'file_size':</span> <span class="pre">&lt;function</span> <span class="pre">_get_file_size_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'guid':</span> <span class="pre">&lt;function</span> <span class="pre">_get_guid_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'md5':</span> <span class="pre">&lt;function</span> <span class="pre">_get_md5_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'urls':</span> <span class="pre">&lt;function</span> <span class="pre">_get_urls_from_row&gt;}</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_file_delimiter=None</span></span></em>, <em class="sig-param"><span class="n"><span 
class="pre">output_filename='verify-manifest-errors-1654807944.3028057.log'</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/gen3/tools/indexing/verify_manifest.html#async_verify_object_manifest"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#gen3.tools.indexing.verify_manifest.async_verify_object_manifest" title="Permalink to this definition"></a></dt>
344344
<dd><p>Verify all file object records into a manifest csv</p>
345345
<dl class="field-list simple">
346346
<dt class="field-odd">Parameters<span class="colon">:</span></dt>

docs/_build/html/tools/metadata.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ <h1>Metadata Tools<a class="headerlink" href="#metadata-tools" title="Permalink
104104

105105
<dl class="py function">
106106
<dt class="sig sig-object py" id="gen3.tools.metadata.ingest_manifest.async_ingest_metadata_manifest">
107-
<em class="property"><span class="k"><span class="pre">async</span></span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">gen3.tools.metadata.ingest_manifest.</span></span><span class="sig-name descname"><span class="pre">async_ingest_metadata_manifest</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">commons_url</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_file</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">metadata_source</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">auth=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_concurrent_requests=24</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_row_parsers={'guid_for_row':</span> <span class="pre">&lt;function</span> <span class="pre">_get_guid_for_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'indexed_file_object_guid':</span> <span class="pre">&lt;function</span> <span class="pre">_query_for_associated_indexd_record_guid&gt;}</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_file_delimiter=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">output_filename='ingest-metadata-manifest-errors-1654789636.9773014.log'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">get_guid_from_file=True</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">metadata_type=None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/gen3/tools/metadata/ingest_manifest.html#async_ingest_metadata_manifest"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#gen3.tools.metadata.ingest_manifest.async_ingest_metadata_manifest" title="Permalink to 
this definition"></a></dt>
107+
<em class="property"><span class="k"><span class="pre">async</span></span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">gen3.tools.metadata.ingest_manifest.</span></span><span class="sig-name descname"><span class="pre">async_ingest_metadata_manifest</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">commons_url</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_file</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">metadata_source</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">auth=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_concurrent_requests=24</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_row_parsers={'guid_for_row':</span> <span class="pre">&lt;function</span> <span class="pre">_get_guid_for_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'indexed_file_object_guid':</span> <span class="pre">&lt;function</span> <span class="pre">_query_for_associated_indexd_record_guid&gt;}</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_file_delimiter=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">output_filename='ingest-metadata-manifest-errors-1654807944.560571.log'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">get_guid_from_file=True</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">metadata_type=None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/gen3/tools/metadata/ingest_manifest.html#async_ingest_metadata_manifest"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#gen3.tools.metadata.ingest_manifest.async_ingest_metadata_manifest" title="Permalink to 
this definition"></a></dt>
108108
<dd><p>Ingest all metadata records into a manifest csv</p>
109109
<dl class="field-list simple">
110110
<dt class="field-odd">Parameters<span class="colon">:</span></dt>

gen3/tools/indexing/verify_manifest.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ def _get_authz_from_row(row):
3939
MAX_CONCURRENT_REQUESTS (int): maximum number of desired concurrent requests across
4040
processes/threads
4141
"""
42+
import aiohttp
4243
import asyncio
4344
import csv
4445
from cdislogging import get_logger
@@ -421,4 +422,12 @@ async def _get_record_from_indexd(guid, commons_url, lock):
421422
if "https" not in commons_url:
422423
ssl = False
423424

424-
return await index.async_get_record(guid, _ssl=ssl)
425+
record = None
426+
427+
try:
428+
return await index.async_get_record(guid, _ssl=ssl)
429+
430+
except aiohttp.client_exceptions.ClientResponseError as exc:
431+
logging.warning(f"couldn't get record. error: {exc}")
432+
433+
return record

0 commit comments

Comments
 (0)