Skip to content

Commit 4bb220e

Browse files
authored
Merge pull request #43 from uc-cdis/fix/encode_spaces
(PXP-6042): add encoded spaces
2 parents 23a5ecd + d9a2186 commit 4bb220e

File tree

10 files changed: 43 additions and 17 deletions

10 files changed

+43
-17
lines changed

docs/_build/html/_modules/gen3/tools/indexing/download_manifest.html

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -324,9 +324,13 @@ <h1>Source code for gen3.tools.indexing.download_manifest</h1><div class="highli
324324
<span class="k">for</span> <span class="n">record</span> <span class="ow">in</span> <span class="nb">list</span><span class="p">(</span><span class="n">records</span><span class="p">):</span>
325325
<span class="n">manifest_row</span> <span class="o">=</span> <span class="p">[</span>
326326
<span class="n">record</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;did&quot;</span><span class="p">),</span>
327-
<span class="s2">&quot; &quot;</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">record</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;urls&quot;</span><span class="p">)),</span>
328-
<span class="s2">&quot; &quot;</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">record</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;authz&quot;</span><span class="p">)),</span>
329-
<span class="s2">&quot; &quot;</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">record</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;acl&quot;</span><span class="p">)),</span>
327+
<span class="s2">&quot; &quot;</span><span class="o">.</span><span class="n">join</span><span class="p">(</span>
328+
<span class="p">[</span><span class="n">url</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot; &quot;</span><span class="p">,</span> <span class="s2">&quot;%20&quot;</span><span class="p">)</span> <span class="k">for</span> <span class="n">url</span> <span class="ow">in</span> <span class="n">record</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;urls&quot;</span><span class="p">)]</span>
329+
<span class="p">),</span>
330+
<span class="s2">&quot; &quot;</span><span class="o">.</span><span class="n">join</span><span class="p">(</span>
331+
<span class="p">[</span><span class="n">auth</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot; &quot;</span><span class="p">,</span> <span class="s2">&quot;%20&quot;</span><span class="p">)</span> <span class="k">for</span> <span class="n">auth</span> <span class="ow">in</span> <span class="n">record</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;authz&quot;</span><span class="p">)]</span>
332+
<span class="p">),</span>
333+
<span class="s2">&quot; &quot;</span><span class="o">.</span><span class="n">join</span><span class="p">([</span><span class="n">a</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot; &quot;</span><span class="p">,</span> <span class="s2">&quot;%20&quot;</span><span class="p">)</span> <span class="k">for</span> <span class="n">a</span> <span class="ow">in</span> <span class="n">record</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;acl&quot;</span><span class="p">)]),</span>
330334
<span class="n">record</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;hashes&quot;</span><span class="p">,</span> <span class="p">{})</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;md5&quot;</span><span class="p">),</span>
331335
<span class="n">record</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;size&quot;</span><span class="p">),</span>
332336
<span class="n">record</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;file_name&quot;</span><span class="p">),</span>

docs/_build/html/_modules/gen3/tools/indexing/index_manifest.html

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -267,15 +267,15 @@ <h1>Source code for gen3.tools.indexing.index_manifest</h1><div class="highlight
267267
<span class="k">try</span><span class="p">:</span>
268268
<span class="n">urls</span> <span class="o">=</span> <span class="p">(</span>
269269
<span class="p">[</span>
270-
<span class="n">element</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;&#39;&quot;</span><span class="p">,</span> <span class="s2">&quot;&quot;</span><span class="p">)</span>
270+
<span class="n">element</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;&#39;&quot;</span><span class="p">,</span> <span class="s2">&quot;&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;%20&quot;</span><span class="p">,</span> <span class="s2">&quot; &quot;</span><span class="p">)</span>
271271
<span class="k">for</span> <span class="n">element</span> <span class="ow">in</span> <span class="n">_standardize_str</span><span class="p">(</span><span class="n">fi</span><span class="p">[</span><span class="s2">&quot;url&quot;</span><span class="p">])</span><span class="o">.</span><span class="n">strip</span><span class="p">()[</span><span class="mi">1</span><span class="p">:</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s2">&quot; &quot;</span><span class="p">)</span>
272272
<span class="p">]</span>
273273
<span class="k">if</span> <span class="s2">&quot;url&quot;</span> <span class="ow">in</span> <span class="n">fi</span> <span class="ow">and</span> <span class="n">fi</span><span class="p">[</span><span class="s2">&quot;url&quot;</span><span class="p">]</span> <span class="o">!=</span> <span class="s2">&quot;[]&quot;</span>
274274
<span class="k">else</span> <span class="p">[]</span>
275275
<span class="p">)</span>
276276
<span class="n">authz</span> <span class="o">=</span> <span class="p">(</span>
277277
<span class="p">[</span>
278-
<span class="n">element</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;&#39;&quot;</span><span class="p">,</span> <span class="s2">&quot;&quot;</span><span class="p">)</span>
278+
<span class="n">element</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;&#39;&quot;</span><span class="p">,</span> <span class="s2">&quot;&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;%20&quot;</span><span class="p">,</span> <span class="s2">&quot; &quot;</span><span class="p">)</span>
279279
<span class="k">for</span> <span class="n">element</span> <span class="ow">in</span> <span class="n">_standardize_str</span><span class="p">(</span><span class="n">fi</span><span class="p">[</span><span class="s2">&quot;authz&quot;</span><span class="p">])</span><span class="o">.</span><span class="n">strip</span><span class="p">()[</span><span class="mi">1</span><span class="p">:</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s2">&quot; &quot;</span><span class="p">)</span>
280280
<span class="p">]</span>
281281
<span class="k">if</span> <span class="s2">&quot;authz&quot;</span> <span class="ow">in</span> <span class="n">fi</span> <span class="ow">and</span> <span class="n">fi</span><span class="p">[</span><span class="s2">&quot;authz&quot;</span><span class="p">]</span> <span class="o">!=</span> <span class="s2">&quot;[]&quot;</span>
@@ -288,7 +288,7 @@ <h1>Source code for gen3.tools.indexing.index_manifest</h1><div class="highlight
288288
<span class="k">else</span><span class="p">:</span>
289289
<span class="n">acl</span> <span class="o">=</span> <span class="p">(</span>
290290
<span class="p">[</span>
291-
<span class="n">element</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;&#39;&quot;</span><span class="p">,</span> <span class="s2">&quot;&quot;</span><span class="p">)</span>
291+
<span class="n">element</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;&#39;&quot;</span><span class="p">,</span> <span class="s2">&quot;&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;%20&quot;</span><span class="p">,</span> <span class="s2">&quot; &quot;</span><span class="p">)</span>
292292
<span class="k">for</span> <span class="n">element</span> <span class="ow">in</span> <span class="n">_standardize_str</span><span class="p">(</span><span class="n">fi</span><span class="p">[</span><span class="s2">&quot;acl&quot;</span><span class="p">])</span>
293293
<span class="o">.</span><span class="n">strip</span><span class="p">()[</span><span class="mi">1</span><span class="p">:</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span>
294294
<span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s2">&quot; &quot;</span><span class="p">)</span>

docs/_build/html/_modules/gen3/tools/indexing/verify_manifest.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -404,7 +404,7 @@ <h1>Source code for gen3.tools.indexing.verify_manifest</h1><div class="highligh
404404
<span class="n">output</span> <span class="o">=</span> <span class="sa">f</span><span class="s2">&quot;</span><span class="si">{</span><span class="n">guid</span><span class="si">}</span><span class="s2">|md5|expected </span><span class="si">{</span><span class="n">md5</span><span class="si">}</span><span class="s2">|actual </span><span class="si">{</span><span class="n">actual_record</span><span class="p">[</span><span class="s1">&#39;hashes&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;md5&#39;</span><span class="p">)</span><span class="si">}</span><span class="se">\n</span><span class="s2">&quot;</span>
405405
<span class="k">await</span> <span class="n">output_queue</span><span class="o">.</span><span class="n">put</span><span class="p">(</span><span class="n">output</span><span class="p">)</span>
406406
<span class="n">logging</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="n">output</span><span class="p">)</span>
407-
407+
<span class="n">urls</span> <span class="o">=</span> <span class="p">[</span><span class="n">url</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;%20&quot;</span><span class="p">,</span> <span class="s2">&quot; &quot;</span><span class="p">)</span> <span class="k">for</span> <span class="n">url</span> <span class="ow">in</span> <span class="n">urls</span><span class="p">]</span>
408408
<span class="k">if</span> <span class="nb">sorted</span><span class="p">(</span><span class="n">urls</span><span class="p">)</span> <span class="o">!=</span> <span class="nb">sorted</span><span class="p">(</span><span class="n">actual_record</span><span class="p">[</span><span class="s2">&quot;urls&quot;</span><span class="p">]):</span>
409409
<span class="n">output</span> <span class="o">=</span> <span class="sa">f</span><span class="s2">&quot;</span><span class="si">{</span><span class="n">guid</span><span class="si">}</span><span class="s2">|urls|expected </span><span class="si">{</span><span class="n">urls</span><span class="si">}</span><span class="s2">|actual </span><span class="si">{</span><span class="n">actual_record</span><span class="p">[</span><span class="s1">&#39;urls&#39;</span><span class="p">]</span><span class="si">}</span><span class="se">\n</span><span class="s2">&quot;</span>
410410
<span class="k">await</span> <span class="n">output_queue</span><span class="o">.</span><span class="n">put</span><span class="p">(</span><span class="n">output</span><span class="p">)</span>

docs/_build/html/tools/indexing.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -312,7 +312,7 @@ <h1>Indexing Tools<a class="headerlink" href="#indexing-tools" title="Permalink
312312

313313
<dl class="py function">
314314
<dt id="gen3.tools.indexing.verify_manifest.async_verify_object_manifest">
315-
<em class="property">async </em><code class="sig-prename descclassname">gen3.tools.indexing.verify_manifest.</code><code class="sig-name descname">async_verify_object_manifest</code><span class="sig-paren">(</span><em class="sig-param">commons_url</em>, <em class="sig-param">manifest_file</em>, <em class="sig-param">max_concurrent_requests=24</em>, <em class="sig-param">manifest_row_parsers={'acl': &lt;function _get_acl_from_row&gt;</em>, <em class="sig-param">'authz': &lt;function _get_authz_from_row&gt;</em>, <em class="sig-param">'file_name': &lt;function _get_file_name_from_row&gt;</em>, <em class="sig-param">'file_size': &lt;function _get_file_size_from_row&gt;</em>, <em class="sig-param">'guid': &lt;function _get_guid_from_row&gt;</em>, <em class="sig-param">'md5': &lt;function _get_md5_from_row&gt;</em>, <em class="sig-param">'urls': &lt;function _get_urls_from_row&gt;}</em>, <em class="sig-param">manifest_file_delimiter=None</em>, <em class="sig-param">output_filename='verify-manifest-errors-1588359228.0173433.log'</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/gen3/tools/indexing/verify_manifest.html#async_verify_object_manifest"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#gen3.tools.indexing.verify_manifest.async_verify_object_manifest" title="Permalink to this definition"></a></dt>
315+
<em class="property">async </em><code class="sig-prename descclassname">gen3.tools.indexing.verify_manifest.</code><code class="sig-name descname">async_verify_object_manifest</code><span class="sig-paren">(</span><em class="sig-param">commons_url</em>, <em class="sig-param">manifest_file</em>, <em class="sig-param">max_concurrent_requests=24</em>, <em class="sig-param">manifest_row_parsers={'acl': &lt;function _get_acl_from_row&gt;</em>, <em class="sig-param">'authz': &lt;function _get_authz_from_row&gt;</em>, <em class="sig-param">'file_name': &lt;function _get_file_name_from_row&gt;</em>, <em class="sig-param">'file_size': &lt;function _get_file_size_from_row&gt;</em>, <em class="sig-param">'guid': &lt;function _get_guid_from_row&gt;</em>, <em class="sig-param">'md5': &lt;function _get_md5_from_row&gt;</em>, <em class="sig-param">'urls': &lt;function _get_urls_from_row&gt;}</em>, <em class="sig-param">manifest_file_delimiter=None</em>, <em class="sig-param">output_filename='verify-manifest-errors-1589556920.2149627.log'</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/gen3/tools/indexing/verify_manifest.html#async_verify_object_manifest"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#gen3.tools.indexing.verify_manifest.async_verify_object_manifest" title="Permalink to this definition"></a></dt>
316316
<dd><p>Verify all file object records into a manifest csv</p>
317317
<dl class="field-list simple">
318318
<dt class="field-odd">Parameters</dt>

docs/_build/html/tools/metadata.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ <h1>Metadata Tools<a class="headerlink" href="#metadata-tools" title="Permalink
101101

102102
<dl class="py function">
103103
<dt id="gen3.tools.metadata.ingest_manifest.async_ingest_metadata_manifest">
104-
<em class="property">async </em><code class="sig-prename descclassname">gen3.tools.metadata.ingest_manifest.</code><code class="sig-name descname">async_ingest_metadata_manifest</code><span class="sig-paren">(</span><em class="sig-param">commons_url</em>, <em class="sig-param">manifest_file</em>, <em class="sig-param">metadata_source</em>, <em class="sig-param">auth=None</em>, <em class="sig-param">max_concurrent_requests=24</em>, <em class="sig-param">manifest_row_parsers={'guid_for_row': &lt;function _get_guid_for_row&gt;</em>, <em class="sig-param">'indexed_file_object_guid': &lt;function _query_for_associated_indexd_record_guid&gt;}</em>, <em class="sig-param">manifest_file_delimiter=None</em>, <em class="sig-param">output_filename='ingest-metadata-manifest-errors-1588359228.3701751.log'</em>, <em class="sig-param">get_guid_from_file=True</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/gen3/tools/metadata/ingest_manifest.html#async_ingest_metadata_manifest"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#gen3.tools.metadata.ingest_manifest.async_ingest_metadata_manifest" title="Permalink to this definition"></a></dt>
104+
<em class="property">async </em><code class="sig-prename descclassname">gen3.tools.metadata.ingest_manifest.</code><code class="sig-name descname">async_ingest_metadata_manifest</code><span class="sig-paren">(</span><em class="sig-param">commons_url</em>, <em class="sig-param">manifest_file</em>, <em class="sig-param">metadata_source</em>, <em class="sig-param">auth=None</em>, <em class="sig-param">max_concurrent_requests=24</em>, <em class="sig-param">manifest_row_parsers={'guid_for_row': &lt;function _get_guid_for_row&gt;</em>, <em class="sig-param">'indexed_file_object_guid': &lt;function _query_for_associated_indexd_record_guid&gt;}</em>, <em class="sig-param">manifest_file_delimiter=None</em>, <em class="sig-param">output_filename='ingest-metadata-manifest-errors-1589556920.5896738.log'</em>, <em class="sig-param">get_guid_from_file=True</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/gen3/tools/metadata/ingest_manifest.html#async_ingest_metadata_manifest"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#gen3.tools.metadata.ingest_manifest.async_ingest_metadata_manifest" title="Permalink to this definition"></a></dt>
105105
<dd><p>Ingest all metadata records into a manifest csv</p>
106106
<dl class="field-list simple">
107107
<dt class="field-odd">Parameters</dt>

gen3/tools/indexing/download_manifest.py

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -291,9 +291,13 @@ async def _parse_from_queue(queue):
291 291      for record in list(records):
292 292          manifest_row = [
293 293              record.get("did"),
294     -            " ".join(record.get("urls")),
295     -            " ".join(record.get("authz")),
296     -            " ".join(record.get("acl")),
    294 +            " ".join(
    295 +                [url.replace(" ", "%20") for url in record.get("urls")]
    296 +            ),
    297 +            " ".join(
    298 +                [auth.replace(" ", "%20") for auth in record.get("authz")]
    299 +            ),
    300 +            " ".join([a.replace(" ", "%20") for a in record.get("acl")]),
297 301              record.get("hashes", {}).get("md5"),
298 302              record.get("size"),
299 303              record.get("file_name"),

0 commit comments

Comments
 (0)