Skip to content

Commit

Permalink
beta 3
Browse files Browse the repository at this point in the history
  • Loading branch information
CarlKCarlK committed Nov 11, 2023
1 parent 6fc8ae5 commit f620f93
Show file tree
Hide file tree
Showing 4 changed files with 101 additions and 7 deletions.
27 changes: 26 additions & 1 deletion bed_reader/_open_bed.py
Original file line number Diff line number Diff line change
Expand Up @@ -1511,11 +1511,36 @@ def _read_csv(filepath, delimiter=None, dtype=None, usecols=None):
# Find the dtype for this column
col_dtype = dtype.get(input_index, np.str_)
# Convert the column list to a numpy array with the specified dtype
columns.append(np.array(col, dtype=col_dtype))
columns.append(_convert_to_dtype(col, col_dtype))

return columns, row_count


def _convert_to_dtype(str_arr, dtype):
    """Convert a NumPy array of strings to ``dtype``.

    Parameters
    ----------
    str_arr
        NumPy array of strings (``.astype`` is called on it).
    dtype
        One of ``np.str_``, ``np.float32`` or ``np.int32``.

    Returns
    -------
    ``str_arr`` itself when ``dtype`` is ``np.str_``; otherwise a new array
    of the requested dtype.

    Raises
    ------
    ValueError
        If a value cannot be parsed as the requested dtype. For ``np.int32``
        this includes values that parse as floats but are not whole numbers
        (or do not fit in an int32).
    """
    assert dtype in [np.str_, np.float32, np.int32]  # real assert

    if dtype == np.str_:
        return str_arr

    try:
        return str_arr.astype(dtype)
    except ValueError as e:
        if dtype == np.float32:
            raise
        assert dtype == np.int32  # real assert

        # For backwards compatibility, accept integer-valued float literals
        # such as "3.0". float64 is used as the intermediate type because it
        # represents every int32 exactly; float32 would silently round
        # values above 2**24.
        try:
            float_arr = str_arr.astype(np.float64)
        except ValueError:
            # Not numeric at all: report the original int-parsing error.
            raise e

    # NaN, inf and out-of-range values produce an unspecified int here; the
    # comparison below rejects them, so the cast warning itself is just noise.
    with np.errstate(invalid="ignore"):
        new_arr = float_arr.astype(np.int32)

    mismatch = new_arr != float_arr
    if mismatch.any():
        first_bad = str_arr[mismatch][0]
        raise ValueError(f"invalid literal for int: '{first_bad}'")
    return new_arr


if __name__ == "__main__":
import pytest

Expand Down
33 changes: 33 additions & 0 deletions bed_reader/tests/test_open_bed.py
Original file line number Diff line number Diff line change
Expand Up @@ -898,6 +898,39 @@ def test_sparse():
print(val_sparse.shape)


def test_convert_to_dtype():
    """Check ``_convert_to_dtype`` against a table of string columns.

    Each table row is ``(original, as str, as int32, as float32)``; a
    ``None`` expectation means the conversion must raise ``ValueError``.
    """
    from bed_reader._open_bed import _convert_to_dtype

    def as_array(values):
        # Keep the "must fail" marker as None; everything else becomes an array.
        return None if values is None else np.array(values)

    table = [
        (["a", "b", "c"], ["a", "b", "c"], None, None),
        (["1.0", "2.0", "3.0"], ["1.0", "2.0", "3.0"], [1, 2, 3], [1.0, 2.0, 3.0]),
        (["1.0", "2.0", "3.5"], ["1.0", "2.0", "3.5"], None, [1.0, 2.0, 3.5]),
        (["1", "2", "3"], ["1", "2", "3"], [1, 2, 3], [1.0, 2.0, 3.0]),
        (["1", "A", "3"], ["1", "A", "3"], None, None),
    ]
    target_dtypes = (np.str_, np.int32, np.float32)

    for row in table:
        ori, *expectations = (as_array(cell) for cell in row)
        for dtype, exp in zip(target_dtypes, expectations):
            try:
                actual = _convert_to_dtype(ori, dtype)
            except ValueError as e:
                print(e)
                assert exp is None
            else:
                assert np.array_equal(actual, exp)


if __name__ == "__main__":
logging.basicConfig(level=logging.INFO)

Expand Down
31 changes: 28 additions & 3 deletions docs/_modules/bed_reader/_open_bed.html
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>bed_reader._open_bed &mdash; Bed Reader 1.0.0-beta.2 documentation</title>
<title>bed_reader._open_bed &mdash; Bed Reader 1.0.0-beta.3 documentation</title>
<link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" />
<!--[if lt IE 9]>
Expand All @@ -30,7 +30,7 @@
Bed Reader
</a>
<div class="version">
1.0.0-beta.2
1.0.0-beta.3
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
Expand Down Expand Up @@ -1580,11 +1580,36 @@ <h1>Source code for bed_reader._open_bed</h1><div class="highlight"><pre>
<span class="c1"># Find the dtype for this column</span>
<span class="n">col_dtype</span> <span class="o">=</span> <span class="n">dtype</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">input_index</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">str_</span><span class="p">)</span>
<span class="c1"># Convert the column list to a numpy array with the specified dtype</span>
<span class="n">columns</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">(</span><span class="n">col</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">col_dtype</span><span class="p">))</span>
<span class="n">columns</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">_convert_to_dtype</span><span class="p">(</span><span class="n">col</span><span class="p">,</span> <span class="n">col_dtype</span><span class="p">))</span>

<span class="k">return</span> <span class="n">columns</span><span class="p">,</span> <span class="n">row_count</span>


<span class="k">def</span> <span class="nf">_convert_to_dtype</span><span class="p">(</span><span class="n">str_arr</span><span class="p">,</span> <span class="n">dtype</span><span class="p">):</span>
<span class="k">assert</span> <span class="n">dtype</span> <span class="ow">in</span> <span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">str_</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">float32</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">int32</span><span class="p">]</span> <span class="c1"># real assert</span>

<span class="k">if</span> <span class="n">dtype</span> <span class="o">==</span> <span class="n">np</span><span class="o">.</span><span class="n">str_</span><span class="p">:</span>
<span class="k">return</span> <span class="n">str_arr</span>

<span class="k">try</span><span class="p">:</span>
<span class="n">new_arr</span> <span class="o">=</span> <span class="n">str_arr</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="n">dtype</span><span class="p">)</span>
<span class="k">except</span> <span class="ne">ValueError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="k">if</span> <span class="n">dtype</span> <span class="o">==</span> <span class="n">np</span><span class="o">.</span><span class="n">float32</span><span class="p">:</span>
<span class="k">raise</span> <span class="n">e</span>
<span class="c1"># for backwards compatibility, see if intermediate float helps int conversion</span>
<span class="k">try</span><span class="p">:</span>
<span class="k">assert</span> <span class="n">dtype</span> <span class="o">==</span> <span class="n">np</span><span class="o">.</span><span class="n">int32</span> <span class="c1"># real assert</span>
<span class="n">float_arr</span> <span class="o">=</span> <span class="n">str_arr</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">float32</span><span class="p">)</span>
<span class="k">except</span> <span class="ne">ValueError</span><span class="p">:</span>
<span class="k">raise</span> <span class="n">e</span>
<span class="n">new_arr</span> <span class="o">=</span> <span class="n">float_arr</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">int32</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">np</span><span class="o">.</span><span class="n">array_equal</span><span class="p">(</span><span class="n">new_arr</span><span class="p">,</span> <span class="n">float_arr</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
<span class="sa">f</span><span class="s2">&quot;invalid literal for int: &#39;</span><span class="si">{</span><span class="n">str_arr</span><span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">where</span><span class="p">(</span><span class="n">new_arr</span><span class="w"> </span><span class="o">!=</span><span class="w"> </span><span class="n">float_arr</span><span class="p">)][:</span><span class="mi">1</span><span class="p">]</span><span class="si">}</span><span class="s2">&#39;)&quot;</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">new_arr</span>


<span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s2">&quot;__main__&quot;</span><span class="p">:</span>
<span class="kn">import</span> <span class="nn">pytest</span>

Expand Down
17 changes: 14 additions & 3 deletions docs/_static/_sphinx_javascript_frameworks_compat.js
Original file line number Diff line number Diff line change
@@ -1,9 +1,20 @@
/* Compatability shim for jQuery and underscores.js.
/*
* _sphinx_javascript_frameworks_compat.js
* ~~~~~~~~~~
*
* Compatibility shim for jQuery and underscore.js.
*
* WILL BE REMOVED IN Sphinx 6.0
* xref RemovedInSphinx60Warning
*
* Copyright Sphinx contributors
* Released under the two clause BSD licence
*/

/**
* select a different prefix for underscore
*/
$u = _.noConflict();


/**
* small helper function to urldecode strings
*
Expand Down

0 comments on commit f620f93

Please sign in to comment.