Skip to content

Commit

Permalink
TabularData: Add dropna method.
Browse files Browse the repository at this point in the history
This method mimics pandas `dropna`.
  • Loading branch information
daavoo authored and efiop committed Oct 29, 2021
1 parent 4dd5441 commit 24abc6f
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 0 deletions.
30 changes: 30 additions & 0 deletions dvc/compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
Mapping,
MutableSequence,
Sequence,
Set,
Tuple,
Union,
overload,
Expand Down Expand Up @@ -181,6 +182,35 @@ def as_dict(
{k: self._columns[k][i] for k in keys} for i in range(len(self))
]

def dropna(self, axis: str = "rows"):
    """Drop, in place, the rows or columns that contain a fill value.

    Mimics pandas ``dropna``: a cell counts as missing when it equals
    ``self._fill_value``.

    Args:
        axis: ``"rows"`` drops every row holding at least one missing
            cell; ``"cols"`` drops every column holding at least one
            missing cell.

    Raises:
        ValueError: if ``axis`` is neither ``"rows"`` nor ``"cols"``.
    """
    if axis not in ["rows", "cols"]:
        # The trailing space keeps the two sentences from running together.
        raise ValueError(
            f"Invalid 'axis' value {axis}. "
            "Choose one of ['rows', 'cols']"
        )

    # Column names, looked up once instead of once per empty cell.
    # NOTE(review): assumes `keys()` returns the names in column order.
    keys = self.keys()

    # Holds row indexes when dropping rows, column names otherwise.
    to_drop: Set = set()
    for n_row, row in enumerate(self):
        for n_col, col in enumerate(row):
            if col != self._fill_value:
                continue
            if axis == "rows":
                # One missing cell is enough to discard the whole row.
                to_drop.add(n_row)
                break
            # Keep scanning: other columns of this row may be empty too.
            to_drop.add(keys[n_col])

    if axis == "rows":
        # Rebuild every column without the cells of the dropped rows.
        for name in keys:
            self._columns[name] = Column(
                [
                    x
                    for n, x in enumerate(self._columns[name])
                    if n not in to_drop
                ]
            )
    else:
        self.drop(*to_drop)


def _normalize_float(val: float, precision: int):
return f"{val:.{precision}g}"
Expand Down
33 changes: 33 additions & 0 deletions tests/unit/test_tabular_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,3 +177,36 @@ def test_row_from_dict():
["", "", "value3", "value4", "", ""],
["", "", "value3", "", "value5", "value6"],
]


@pytest.mark.parametrize(
    "axis,expected",
    [
        ("rows", [["foo", "bar", "foobar"]]),
        ("cols", [["foo"], ["foo"], ["foo"]]),
    ],
)
def test_dropna(axis, expected):
    """Check that ``dropna`` removes the rows/columns holding empty cells."""
    table = TabularData(["col-1", "col-2", "col-3"])
    table.extend([["foo"], ["foo", "bar"], ["foo", "bar", "foobar"]])

    # Sanity check: short rows come back padded with empty strings.
    padded_rows = [
        ["foo", "", ""],
        ["foo", "bar", ""],
        ["foo", "bar", "foobar"],
    ]
    assert list(table) == padded_rows

    table.dropna(axis)

    assert list(table) == expected


def test_dropna_invalid_axis():
    """An unsupported ``axis`` value must raise ``ValueError``."""
    table = TabularData(["col-1", "col-2", "col-3"])
    expected_message = "Invalid 'axis' value foo."

    with pytest.raises(ValueError, match=expected_message):
        table.dropna("foo")

0 comments on commit 24abc6f

Please sign in to comment.