Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ma order repair #67

Merged
merged 17 commits into from
Nov 23, 2024
4 changes: 4 additions & 0 deletions benchmarks/benchmarks/bench_ma_order.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@

length = [5, 50, 500, 5000, 50000, 500000, 5000000, 50000000]
skip = [
(50000, "Worst"),
(50000, "DNA"),
(50000, "Best"),
(50000, "Normal"),
(500000, "Worst"),
(500000, "DNA"),
(500000, "Best"),
Expand Down
33 changes: 9 additions & 24 deletions src/foapy/ma/alphabet.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import numpy as np
import numpy.ma as ma

from foapy.exceptions import InconsistentOrderException, Not1DArrayException
from foapy.exceptions import Not1DArrayException


def alphabet(X) -> np.ma.MaskedArray:
Expand All @@ -28,7 +28,7 @@ def alphabet(X) -> np.ma.MaskedArray:
>>> masked_a = ma.masked_array(a, mask)
>>> b = ma_alphabet(masked_a)
>>> b
['a' 'c' -- 'd']
['a' 'c' 'd']

----2----
>>> a = ['a', 'c', 'c', 'e', 'd', 'a']
Expand Down Expand Up @@ -60,15 +60,15 @@ def alphabet(X) -> np.ma.MaskedArray:
>>> masked_a = ma.masked_array(a, mask)
>>> b = ma_alphabet(masked_a)
>>> b
['a' -- 'c']
['a' 'c']

----6----
>>> a = ['a', 'b', 'c', 'a', 'b', 'c', 'c', 'c', 'b', 'a', 'c', 'b', 'c']
>>> mask = [0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1]
>>> masked_a = ma.masked_array(a, mask)
>>> b = ma_alphabet(masked_a)
>>> b
['a' -- --]
['a']

----7----
>>> a = ['a', 'b', 'c', 'a', 'b', 'c', 'c', 'c', 'b', 'a', 'c', 'b', 'c']
Expand All @@ -92,30 +92,15 @@ def alphabet(X) -> np.ma.MaskedArray:
{"message": f"Incorrect array form. Expected d1 array, exists {X.ndim}"}
)

data = ma.getdata(X)
perm = data.argsort(kind="mergesort")
mask = ma.getmask(X)
perm = X.argsort(kind="mergesort")

mask_shape = data.shape
mask_shape = X.shape
unique_mask = np.empty(mask_shape, dtype=bool)
unique_mask[:1] = True
unique_mask[1:] = data[perm[1:]] != data[perm[:-1]]

first_appears_indecies = np.argwhere(unique_mask).ravel()
count_true_in_mask_by_slice = np.add.reduceat(
ma.getmaskarray(X[perm]), first_appears_indecies
)
slice_length = np.diff(np.r_[first_appears_indecies, len(X)])
consistency_index = count_true_in_mask_by_slice / slice_length
consistency_errors = np.argwhere(
(consistency_index != 0) & (consistency_index != 1)
).ravel()
if len(consistency_errors) > 0:
i = data[consistency_errors[0]]
raise InconsistentOrderException(
{"message": f"Element '{i}' have mask and unmasked appearance"}
)
unique_mask[1:] = X[perm[1:]] != X[perm[:-1]]
unique_mask = np.logical_and(unique_mask, ~mask[perm])

result_mask = np.full_like(unique_mask, False)
result_mask[:1] = True
result_mask[perm[unique_mask]] = True
return X[result_mask]
28 changes: 20 additions & 8 deletions src/foapy/ma/order.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@
from foapy import order as general_order
from foapy.exceptions import Not1DArrayException

from . import alphabet


def order(X, return_alphabet=False) -> np.ma.MaskedArray:
"""
Expand Down Expand Up @@ -116,16 +114,30 @@ def order(X, return_alphabet=False) -> np.ma.MaskedArray:
{"message": f"Incorrect array form. Expected d1 array, exists {X.ndim}"}
)

alphabet_values = alphabet(X)
order = general_order(ma.getdata(X))
order, alphabet_values = general_order(ma.getdata(X), return_alphabet=True)

power = len(alphabet_values)
length = len(X)

result = np.tile(order, power).reshape(power, length)
result_data = np.tile(order, power).reshape(power, length)
alphabet_indecies = np.arange(power).reshape(power, 1)
mask = result != alphabet_indecies
result_mask = result_data != alphabet_indecies

indecies_selector = np.any(~np.logical_or(result_mask, ma.getmaskarray(X)), axis=1)

if np.any(indecies_selector):
result_data = result_data[indecies_selector]
result_mask = result_mask[indecies_selector]
else:
# If every item is masked, we need to define an empty array explicitly;
# otherwise the result would have shape (0, length),
# which affects array comparison
# (see tests/test_ma_order.py::TestMaOrder::test_void_int_values_with_mask).
result_data = []
result_mask = []

result = ma.masked_array(result_data, mask=result_mask)

if return_alphabet: # Checking for get alphabet (optional)
return ma.masked_array(result, mask=mask), alphabet_values
return ma.masked_array(result, mask=mask)
return result, alphabet_values[indecies_selector]
return result
29 changes: 13 additions & 16 deletions tests/test_ma_alphabet.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
from unittest import TestCase

import numpy as np
import numpy.ma as ma
import pytest
from numpy.ma.testutils import assert_equal

from foapy.exceptions import InconsistentOrderException, Not1DArrayException
from foapy.exceptions import Not1DArrayException
from foapy.ma import alphabet


Expand All @@ -15,63 +16,59 @@ class TestMaAlphabet(TestCase):

def test_string_values_with_mask(self):
X = ma.masked_array(["a", "c", "c", "e", "d", "a"], mask=[0, 0, 0, 1, 0, 0])
expected = ma.masked_array(["a", "c", "e", "d"], mask=[0, 0, 1, 0])
expected = ["a", "c", "d"]
exists = alphabet(X)
assert_equal(expected, exists)

def test_string_values_with_no_mask(self):
X = ma.masked_array(["a", "c", "c", "e", "d", "a"], mask=[0, 0, 0, 0, 0, 0])
expected = ma.masked_array(["a", "c", "e", "d"], mask=[0, 0, 0, 0])
expected = ["a", "c", "e", "d"]
exists = alphabet(X)
assert_equal(expected, exists)

def test_integer_values_with_no_mask(self):
X = ma.masked_array([1, 2, 2, 3, 4, 1], mask=[0, 0, 0, 0, 0, 0])
expected = ma.masked_array([1, 2, 3, 4], mask=[0, 0, 0, 0])
expected = [1, 2, 3, 4]
exists = alphabet(X)
assert_equal(expected, exists)

def test_integer_values_with_mask(self):
X = ma.masked_array([1, 2, 2, 3, 4, 1], mask=[1, 0, 0, 0, 0, 1])
expected = ma.masked_array([1, 2, 3, 4], mask=[1, 0, 0, 0])
expected = [2, 3, 4]
exists = alphabet(X)
assert_equal(expected, exists)

def test_with_single_integer_value(self):
X = ma.masked_array([1], mask=[0])
expected = ma.masked_array([1], mask=[0])
expected = [1]
exists = alphabet(X)
assert_equal(expected, exists)

def test_with_single_string_value_with_mask(self):
X = ma.masked_array(["a"], mask=[1])
expected = ma.masked_array(["a"], mask=[1])
expected = np.asanyarray([], dtype=X.dtype)
exists = alphabet(X)
assert_equal(expected, exists)

def test_with_no_values(self):
X = ma.masked_array([], mask=[])
expected = ma.masked_array([], mask=[])
expected = []
exists = alphabet(X)
assert_equal(expected, exists)

def test_several_mask_obj(self):
X = ma.masked_array(["a", "b", "c", "c", "b", "a"], mask=[0, 1, 1, 1, 1, 0])
expected = ma.masked_array(["a", "b", "c"], mask=[0, 1, 1])
expected = ["a"]
exists = alphabet(X)
assert_equal(expected, exists)

def test_with_exception(self):
X = ma.masked_array(
["a", "b", "c", "a", "b", "c", "b", "a"], mask=[0, 1, 0, 0, 0, 0, 1, 0]
)

with pytest.raises(InconsistentOrderException) as e_info:
alphabet(X)
self.assertEqual(
"Element b have mask and unmasked appearance",
e_info.message,
)
expected = ["a", "c", "b"]
exists = alphabet(X)
assert_equal(expected, exists)

def test_with_d2_array_exception(self):
X = ma.masked_array([[2, 2, 2], [2, 2, 2]], mask=[[0, 0, 0], [0, 0, 0]])
Expand Down
58 changes: 49 additions & 9 deletions tests/test_ma_order.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import pytest
from numpy.ma.testutils import assert_equal

from foapy.exceptions import InconsistentOrderException, Not1DArrayException
from foapy.exceptions import Not1DArrayException
from foapy.ma import order


Expand Down Expand Up @@ -37,11 +37,10 @@ def test_string_values_with_mask(self):
expected = ma.masked_array(
[
[0, None, 0, None, None],
[None, 1, None, None, None],
[None, None, None, 2, None],
[None, None, None, None, 3],
],
mask=[[0, 1, 0, 1, 1], [1, 0, 1, 1, 1], [1, 1, 1, 0, 1], [1, 1, 1, 1, 0]],
mask=[[0, 1, 0, 1, 1], [1, 1, 1, 0, 1], [1, 1, 1, 1, 0]],
)
exists = order(X)
assert_equal(expected, exists)
Expand Down Expand Up @@ -168,10 +167,51 @@ def test_with_exception(self):
X = ma.masked_array(
["a", "b", "c", "a", "b", "c", "b", "a"], mask=[0, 1, 0, 0, 0, 0, 1, 0]
)
expected = ma.masked_array(
[
[0, None, None, 0, None, None, None, 0],
[None, None, 1, None, None, 1, None, None],
[None, None, None, None, 2, None, None, None],
],
mask=[
[0, 1, 1, 0, 1, 1, 1, 0],
[1, 1, 0, 1, 1, 0, 1, 1],
[1, 1, 1, 1, 0, 1, 1, 1],
],
)
exists = order(X)
assert_equal(expected, exists)

with pytest.raises(InconsistentOrderException) as e_info:
order(X)
self.assertEqual(
"Element b have mask and unmasked appearance",
e_info.message,
)
def test_int_values_with_mask(self):
X = ma.masked_array([1, 2, 1, 1, 4], mask=[0, 1, 0, 0, 0])
expected = ma.masked_array(
[
[0, None, 0, 0, None],
[None, None, None, None, 2],
],
mask=[[0, 1, 0, 0, 1], [1, 1, 1, 1, 0]],
)
exists = order(X)
assert_equal(expected, exists)

def test_void_int_values_with_mask(self):
X = ma.masked_array([1], mask=[1])
expected = ma.masked_array(
[],
mask=[],
)
exists = order(X)
assert_equal(expected, exists)

def test_int_values_with_middle_mask(self):
X = ma.masked_array([1, 2, 3, 3, 4, 2], mask=[0, 0, 1, 1, 0, 0])
expected = ma.masked_array(
[
[0, None, None, None, None, None],
[None, 1, None, None, None, 1],
[None, None, None, None, 3, None],
],
mask=[[0, 1, 1, 1, 1, 1], [1, 0, 1, 1, 1, 0], [1, 1, 1, 1, 0, 1]],
)
exists = order(X)
assert_equal(expected, exists)
Loading