Skip to content

Commit

Permalink
Added implementation and tests for std() function
Browse files Browse the repository at this point in the history
  • Loading branch information
nipsn committed Nov 16, 2023
1 parent 7de2b7c commit cb170cc
Show file tree
Hide file tree
Showing 2 changed files with 87 additions and 0 deletions.
23 changes: 23 additions & 0 deletions src/pykx/pandas_api/pandas_meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,29 @@ def mean(self, axis: int = 0, numeric_only: bool = False):
tab
)

@api_return
def std(self, axis: int = 0, ddof: int = 1, numeric_only: bool = False):
    """Compute the standard deviation of a table along the given axis.

    Parameters:
        axis: ``0`` computes one value per column (result keyed by column
            name); any other value computes one value per row (result keyed
            by the row index rendered as a symbol).
        ddof: Delta degrees of freedom. The divisor used for each sample is
            ``count - ddof``.
        numeric_only: When true the table is first passed through
            ``_get_numeric_only_subtable`` (presumably dropping non-numeric
            columns -- confirm against that helper).

    Returns:
        The q dictionary produced by the query, mapping each column name
        (``axis=0``) or row index symbol (``axis=1``) to its standard
        deviation. Every value is a float null when ``ddof`` equals the
        sample size, because the divisor would be zero.
    """
    tab = self
    if 'Keyed' in str(type(tab)):
        # Unkey the table and drop its key columns so only values remain.
        tab = q('{(keys x) _ 0!x}', tab)
    if numeric_only:
        tab = _get_numeric_only_subtable(tab)

    by_column = axis == 0
    # Row-wise results are keyed by the stringified row index, and each row
    # (a dictionary) is flattened to a float vector before the calculation.
    key_str = '' if by_column else '`$string '
    val_str = '' if by_column else '"f"$value '
    query_str = 'cols[tab]' if by_column else 'til[count[tab]]'
    # Discard entries whose computed value is the generic null (::).
    where_str = ' where not (::)~/:r[;1]'

    # Size of each sample: the number of rows when reducing down columns,
    # the number of columns when reducing across rows. Counting inside q
    # avoids converting the whole table to pandas just to measure it.
    sample_size = q('{count x}' if by_column else '{count cols x}', tab).py()

    if ddof == sample_size:
        # The divisor would be zero; return float nulls for every key so the
        # result is NaN rather than an infinity.
        return q(f'{{[tab]{query_str}!count[{query_str}]#0n}}', tab)

    return q(
        '{[tab]'
        f'r:{{[tab; x] ({key_str}x; '
        f'{{avg sqrt (sum xexp[x-(avg x);2]) % count[x]-{ddof}}} '
        f'{val_str}tab[x])}}[tab;] each {query_str};'
        f'(,/) {{(enlist x 0)!(enlist x 1)}} each r{where_str}}}',
        tab
    )


@api_return
def median(self, axis: int = 0, numeric_only: bool = False):
tab = self
Expand Down
64 changes: 64 additions & 0 deletions tests/test_pandas_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -1543,6 +1543,69 @@ def test_mean(kx, q):
q_m = tab.mean(axis=1)


def test_std(kx, q):
    """Check that ``Table.std`` agrees with ``pandas.DataFrame.std``.

    Covers column-wise and row-wise results, ``ddof`` values of 0 and 4
    (the latter equals the row count, so both sides must be NaN), keyed
    tables, ``numeric_only`` and the error raised for non-numeric columns.
    """
    df = pd.DataFrame(
        {
            'a': [1, 2, 2, 4],
            'b': [1, 2, 6, 7],
            'c': [7, 8, 9, 10],
            'd': [7, 11, 14, 14]
        }
    )
    tab = kx.toq(df)
    p_m = df.std()
    q_m = tab.std()
    for c in q.key(q_m).py():
        assert p_m[c] == q_m[c].py()
    p_m = df.std(axis=1)
    q_m = tab.std(axis=1)
    # Row-wise results are indexed by row, so iterate over the row count
    # rather than the column count.
    for c in range(len(df)):
        assert p_m[c] == q_m[q('{`$string x}', c)].py()
    p_m = df.std(ddof=0)
    q_m = tab.std(ddof=0)
    for c in q.key(q_m).py():
        assert p_m[c] == q_m[c].py()

    # ddof equal to the number of rows: both implementations return NaN.
    p_m = df.std(ddof=4)
    q_m = tab.std(ddof=4)
    for c in q.key(q_m).py():
        assert np.isnan(p_m[c]) == np.isnan(q_m[c].py())

    # Keyed table: the key column must not contribute to the result.
    q['tab'] = kx.toq(df)
    tab = q('1!`idx xcols update idx: til count tab from tab')
    p_m = df.std()
    q_m = tab.std()
    for c in q.key(q_m).py():
        assert p_m[c] == q_m[c].py()
    p_m = df.std(axis=1)
    q_m = tab.std(axis=1)
    for c in range(len(df)):
        assert p_m[c] == q_m[q('{`$string x}', c)].py()

    df = pd.DataFrame(
        {
            'a': [1, 2, 2, 4],
            'b': [1, 2, 6, 7],
            'c': [7, 8, 9, 10],
            'd': ['foo', 'bar', 'baz', 'qux']
        }
    )
    tab = kx.toq(df)
    p_m = df.std(numeric_only=True)
    q_m = tab.std(numeric_only=True)
    for c in q.key(q_m).py():
        assert p_m[c] == q_m[c].py()
    p_m = df.std(axis=1, numeric_only=True)
    q_m = tab.std(axis=1, numeric_only=True)
    for c in range(len(df)):
        assert p_m[c] == q_m[q('{`$string x}', c)].py()

    # Without numeric_only the string column makes the q query fail.
    with pytest.raises(kx.QError):
        tab.std()
    with pytest.raises(kx.QError):
        tab.std(axis=1)


def test_median(kx, q):
df = pd.DataFrame(
{
Expand Down Expand Up @@ -2029,3 +2092,4 @@ def test_keyed_loc_fixes(q):
mkt[['k1', 'y']]
with pytest.raises(KeyError):
mkt['k1']

0 comments on commit cb170cc

Please sign in to comment.