Skip to content

Commit

Permalink
Reimplemented isin function
Browse files Browse the repository at this point in the history
  • Loading branch information
nipsn committed Mar 11, 2024
1 parent f90f42b commit 7c06134
Show file tree
Hide file tree
Showing 2 changed files with 103 additions and 50 deletions.
105 changes: 64 additions & 41 deletions src/pykx/pandas_api/pandas_meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,49 +246,72 @@ def abs(self, numeric_only=False):
@api_return
def isin(self, values):
    """Return a boolean table flagging which cells of this table occur in ``values``.

    The result has the same columns (and, for a keyed table, the same keys)
    as the table the method is called on. How ``values`` is interpreted
    depends on its type:

    * list (Python or PyKX): every cell is tested with q ``in`` against the
      whole list.
    * table (q type ``98h``): cells are compared positionally, matching on
      row number and column name. If the calling table is keyed, nothing
      can match and an all-false table is returned.
    * keyed table: cells are compared where the key and the column name
      both match. If the calling table is not keyed, or the two tables have
      a different number of key columns, an all-false table is returned.
    * dict (Python ``dict`` or q type ``99h``): each column is tested
      against the entry stored under that column's name; columns without
      an entry are all false.

    Parameters:
        values: The list, table, keyed table or dictionary to test against.

    Returns:
        A table (keyed if the calling table is keyed) of booleans.

    Raises:
        ValueError: If ``values`` is none of the supported types.
    """
    tab = self

    # Keyed-ness is detected from the wrapper class name on both sides.
    keyed_in = 'KeyedTable' in str(type(tab))
    keyed_val = 'KeyedTable' in str(type(values))

    # All-false table with tab's columns and row count; when tab is a keyed
    # table (99h) the original keys are re-attached to it.
    false_table = q("""{u:$[99h~type x;cols value x;cols x];
                        v:(count[u],count[x])#0b;
                        t:flip u!v;
                        $[99h~type x;key[x]!t;t]}""", tab)

    # Compares two tables that are each keyed on a single column `x`
    # (it: input, vt: values, ft: all-false table). Only the pairs of
    # (shared key, shared column) are compared; the outcome is pivoted back
    # into a table keyed on x and OR-ed with ft.
    isin_tab = q('''{[it;vt;ft]
                     idxt:raze value flip key it;
                     colt:1_cols it;
                     idxv:raze value flip key vt;
                     colv:1_cols vt;
                     p:(idxt inter idxv) cross colt inter colv;
                     cv:{[k1;k2;ti;tv]
                         enlist[ti[k1][k2]] in enlist[tv[k1][k2]]}[;;it;vt];
                     vals:flip `x`field`values!flip[p],flip cv .' p;
                     aux:exec ((`$string field)!values) by x:x from vals;
                     aux or ft}''')

    # Builds a one-column table `x` holding 0..n-1, used as a row-number index.
    gen_idx = q('{flip enlist[`x]!enlist til x}')

    # list (PyKX and Python)
    if "list" in str(type(values)).lower():
        return q('{x in y}', tab, values)
    # table
    elif q('{98h~type x}', values):
        if keyed_in != keyed_val:
            return false_table

        # Index both sides by row number so rows are matched positionally.
        idx_tab = gen_idx(len(tab))
        idx_values = gen_idx(len(values))
        return q.value(isin_tab(tab.set_index(idx_tab),
                                values.set_index(idx_values),
                                false_table.set_index(idx_tab)))
    # keyed table
    elif keyed_val:
        if keyed_in != keyed_val or len(q.key(tab).columns) != len(q.key(values).columns):
            return false_table

        # Remember the real keys so they can be restored on the result.
        old_idx_tab = q.key(tab)
        idx_tab = gen_idx(len(tab))

        # Index for `values`, derived from where the keys shared by both
        # tables are found among tab's keys.
        idx_values = q('''{kt:flip value flip key x;
                           kv:flip value flip key y;
                           flip enlist[`x]!enlist count[kv]#kt?inter[kv;kt]}''', tab, values)

        res = q.value(isin_tab(q.value(tab).set_index(idx_tab),
                               q.value(values).set_index(idx_values),
                               q.value(false_table).set_index(idx_tab)))

        return res.set_index(old_idx_tab)
    # dict
    elif isinstance(values, dict) or q('{99h~type x}', values):
        return q('''{[t;d]
                     tv:$[kt:99h~type t;value t;t];
                     cd:{[k;t;d]
                         $[k in key d;
                           t[k] in d[k];
                           count[t]#0b]}[;tv;d];
                     r:flip cols[tv]!cd each cols tv;
                     $[kt;key[t]!r;r]}
                 ''', tab, values)
    else:
        raise ValueError("Not a valid argument type.")

@convert_result
def all(self, axis=0, bool_only=False, skipna=True):
Expand Down
48 changes: 39 additions & 9 deletions tests/test_pandas_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -2118,19 +2118,49 @@ def test_pandas_isin(kx):
k2: ("A";" ";"B";" ";"A");
k3: (`a;1.;`c;5;`d))""")

multi_keyed_index = kx.q('([]x:0 1 0 1;y:0 0 1 1)')
multi_keyed_table = kx.q('''([]a:`foo`bar`baz`qux;
b:"f"$til 4;
c:reverse "f"$til 4)''').set_index(multi_keyed_index)

list_value = kx.q('(`a;1.;"A")')
tab_value = kx.q('([] k1: 1. 2. 3.; k2: ("A";"B";"C"))')
dict_value = {"k1": [1., 2., 3.]}
keyed_tab_value = kx.q('([`a`b] k1: 1. 2.; k2: ("A";"B"))')

assert tab.isin(list_value).pd().equals(tab.pd().isin(list_value.py()))
assert tab.isin(tab_value).pd().equals(tab.pd().isin(tab_value.pd()))
assert tab.isin(dict_value).pd().equals(tab.pd().isin(dict_value))
assert tab.isin(keyed_tab_value).pd().equals(tab.pd().isin(keyed_tab_value))
assert keyed_tab.isin(list_value).pd().equals(keyed_tab.pd().isin(list_value.py()))
assert keyed_tab.isin(dict_value).pd().equals(keyed_tab.pd().isin(dict_value))
assert keyed_tab.isin(keyed_tab_value).pd().equals(keyed_tab.pd().isin(keyed_tab_value.pd()))
assert keyed_tab.isin(tab_value).pd().equals(keyed_tab.pd().isin(tab_value))
multi_keyed_value_index = kx.q('([]x:1 1 0;y:0 1 0)')
multi_keyed_value = kx.q('''([]a:`bar`foo`foo;
b:0 1 0)''').set_index(multi_keyed_value_index)

pd.testing.assert_frame_equal(tab.isin(list_value).pd(),
tab.pd().isin(list_value.py()))
pd.testing.assert_frame_equal(tab.isin(dict_value).pd(),
tab.pd().isin(dict_value))
pd.testing.assert_frame_equal(tab.isin(tab_value).pd(),
tab.pd().isin(tab_value.pd()))
pd.testing.assert_frame_equal(tab.isin(keyed_tab_value).pd(),
tab.pd().isin(keyed_tab_value.pd()))

pd.testing.assert_frame_equal(keyed_tab.isin(list_value).pd(),
keyed_tab.pd().isin(list_value.py()))
pd.testing.assert_frame_equal(keyed_tab.isin(dict_value).pd(),
keyed_tab.pd().isin(dict_value))
pd.testing.assert_frame_equal(keyed_tab.isin(tab_value).pd(),
keyed_tab.pd().isin(tab_value.pd()))
pd.testing.assert_frame_equal(keyed_tab.isin(keyed_tab_value).pd(),
keyed_tab.pd().isin(keyed_tab_value.pd()))
pd.testing.assert_frame_equal(keyed_tab.isin(multi_keyed_value).pd(),
keyed_tab.pd().isin(multi_keyed_value.pd()))

pd.testing.assert_frame_equal(multi_keyed_table.isin(list_value).pd(),
multi_keyed_table.pd().isin(list_value.py()))
pd.testing.assert_frame_equal(multi_keyed_table.isin(dict_value).pd(),
multi_keyed_table.pd().isin(dict_value))
pd.testing.assert_frame_equal(multi_keyed_table.isin(tab_value).pd(),
multi_keyed_table.pd().isin(tab_value.pd()))
pd.testing.assert_frame_equal(multi_keyed_table.isin(multi_keyed_value).pd(),
multi_keyed_table.pd().isin(multi_keyed_value.pd()))
pd.testing.assert_frame_equal(multi_keyed_table.isin(keyed_tab_value).pd(),
multi_keyed_table.pd().isin(keyed_tab_value.pd()))


def test_pandas_count(q):
Expand Down

0 comments on commit 7c06134

Please sign in to comment.