hablapps · nipsn · Mar 11, 2024 · Mar 11, 2024
diff --git a/docs/user-guide/advanced/Pandas_API.ipynb b/docs/user-guide/advanced/Pandas_API.ipynb
@@ -2659,6 +2659,105 @@
     "Example Table."
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "7f08eb84",
+   "metadata": {},
+   "source": [
+    "## Comparison\n",
+    "\n",
+    "### Table.isin()\n",
+    "\n",
+    "```\n",
+    "Table.isin(\n",
+    "    values\n",
+    ")\n",
+    "```\n",
+    "\n",
+    "Whether each element in the Table is contained in values.\n",
+    "\n",
+    "**Parameters:**\n",
+    "\n",
+    "| Name             | Type                                | Description                                                                  | Default  |\n",
+    "| :--------------: | :---------------------------------: | :-------------------------------------------------------------------------- | :------: |\n",
+    "| values | Union[List, dict, Table, KeyedTable] | The result will only be true at a location if all the labels match. If values is a dict, the keys must be the column names, which must match. If values is a Table or KeyedTable, then both the index and column labels must match. | _required_ |\n",
+    "\n",
+    "\n",
+    "**Returns:**\n",
+    "\n",
+    "| Type                      | Description                                     |\n",
+    "| :-----------------------: | :---------------------------------------------- |\n",
+    "| Union[Table, KeyedTable] | Boolean type Table/KeyedTable showing whether each element in the input Table is contained in values. |\n",
+    "\n",
+    "**Examples:**\n",
+    "\n",
+    "Example Table."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f6e453c8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tab = kx.Table(data={'x': list(range(3)), 'y': [\"A\", \"B\", \"C\"]})"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "aadd23c1",
+   "metadata": {},
+   "source": [
+    "Find if element \"A\" or 1 is in the table:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d41d40e0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tab.isin([\"A\", 1])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cff856fe",
+   "metadata": {},
+   "source": [
+    "Find if element \"A\" is in column \"y\":"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bccf59d9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tab.isin({\"y\": [\"A\"]})"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ed704cce",
+   "metadata": {},
+   "source": [
+    "Find if element \"A\" is in the first position of the \"y\" column:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "41840cc0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tab.isin(kx.Table(data={\"y\":[\"A\"]}))"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,

diff --git a/src/pykx/pandas_api/pandas_meta.py b/src/pykx/pandas_api/pandas_meta.py
@@ -243,6 +243,76 @@ def abs(self, numeric_only=False):
             tab = _get_numeric_only_subtable(self)
         return q.abs(tab)
 
+    @api_return
+    def isin(self, values):  # Boolean table: is each element of self contained in `values`?
+        tab = self
+        # Keyedness of both arguments is detected from their type names.
+        keyed_in = 'KeyedTable' in str(type(tab))
+        keyed_val = 'KeyedTable' in str(type(values))
+        # All-false table with the same columns, row count and (if keyed) key as `tab`.
+        false_table = q("""{u:$[99h~type x;cols value x;cols x];
+                            v:(count[u],count[x])#0b;
+                            t:flip u!v;
+                            $[99h~type x;key[x]!t;t]}""", tab)
+        # Inputs keyed on `x`: compare cells on shared (key, column) pairs, then OR with ft.
+        isin_tab = q('''{[it;vt;ft]
+                          idxt:raze value flip key it;
+                          colt:1_cols it;
+                          idxv:raze value flip key vt;
+                          colv:1_cols vt;
+                          p:(idxt inter idxv) cross colt inter colv;
+                          cv:{[k1;k2;ti;tv]
+                            enlist[ti[k1][k2]] in enlist[tv[k1][k2]]}[;;it;vt];
+                          vals:flip `x`field`values!flip[p],flip cv .' p;
+                          aux:exec ((`$string field)!values) by x:x from vals;
+                          aux or ft}''')
+        # One-column table `x` holding 0..n-1, used below as a positional index.
+        gen_idx = q('{flip enlist[`x]!enlist til x}')
+        # Keyed tables are 99h as well, so they are handled before the dict branch.
+        # list (PyKX and Python): matched by type name and checked with q's `in`
+        if "list" in str(type(values)).lower():
+            return q('{x in y}', tab, values)
+        # unkeyed table (98h): rows are aligned by position
+        elif q('{98h~type x}', values):
+            if keyed_in != keyed_val:  # keyedness differs -> all false
+                return false_table
+            # Key all three tables on row position, compare, then drop that temporary key.
+            idx_tab = gen_idx(len(tab))
+            idx_values = gen_idx(len(values))
+            return q.value(isin_tab(tab.set_index(idx_tab),
+                                    values.set_index(idx_values),
+                                    false_table.set_index(idx_tab)))
+        # keyed table: rows are matched on key values (see idx_values below)
+        elif keyed_val:
+            if keyed_in != keyed_val or len(q.key(tab).columns) != len(q.key(values).columns):
+                return false_table
+            # Keep the real key of `tab` so it can be put back on the result.
+            old_idx_tab = q.key(tab)
+            idx_tab = gen_idx(len(tab))
+            # NOTE(review): seems to index `values` by the position of its keys in `tab` - confirm.
+            idx_values = q('''{kt:flip value flip key x;
+                               kv:flip value flip key y;
+                               flip enlist[`x]!enlist count[kv]#kt?inter[kv;kt]}''', tab, values)
+
+            res =  q.value(isin_tab(q.value(tab).set_index(idx_tab),
+                                    q.value(values).set_index(idx_values),
+                                    q.value(false_table).set_index(idx_tab)))
+
+            return res.set_index(old_idx_tab)
+        # dict (Python or q, 99h): keys are column names; columns not in the dict are all false
+        elif isinstance(values, dict) or q('{99h~type x}', values):
+            return q('''{[t;d]
+                          tv:$[kt:99h~type t;value t;t];
+                          cd:{[k;t;d]
+                            $[k in key d;
+                              t[k] in d[k];
+                              count[t]#0b]}[;tv;d];
+                          r:flip cols[tv]!cd each cols tv;
+                          $[kt;key[t]!r;r]}
+                     ''', tab, values)
+        else:  # none of the supported kinds of `values`
+            raise ValueError("Not a valid argument type.")
+
     @convert_result
     def all(self, axis=0, bool_only=False, skipna=True):
         res, cols = preparse_computations(self, axis, skipna, bool_only=bool_only)

diff --git a/tests/test_pandas_api.py b/tests/test_pandas_api.py
@@ -2109,6 +2109,60 @@ def test_keyed_loc_fixes(q):
         mkt['k1']
 
 
+def test_pandas_isin(kx):  # isin on PyKX tables must give the same frame as pandas' isin
+    tab = kx.q("""([] k1: 0n 1. 0n 2. 0n;
+                      k2: ("A";" ";"B";" ";"A");
+                      k3: (`a;1.;`c;5;`d))""")
+    keyed_tab = kx.q("""([`a`b`c`d`e]
+                        k1: 0n 1. 0n 2. 0n;
+                        k2: ("A";" ";"B";" ";"A");
+                        k3: (`a;1.;`c;5;`d))""")
+    # Table keyed on two columns (x, y).
+    multi_keyed_index = kx.q('([]x:0 1 0 1;y:0 0 1 1)')
+    multi_keyed_table = kx.q('''([]a:`foo`bar`baz`qux;
+                                   b:"f"$til 4;
+                                   c:reverse "f"$til 4)''').set_index(multi_keyed_index)
+    # Candidate `values` arguments: list, table, dict, keyed table and two-column keyed table.
+    list_value = kx.q('(`a;1.;"A")')
+    tab_value = kx.q('([] k1: 1. 2. 3.; k2: ("A";"B";"C"))')
+    dict_value = {"k1": [1., 2., 3.]}
+    keyed_tab_value = kx.q('([`a`b] k1: 1. 2.; k2: ("A";"B"))')
+    multi_keyed_value_index = kx.q('([]x:1 1 0;y:0 1 0)')
+    multi_keyed_value = kx.q('''([]a:`bar`foo`foo;
+                                   b:0 1 0)''').set_index(multi_keyed_value_index)
+    # Unkeyed input table.
+    pd.testing.assert_frame_equal(tab.isin(list_value).pd(),
+                                  tab.pd().isin(list_value.py()))
+    pd.testing.assert_frame_equal(tab.isin(dict_value).pd(),
+                                  tab.pd().isin(dict_value))
+    pd.testing.assert_frame_equal(tab.isin(tab_value).pd(),
+                                  tab.pd().isin(tab_value.pd()))
+    pd.testing.assert_frame_equal(tab.isin(keyed_tab_value).pd(),
+                                  tab.pd().isin(keyed_tab_value.pd()))
+    # Input table keyed on a single column.
+    pd.testing.assert_frame_equal(keyed_tab.isin(list_value).pd(),
+                                  keyed_tab.pd().isin(list_value.py()))
+    pd.testing.assert_frame_equal(keyed_tab.isin(dict_value).pd(),
+                                  keyed_tab.pd().isin(dict_value))
+    pd.testing.assert_frame_equal(keyed_tab.isin(tab_value).pd(),
+                                  keyed_tab.pd().isin(tab_value.pd()))
+    pd.testing.assert_frame_equal(keyed_tab.isin(keyed_tab_value).pd(),
+                                  keyed_tab.pd().isin(keyed_tab_value.pd()))
+    pd.testing.assert_frame_equal(keyed_tab.isin(multi_keyed_value).pd(),
+                                  keyed_tab.pd().isin(multi_keyed_value.pd()))
+    # Input table keyed on two columns.
+    pd.testing.assert_frame_equal(multi_keyed_table.isin(list_value).pd(),
+                                  multi_keyed_table.pd().isin(list_value.py()))
+    pd.testing.assert_frame_equal(multi_keyed_table.isin(dict_value).pd(),
+                                  multi_keyed_table.pd().isin(dict_value))
+    pd.testing.assert_frame_equal(multi_keyed_table.isin(tab_value).pd(),
+                                  multi_keyed_table.pd().isin(tab_value.pd()))
+    pd.testing.assert_frame_equal(multi_keyed_table.isin(multi_keyed_value).pd(),
+                                  multi_keyed_table.pd().isin(multi_keyed_value.pd()))
+    pd.testing.assert_frame_equal(multi_keyed_table.isin(keyed_tab_value).pd(),
+                                  multi_keyed_table.pd().isin(keyed_tab_value.pd()))
+
+
 def test_pandas_count(q):
     tab = q('([] k1: 0n 2 0n 2 0n ; k2: (`a;`;`b;`;`c))')
     df = tab.pd()