testing code and prepping reward functions

landoskape · Apr 16, 2024 · cd66733 · cd66733
1 parent 498eb25
commit cd66733
Show file tree

Hide file tree

Showing 2 changed files with 392 additions and 335 deletions.
diff --git a/dominoes.ipynb b/dominoes.ipynb
@@ -39,17 +39,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 46,
    "id": "40c715a5",
    "metadata": {},
    "outputs": [],
    "source": [
-    "# TODO For refactoring"
+    "# TODO For refactoring\n",
+    "# check reward sequencer function\n",
+    "# -- figure out how to deal with value_method!! --\n",
+    "\n",
+    "\n",
+    "# start working on TSP dataset\n",
+    "# start working on a supervised learning child of the parent dataset class (and make DominoeDataset a child of that also!)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 47,
    "id": "8404c54b",
    "metadata": {},
    "outputs": [],
@@ -66,20 +72,60 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 52,
    "id": "94b81df6",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "ename": "ValueError",
+     "evalue": "did not recognize value_method, it has to be either 'dominoe' or a string representation of a positive digit",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[52], line 9\u001b[0m\n\u001b[1;32m      7\u001b[0m ib \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[1;32m      8\u001b[0m hand \u001b[38;5;241m=\u001b[39m dominoes[batch[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mselection\u001b[39m\u001b[38;5;124m\"\u001b[39m][ib]]\n\u001b[0;32m----> 9\u001b[0m reward \u001b[38;5;241m=\u001b[39m \u001b[43mdataset\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_measurereward_sequencer\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbatch\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtarget\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbatch\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m/usr/local/Caskroom/miniforge/base/envs/dominoes/lib/python3.9/site-packages/torch/utils/_contextlib.py:115\u001b[0m, in \u001b[0;36mcontext_decorator.<locals>.decorate_context\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    112\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m    113\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdecorate_context\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m    114\u001b[0m     \u001b[38;5;28;01mwith\u001b[39;00m ctx_factory():\n\u001b[0;32m--> 115\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m~/Documents/GitHub/dominoes/dominoes/datasets/base.py:423\u001b[0m, in \u001b[0;36m_measurereward_sequencer\u001b[0;34m(self, choices, batch, return_direction)\u001b[0m\n\u001b[1;32m    421\u001b[0m elif isinstance(batch[\"value_method\"], str) and batch[\"value_method\"].isdigit() and int(batch[\"value_method\"]) > 0:\n\u001b[1;32m    422\u001b[0m     valid_play_value = float(int(batch[\"value_method\"]))\n\u001b[0;32m--> 423\u001b[0m else:\n\u001b[1;32m    424\u001b[0m     raise ValueError(\"did not recognize value_method, it has to be either 'dominoe' or a string representation of a positive digit\")\n\u001b[1;32m    426\u001b[0m # initialize these tracker variables\n",
+      "\u001b[0;31mValueError\u001b[0m: did not recognize value_method, it has to be either 'dominoe' or a string representation of a positive digit"
+     ]
+    }
+   ],
    "source": [
     "highest_dominoe = 9\n",
-    "dataset = DominoeDataset(\"sequencer\", highest_dominoe, hand_size=12, return_target=True)\n",
+    "dataset = DominoeDataset(\"sequencer\", highest_dominoe, hand_size=8, return_target=True)\n",
+    "\n",
+    "batch = dataset.generate_batch(train=False, batch_size=4)\n",
+    "dominoes = dataset.get_dominoe_set(train=False)\n",
     "\n",
-    "batch = dataset.generate_batch(train=False, batch_size=128, return_full=True)"
+    "ib = 0\n",
+    "hand = dominoes[batch[\"selection\"][ib]]\n",
+    "reward = dataset._measurereward_sequencer(batch[\"target\"], batch)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 39,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "torch.int64"
+      ]
+     },
+     "execution_count": 39,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "dominoes.dtype"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "b7e87b30",
    "metadata": {},
    "outputs": [],
    "source": []