diff --git a/.gitignore b/.gitignore index f07543c..64d52a7 100644 --- a/.gitignore +++ b/.gitignore @@ -210,3 +210,5 @@ result_dirs/gsm/gemma-2-27b-it@vllm.json result_dirs/zebra-grid/gemma-2-27b-it@vllm.json result_dirs_parsed/ state_of_limit/task_summary_*.json +result_dirs/zebra-grid/bon_32_v2/gpt-4o-mini-2024-07-18.json +result_dirs/zebra-grid/bon_64/gpt-4o-mini-2024-07-18.json diff --git a/result_dirs/zebra-grid.summary.json b/result_dirs/zebra-grid.summary.json index 70e5cbf..b3b9828 100644 --- a/result_dirs/zebra-grid.summary.json +++ b/result_dirs/zebra-grid.summary.json @@ -1,706 +1,41 @@ [ { - "Model": "o1-preview-2024-09-12", - "Mode": "greedy", - "Puzzle Acc": "71.40", - "Cell Acc": "75.14", - "No answer": "0.30", - "Easy Puzzle Acc": "98.57", - "Hard Puzzle Acc": "60.83", - "Total Puzzles": 1000, - "Reason Lens": "1565.88" - }, - { - "Model": "o1-preview-2024-09-12-v2", - "Mode": "greedy", - "Puzzle Acc": "70.40", - "Cell Acc": "74.18", - "No answer": "0.40", - "Easy Puzzle Acc": "98.21", - "Hard Puzzle Acc": "59.58", - "Total Puzzles": 1000, - "Reason Lens": "1559.71" - }, - { - "Model": "o1-mini-2024-09-12-v3", - "Mode": "greedy", - "Puzzle Acc": "59.70", - "Cell Acc": "70.32", - "No answer": "1.00", - "Easy Puzzle Acc": "86.07", - "Hard Puzzle Acc": "49.44", - "Total Puzzles": 1000, - "Reason Lens": "1166.38" - }, - { - "Model": "o1-mini-2024-09-12-v2", - "Mode": "greedy", - "Puzzle Acc": "56.80", - "Cell Acc": "69.87", - "No answer": "1.30", - "Easy Puzzle Acc": "82.86", - "Hard Puzzle Acc": "46.67", - "Total Puzzles": 1000, - "Reason Lens": "1164.95" - }, - { - "Model": "o1-mini-2024-09-12", - "Mode": "greedy", - "Puzzle Acc": "52.60", - "Cell Acc": "52.29", - "No answer": "0.80", - "Easy Puzzle Acc": "87.14", - "Hard Puzzle Acc": "39.17", - "Total Puzzles": 1000, - "Reason Lens": "993.28" - }, - { - "Model": "claude-3-5-sonnet-20240620", - "Mode": "greedy", - "Puzzle Acc": "33.40", - "Cell Acc": "54.34", - "No answer": "0.00", - "Easy Puzzle Acc": "87.50", - "Hard Puzzle Acc": "12.36", - "Total Puzzles": 1000, - "Reason Lens": "1141.94" - }, - { - "Model": "claude-3-5-sonnet-20240620", - "Mode": "sampling", - "Puzzle Acc": "33.40", - "Cell Acc": "53.01", - "No answer": "0.10", - "Easy Puzzle Acc": "88.21", - "Hard Puzzle Acc": "12.08", - "Total Puzzles": 1000, - "Reason Lens": "1153.83" - }, - { - "Model": "Llama-3.1-405B-Inst-fp8@together", - "Mode": "greedy", - "Puzzle Acc": "32.60", - "Cell Acc": "45.80", - "No answer": "12.50", - "Easy Puzzle Acc": "87.14", - "Hard Puzzle Acc": "11.39", - "Total Puzzles": 1000, - "Reason Lens": "314.66" - }, - { - "Model": "Llama-3.1-405B-Inst-fp8@together", - "Mode": "sampling", - "Puzzle Acc": "32.60", - "Cell Acc": "47.04", - "No answer": "10.80", - "Easy Puzzle Acc": "86.07", - "Hard Puzzle Acc": "11.81", - "Total Puzzles": 1000, - "Reason Lens": "439.96" - }, - { - "Model": "gpt-4o-2024-08-06", - "Mode": "greedy", - "Puzzle Acc": "31.70", - "Cell Acc": "50.34", - "No answer": "3.60", - "Easy Puzzle Acc": "84.64", - "Hard Puzzle Acc": "11.11", - "Total Puzzles": 1000, - "Reason Lens": "1106.51" - }, - { - "Model": "gpt-4o-2024-05-13", - "Mode": "sampling", - "Puzzle Acc": "30.80", - "Cell Acc": "46.19", - "No answer": "6.60", - "Easy Puzzle Acc": "81.07", - "Hard Puzzle Acc": "11.25", - "Total Puzzles": 1000, - "Reason Lens": "1549.74" - }, - { - "Model": "gemini-1.5-pro-exp-0827", - "Mode": "greedy", - "Puzzle Acc": "30.50", - "Cell Acc": "50.84", - "No answer": "0.80", - "Easy Puzzle Acc": "79.64", - "Hard Puzzle Acc": 
"11.39", - "Total Puzzles": 1000, - "Reason Lens": "1594.47" - }, - { - "Model": "Llama-3.1-405B-Inst@sambanova", - "Mode": "greedy", - "Puzzle Acc": "30.10", - "Cell Acc": "39.06", - "No answer": "24.70", - "Easy Puzzle Acc": "84.64", - "Hard Puzzle Acc": "8.89", - "Total Puzzles": 1000, - "Reason Lens": "2001.12" - }, - { - "Model": "chatgpt-4o-latest-24-09-07", - "Mode": "greedy", - "Puzzle Acc": "29.90", - "Cell Acc": "48.83", - "No answer": "4.20", - "Easy Puzzle Acc": "81.43", - "Hard Puzzle Acc": "9.86", - "Total Puzzles": 1000, - "Reason Lens": "1539.99" - }, - { - "Model": "Mistral-Large-2", - "Mode": "greedy", - "Puzzle Acc": "29.00", - "Cell Acc": "47.64", - "No answer": "1.70", - "Easy Puzzle Acc": "80.36", - "Hard Puzzle Acc": "9.03", - "Total Puzzles": 1000, - "Reason Lens": "1592.39" - }, - { - "Model": "gpt-4-turbo-2024-04-09", - "Mode": "greedy", - "Puzzle Acc": "28.40", - "Cell Acc": "47.90", - "No answer": "0.10", - "Easy Puzzle Acc": "80.71", - "Hard Puzzle Acc": "8.06", - "Total Puzzles": 1000, - "Reason Lens": "1148.46" - }, - { - "Model": "gpt-4o-2024-05-13", - "Mode": "greedy", - "Puzzle Acc": "28.20", - "Cell Acc": "38.72", - "No answer": "19.30", - "Easy Puzzle Acc": "77.86", - "Hard Puzzle Acc": "8.89", - "Total Puzzles": 1000, - "Reason Lens": "1643.51" - }, - { - "Model": "gpt-4-0314", - "Mode": "greedy", - "Puzzle Acc": "27.10", - "Cell Acc": "47.43", - "No answer": "0.20", - "Easy Puzzle Acc": "77.14", - "Hard Puzzle Acc": "7.64", - "Total Puzzles": 1000, - "Reason Lens": "1203.17" - }, - { - "Model": "claude-3-opus-20240229", - "Mode": "greedy", - "Puzzle Acc": "27.00", - "Cell Acc": "48.91", - "No answer": "0.00", - "Easy Puzzle Acc": "78.21", - "Hard Puzzle Acc": "7.08", - "Total Puzzles": 1000, - "Reason Lens": "855.72" - }, - { - "Model": "Qwen2.5-72B-Instruct", - "Mode": "greedy", - "Puzzle Acc": "26.60", - "Cell Acc": "40.92", - "No answer": "11.90", - "Easy Puzzle Acc": "76.43", - "Hard Puzzle Acc": "7.22", - "Total Puzzles": 1000, - "Reason Lens": "1795.90" - }, - { - "Model": "gpt-4-turbo-2024-04-09", - "Mode": "sampling", - "Puzzle Acc": "26.40", - "Cell Acc": "47.93", - "No answer": "0.00", - "Easy Puzzle Acc": "74.29", - "Hard Puzzle Acc": "7.78", - "Total Puzzles": 1000, - "Reason Lens": "1165.90" - }, - { - "Model": "Qwen2.5-32B-Instruct", - "Mode": "greedy", - "Puzzle Acc": "26.10", - "Cell Acc": "43.39", - "No answer": "6.30", - "Easy Puzzle Acc": "77.50", - "Hard Puzzle Acc": "6.11", - "Total Puzzles": 1000, - "Reason Lens": "1333.07" - }, - { - "Model": "gemini-1.5-pro-exp-0801", - "Mode": "greedy", - "Puzzle Acc": "25.20", - "Cell Acc": "48.50", + "Model": "gpt-4o-mini-2024-07-18", + "Mode": "bon_64", + "Puzzle Acc": "47.90", + "Cell Acc": "73.42", "No answer": "0.00", - "Easy Puzzle Acc": "72.50", - "Hard Puzzle Acc": "6.81", + "Easy Puzzle Acc": "97.14", + "Hard Puzzle Acc": "28.75", "Total Puzzles": 1000, - "Reason Lens": "1389.75" - }, - { - "Model": "Llama-3.1-405B-Inst@hyperbolic", - "Mode": "greedy", - "Puzzle Acc": "25.00", - "Cell Acc": "46.62", - "No answer": "6.25", - "Easy Puzzle Acc": "66.67", - "Hard Puzzle Acc": "15.38", - "Total Puzzles": 16, - "Reason Lens": "1517.13" - }, - { - "Model": "gemini-1.5-flash-exp-0827", - "Mode": "greedy", - "Puzzle Acc": "25.00", - "Cell Acc": "43.56", - "No answer": "8.50", - "Easy Puzzle Acc": "70.71", - "Hard Puzzle Acc": "7.22", - "Total Puzzles": 1000, - "Reason Lens": "1705.11" - }, - { - "Model": "Meta-Llama-3.1-70B-Instruct", - "Mode": "greedy", - "Puzzle Acc": "24.90", - "Cell Acc": 
"27.98", - "No answer": "43.00", - "Easy Puzzle Acc": "73.57", - "Hard Puzzle Acc": "5.97", - "Total Puzzles": 1000, - "Reason Lens": "1483.68" - }, - { - "Model": "deepseek-v2-chat-0628", - "Mode": "greedy", - "Puzzle Acc": "22.70", - "Cell Acc": "42.46", - "No answer": "5.20", - "Easy Puzzle Acc": "68.57", - "Hard Puzzle Acc": "4.86", - "Total Puzzles": 1000, - "Reason Lens": "1260.23" - }, - { - "Model": "deepseek-v2.5-0908", - "Mode": "greedy", - "Puzzle Acc": "22.10", - "Cell Acc": "38.01", - "No answer": "12.70", - "Easy Puzzle Acc": "68.21", - "Hard Puzzle Acc": "4.17", - "Total Puzzles": 1000, - "Reason Lens": "1294.46" - }, - { - "Model": "Qwen2-72B-Instruct", - "Mode": "greedy", - "Puzzle Acc": "21.40", - "Cell Acc": "38.32", - "No answer": "10.20", - "Easy Puzzle Acc": "63.93", - "Hard Puzzle Acc": "4.86", - "Total Puzzles": 1000, - "Reason Lens": "1813.82" - }, - { - "Model": "deepseek-v2-coder-0614", - "Mode": "greedy", - "Puzzle Acc": "21.10", - "Cell Acc": "41.58", - "No answer": "4.90", - "Easy Puzzle Acc": "64.64", - "Hard Puzzle Acc": "4.17", - "Total Puzzles": 1000, - "Reason Lens": "1324.55" - }, - { - "Model": "deepseek-v2-coder-0724", - "Mode": "greedy", - "Puzzle Acc": "20.50", - "Cell Acc": "42.35", - "No answer": "3.40", - "Easy Puzzle Acc": "61.79", - "Hard Puzzle Acc": "4.44", - "Total Puzzles": 1000, - "Reason Lens": "1230.63" + "Reason Lens": "901.94", + "N_Mode": "best_of_n", + "N_Size": 64 }, { "Model": "gpt-4o-mini-2024-07-18", - "Mode": "greedy", - "Puzzle Acc": "20.10", - "Cell Acc": "41.26", - "No answer": "0.10", - "Easy Puzzle Acc": "62.50", - "Hard Puzzle Acc": "3.61", - "Total Puzzles": 1000, - "Reason Lens": "943.52" - }, - { - "Model": "gemini-1.5-pro", - "Mode": "sampling", - "Puzzle Acc": "19.70", - "Cell Acc": "45.24", - "No answer": "0.40", - "Easy Puzzle Acc": "60.00", - "Hard Puzzle Acc": "4.03", - "Total Puzzles": 1000, - "Reason Lens": "1356.77" - }, - { - "Model": "gemini-1.5-flash", - "Mode": "greedy", - "Puzzle Acc": "19.40", - "Cell Acc": "31.77", - "No answer": "22.70", - "Easy Puzzle Acc": "59.29", - "Hard Puzzle Acc": "3.89", - "Total Puzzles": 1000, - "Reason Lens": "1538.18" - }, - { - "Model": "gemini-1.5-pro", - "Mode": "greedy", - "Puzzle Acc": "19.40", - "Cell Acc": "44.59", - "No answer": "0.80", - "Easy Puzzle Acc": "55.71", - "Hard Puzzle Acc": "5.28", - "Total Puzzles": 1000, - "Reason Lens": "1336.17" - }, - { - "Model": "yi-large-preview", - "Mode": "greedy", - "Puzzle Acc": "18.90", - "Cell Acc": "42.61", - "No answer": "1.40", - "Easy Puzzle Acc": "58.93", - "Hard Puzzle Acc": "3.33", - "Total Puzzles": 1000, - "Reason Lens": "833.36" - }, - { - "Model": "yi-large", - "Mode": "greedy", - "Puzzle Acc": "18.80", - "Cell Acc": "39.83", - "No answer": "1.80", - "Easy Puzzle Acc": "58.21", - "Hard Puzzle Acc": "3.47", - "Total Puzzles": 1000, - "Reason Lens": "757.01" - }, - { - "Model": "claude-3-sonnet-20240229", - "Mode": "greedy", - "Puzzle Acc": "18.70", - "Cell Acc": "43.66", + "Mode": "bon_32", + "Puzzle Acc": "42.70", + "Cell Acc": "68.86", "No answer": "0.00", - "Easy Puzzle Acc": "58.93", - "Hard Puzzle Acc": "3.06", - "Total Puzzles": 1000, - "Reason Lens": "1095.37" - }, - { - "Model": "Qwen2-72B-Instruct", - "Mode": "sampling", - "Puzzle Acc": "18.70", - "Cell Acc": "40.57", - "No answer": "3.20", - "Easy Puzzle Acc": "57.50", - "Hard Puzzle Acc": "3.61", - "Total Puzzles": 1000, - "Reason Lens": "1894.72" - }, - { - "Model": "gemini-1.5-flash", - "Mode": "sampling", - "Puzzle Acc": "18.40", - "Cell Acc": 
"36.03", - "No answer": "12.80", - "Easy Puzzle Acc": "57.86", - "Hard Puzzle Acc": "3.06", + "Easy Puzzle Acc": "97.50", + "Hard Puzzle Acc": "21.39", "Total Puzzles": 1000, - "Reason Lens": "1713.03" + "Reason Lens": "980.51", + "N_Mode": "best_of_n", + "N_Size": 32 }, { - "Model": "Meta-Llama-3-70B-Instruct", - "Mode": "greedy", - "Puzzle Acc": "16.80", - "Cell Acc": "42.31", - "No answer": "0.20", - "Easy Puzzle Acc": "52.86", - "Hard Puzzle Acc": "2.78", - "Total Puzzles": 1000, - "Reason Lens": "809.95" - }, - { - "Model": "Athene-70B", - "Mode": "greedy", - "Puzzle Acc": "16.70", - "Cell Acc": "32.98", - "No answer": "21.10", - "Easy Puzzle Acc": "52.50", - "Hard Puzzle Acc": "2.78", - "Total Puzzles": 1000, - "Reason Lens": "391.19" - }, - { - "Model": "gemma-2-27b-it", - "Mode": "greedy", - "Puzzle Acc": "16.30", - "Cell Acc": "41.18", - "No answer": "1.10", - "Easy Puzzle Acc": "50.71", - "Hard Puzzle Acc": "2.92", - "Total Puzzles": 1000, - "Reason Lens": "1014.56" - }, - { - "Model": "claude-3-haiku-20240307", - "Mode": "greedy", - "Puzzle Acc": "14.30", - "Cell Acc": "37.87", - "No answer": "0.10", - "Easy Puzzle Acc": "47.86", - "Hard Puzzle Acc": "1.25", - "Total Puzzles": 1000, - "Reason Lens": "1015.06" - }, - { - "Model": "command-r-plus", - "Mode": "greedy", - "Puzzle Acc": "13.90", - "Cell Acc": "39.01", - "No answer": "0.20", - "Easy Puzzle Acc": "44.64", - "Hard Puzzle Acc": "1.94", - "Total Puzzles": 1000, - "Reason Lens": "810.53" - }, - { - "Model": "reka-core-20240501", - "Mode": "greedy", - "Puzzle Acc": "13.00", - "Cell Acc": "33.88", - "No answer": "4.00", - "Easy Puzzle Acc": "43.21", - "Hard Puzzle Acc": "1.25", - "Total Puzzles": 1000, - "Reason Lens": "1078.29" - }, - { - "Model": "gemma-2-9b-it", - "Mode": "greedy", - "Puzzle Acc": "12.80", - "Cell Acc": "36.79", + "Model": "gpt-4o-mini-2024-07-18", + "Mode": "bon_32_v2", + "Puzzle Acc": "42.60", + "Cell Acc": "69.39", "No answer": "0.00", - "Easy Puzzle Acc": "41.79", - "Hard Puzzle Acc": "1.53", - "Total Puzzles": 1000, - "Reason Lens": "849.84" - }, - { - "Model": "Meta-Llama-3.1-8B-Instruct", - "Mode": "greedy", - "Puzzle Acc": "12.80", - "Cell Acc": "13.68", - "No answer": "61.50", - "Easy Puzzle Acc": "43.57", - "Hard Puzzle Acc": "0.83", - "Total Puzzles": 1000, - "Reason Lens": "1043.90" - }, - { - "Model": "Qwen2.5-7B-Instruct", - "Mode": "greedy", - "Puzzle Acc": "12.00", - "Cell Acc": "30.67", - "No answer": "9.50", - "Easy Puzzle Acc": "38.93", - "Hard Puzzle Acc": "1.53", - "Total Puzzles": 1000, - "Reason Lens": "850.93" - }, - { - "Model": "Meta-Llama-3-8B-Instruct", - "Mode": "greedy", - "Puzzle Acc": "11.90", - "Cell Acc": "23.70", - "No answer": "29.20", - "Easy Puzzle Acc": "40.71", - "Hard Puzzle Acc": "0.69", - "Total Puzzles": 1000, - "Reason Lens": "1216.40" - }, - { - "Model": "Mistral-Nemo-Instruct-2407", - "Mode": "greedy", - "Puzzle Acc": "11.80", - "Cell Acc": "34.93", - "No answer": "1.60", - "Easy Puzzle Acc": "38.93", - "Hard Puzzle Acc": "1.25", - "Total Puzzles": 1000, - "Reason Lens": "925.88" - }, - { - "Model": "Phi-3-mini-4k-instruct", - "Mode": "greedy", - "Puzzle Acc": "11.60", - "Cell Acc": "13.50", - "No answer": "59.00", - "Easy Puzzle Acc": "38.21", - "Hard Puzzle Acc": "1.25", - "Total Puzzles": 1000, - "Reason Lens": "790.29" - }, - { - "Model": "Yi-1.5-34B-Chat", - "Mode": "greedy", - "Puzzle Acc": "11.50", - "Cell Acc": "32.73", - "No answer": "4.40", - "Easy Puzzle Acc": "37.50", - "Hard Puzzle Acc": "1.39", - "Total Puzzles": 1000, - "Reason Lens": "869.65" 
- }, - { - "Model": "Meta-Llama-3-8B-Instruct", - "Mode": "sampling", - "Puzzle Acc": "11.00", - "Cell Acc": "26.11", - "No answer": "22.30", - "Easy Puzzle Acc": "36.79", - "Hard Puzzle Acc": "0.97", - "Total Puzzles": 1000, - "Reason Lens": "1282.40" - }, - { - "Model": "gpt-3.5-turbo-0125", - "Mode": "greedy", - "Puzzle Acc": "10.10", - "Cell Acc": "33.06", - "No answer": "0.10", - "Easy Puzzle Acc": "33.57", - "Hard Puzzle Acc": "0.97", - "Total Puzzles": 1000, - "Reason Lens": "820.66" - }, - { - "Model": "command-r", - "Mode": "greedy", - "Puzzle Acc": "9.90", - "Cell Acc": "32.66", - "No answer": "1.50", - "Easy Puzzle Acc": "32.14", - "Hard Puzzle Acc": "1.25", - "Total Puzzles": 1000, - "Reason Lens": "1005.17" - }, - { - "Model": "reka-flash-20240226", - "Mode": "greedy", - "Puzzle Acc": "9.30", - "Cell Acc": "25.67", - "No answer": "18.70", - "Easy Puzzle Acc": "30.71", - "Hard Puzzle Acc": "0.97", - "Total Puzzles": 1000, - "Reason Lens": "1074.80" - }, - { - "Model": "mathstral-7B-v0.1", - "Mode": "greedy", - "Puzzle Acc": "9.00", - "Cell Acc": "20.42", - "No answer": "36.00", - "Easy Puzzle Acc": "30.00", - "Hard Puzzle Acc": "0.83", - "Total Puzzles": 1000, - "Reason Lens": "1148.16" - }, - { - "Model": "Mixtral-8x7B-Instruct-v0.1", - "Mode": "greedy", - "Puzzle Acc": "8.70", - "Cell Acc": "26.47", - "No answer": "20.30", - "Easy Puzzle Acc": "28.93", - "Hard Puzzle Acc": "0.83", - "Total Puzzles": 1000, - "Reason Lens": "1177.21" - }, - { - "Model": "Qwen2-7B-Instruct", - "Mode": "greedy", - "Puzzle Acc": "8.40", - "Cell Acc": "22.06", - "No answer": "24.40", - "Easy Puzzle Acc": "29.29", - "Hard Puzzle Acc": "0.28", - "Total Puzzles": 1000, - "Reason Lens": "1473.23" - }, - { - "Model": "Phi-3.5-mini-instruct", - "Mode": "greedy", - "Puzzle Acc": "6.40", - "Cell Acc": "5.98", - "No answer": "80.60", - "Easy Puzzle Acc": "21.79", - "Hard Puzzle Acc": "0.42", - "Total Puzzles": 1000, - "Reason Lens": "718.43" - }, - { - "Model": "Qwen2.5-3B-Instruct", - "Mode": "greedy", - "Puzzle Acc": "4.80", - "Cell Acc": "11.44", - "No answer": "56.70", - "Easy Puzzle Acc": "17.14", - "Hard Puzzle Acc": "0.00", - "Total Puzzles": 1000, - "Reason Lens": "906.58" - }, - { - "Model": "gemma-2-2b-it", - "Mode": "greedy", - "Puzzle Acc": "4.20", - "Cell Acc": "9.97", - "No answer": "57.20", - "Easy Puzzle Acc": "14.29", - "Hard Puzzle Acc": "0.28", - "Total Puzzles": 1000, - "Reason Lens": "1032.89" - }, - { - "Model": "Yi-1.5-9B-Chat", - "Mode": "greedy", - "Puzzle Acc": "2.30", - "Cell Acc": "7.53", - "No answer": "11.30", - "Easy Puzzle Acc": "8.21", - "Hard Puzzle Acc": "0.00", + "Easy Puzzle Acc": "96.79", + "Hard Puzzle Acc": "21.53", "Total Puzzles": 1000, - "Reason Lens": "1592.60" + "Reason Lens": "978.38", + "N_Mode": "best_of_n", + "N_Size": 32 } ] \ No newline at end of file diff --git a/result_dirs/zebra-grid.summary.md b/result_dirs/zebra-grid.summary.md index fda6f0e..298561d 100644 --- a/result_dirs/zebra-grid.summary.md +++ b/result_dirs/zebra-grid.summary.md @@ -1,66 +1,5 @@ -| Model | Mode | Puzzle Acc | Easy Puzzle Acc | Hard Puzzle Acc | Cell Acc | No answer | Total Puzzles | Reason Lens | -|----------------------------------|----------|--------------|-------------------|-------------------|------------|-------------|-----------------|---------------| -| o1-preview-2024-09-12 | greedy | 71.4 | 98.57 | 60.83 | 75.14 | 0.3 | 1000 | 1565.88 | -| o1-preview-2024-09-12-v2 | greedy | 70.4 | 98.21 | 59.58 | 74.18 | 0.4 | 1000 | 1559.71 | -| o1-mini-2024-09-12-v3 | greedy | 59.7 
| 86.07 | 49.44 | 70.32 | 1 | 1000 | 1166.38 | -| o1-mini-2024-09-12-v2 | greedy | 56.8 | 82.86 | 46.67 | 69.87 | 1.3 | 1000 | 1164.95 | -| o1-mini-2024-09-12 | greedy | 52.6 | 87.14 | 39.17 | 52.29 | 0.8 | 1000 | 993.28 | -| claude-3-5-sonnet-20240620 | greedy | 33.4 | 87.5 | 12.36 | 54.34 | 0 | 1000 | 1141.94 | -| claude-3-5-sonnet-20240620 | sampling | 33.4 | 88.21 | 12.08 | 53.01 | 0.1 | 1000 | 1153.83 | -| Llama-3.1-405B-Inst-fp8@together | greedy | 32.6 | 87.14 | 11.39 | 45.8 | 12.5 | 1000 | 314.66 | -| Llama-3.1-405B-Inst-fp8@together | sampling | 32.6 | 86.07 | 11.81 | 47.04 | 10.8 | 1000 | 439.96 | -| gpt-4o-2024-08-06 | greedy | 31.7 | 84.64 | 11.11 | 50.34 | 3.6 | 1000 | 1106.51 | -| gpt-4o-2024-05-13 | sampling | 30.8 | 81.07 | 11.25 | 46.19 | 6.6 | 1000 | 1549.74 | -| gemini-1.5-pro-exp-0827 | greedy | 30.5 | 79.64 | 11.39 | 50.84 | 0.8 | 1000 | 1594.47 | -| Llama-3.1-405B-Inst@sambanova | greedy | 30.1 | 84.64 | 8.89 | 39.06 | 24.7 | 1000 | 2001.12 | -| chatgpt-4o-latest-24-09-07 | greedy | 29.9 | 81.43 | 9.86 | 48.83 | 4.2 | 1000 | 1539.99 | -| Mistral-Large-2 | greedy | 29 | 80.36 | 9.03 | 47.64 | 1.7 | 1000 | 1592.39 | -| gpt-4-turbo-2024-04-09 | greedy | 28.4 | 80.71 | 8.06 | 47.9 | 0.1 | 1000 | 1148.46 | -| gpt-4o-2024-05-13 | greedy | 28.2 | 77.86 | 8.89 | 38.72 | 19.3 | 1000 | 1643.51 | -| gpt-4-0314 | greedy | 27.1 | 77.14 | 7.64 | 47.43 | 0.2 | 1000 | 1203.17 | -| claude-3-opus-20240229 | greedy | 27 | 78.21 | 7.08 | 48.91 | 0 | 1000 | 855.72 | -| Qwen2.5-72B-Instruct | greedy | 26.6 | 76.43 | 7.22 | 40.92 | 11.9 | 1000 | 1795.9 | -| gpt-4-turbo-2024-04-09 | sampling | 26.4 | 74.29 | 7.78 | 47.93 | 0 | 1000 | 1165.9 | -| Qwen2.5-32B-Instruct | greedy | 26.1 | 77.5 | 6.11 | 43.39 | 6.3 | 1000 | 1333.07 | -| gemini-1.5-pro-exp-0801 | greedy | 25.2 | 72.5 | 6.81 | 48.5 | 0 | 1000 | 1389.75 | -| Llama-3.1-405B-Inst@hyperbolic | greedy | 25 | 66.67 | 15.38 | 46.62 | 6.25 | 16 | 1517.13 | -| gemini-1.5-flash-exp-0827 | greedy | 25 | 70.71 | 7.22 | 43.56 | 8.5 | 1000 | 1705.11 | -| Meta-Llama-3.1-70B-Instruct | greedy | 24.9 | 73.57 | 5.97 | 27.98 | 43 | 1000 | 1483.68 | -| deepseek-v2-chat-0628 | greedy | 22.7 | 68.57 | 4.86 | 42.46 | 5.2 | 1000 | 1260.23 | -| deepseek-v2.5-0908 | greedy | 22.1 | 68.21 | 4.17 | 38.01 | 12.7 | 1000 | 1294.46 | -| Qwen2-72B-Instruct | greedy | 21.4 | 63.93 | 4.86 | 38.32 | 10.2 | 1000 | 1813.82 | -| deepseek-v2-coder-0614 | greedy | 21.1 | 64.64 | 4.17 | 41.58 | 4.9 | 1000 | 1324.55 | -| deepseek-v2-coder-0724 | greedy | 20.5 | 61.79 | 4.44 | 42.35 | 3.4 | 1000 | 1230.63 | -| gpt-4o-mini-2024-07-18 | greedy | 20.1 | 62.5 | 3.61 | 41.26 | 0.1 | 1000 | 943.52 | -| gemini-1.5-pro | sampling | 19.7 | 60 | 4.03 | 45.24 | 0.4 | 1000 | 1356.77 | -| gemini-1.5-flash | greedy | 19.4 | 59.29 | 3.89 | 31.77 | 22.7 | 1000 | 1538.18 | -| gemini-1.5-pro | greedy | 19.4 | 55.71 | 5.28 | 44.59 | 0.8 | 1000 | 1336.17 | -| yi-large-preview | greedy | 18.9 | 58.93 | 3.33 | 42.61 | 1.4 | 1000 | 833.36 | -| yi-large | greedy | 18.8 | 58.21 | 3.47 | 39.83 | 1.8 | 1000 | 757.01 | -| claude-3-sonnet-20240229 | greedy | 18.7 | 58.93 | 3.06 | 43.66 | 0 | 1000 | 1095.37 | -| Qwen2-72B-Instruct | sampling | 18.7 | 57.5 | 3.61 | 40.57 | 3.2 | 1000 | 1894.72 | -| gemini-1.5-flash | sampling | 18.4 | 57.86 | 3.06 | 36.03 | 12.8 | 1000 | 1713.03 | -| Meta-Llama-3-70B-Instruct | greedy | 16.8 | 52.86 | 2.78 | 42.31 | 0.2 | 1000 | 809.95 | -| Athene-70B | greedy | 16.7 | 52.5 | 2.78 | 32.98 | 21.1 | 1000 | 391.19 | -| gemma-2-27b-it | greedy | 16.3 | 50.71 | 2.92 | 41.18 | 
1.1 | 1000 | 1014.56 | -| claude-3-haiku-20240307 | greedy | 14.3 | 47.86 | 1.25 | 37.87 | 0.1 | 1000 | 1015.06 | -| command-r-plus | greedy | 13.9 | 44.64 | 1.94 | 39.01 | 0.2 | 1000 | 810.53 | -| reka-core-20240501 | greedy | 13 | 43.21 | 1.25 | 33.88 | 4 | 1000 | 1078.29 | -| gemma-2-9b-it | greedy | 12.8 | 41.79 | 1.53 | 36.79 | 0 | 1000 | 849.84 | -| Meta-Llama-3.1-8B-Instruct | greedy | 12.8 | 43.57 | 0.83 | 13.68 | 61.5 | 1000 | 1043.9 | -| Qwen2.5-7B-Instruct | greedy | 12 | 38.93 | 1.53 | 30.67 | 9.5 | 1000 | 850.93 | -| Meta-Llama-3-8B-Instruct | greedy | 11.9 | 40.71 | 0.69 | 23.7 | 29.2 | 1000 | 1216.4 | -| Mistral-Nemo-Instruct-2407 | greedy | 11.8 | 38.93 | 1.25 | 34.93 | 1.6 | 1000 | 925.88 | -| Phi-3-mini-4k-instruct | greedy | 11.6 | 38.21 | 1.25 | 13.5 | 59 | 1000 | 790.29 | -| Yi-1.5-34B-Chat | greedy | 11.5 | 37.5 | 1.39 | 32.73 | 4.4 | 1000 | 869.65 | -| Meta-Llama-3-8B-Instruct | sampling | 11 | 36.79 | 0.97 | 26.11 | 22.3 | 1000 | 1282.4 | -| gpt-3.5-turbo-0125 | greedy | 10.1 | 33.57 | 0.97 | 33.06 | 0.1 | 1000 | 820.66 | -| command-r | greedy | 9.9 | 32.14 | 1.25 | 32.66 | 1.5 | 1000 | 1005.17 | -| reka-flash-20240226 | greedy | 9.3 | 30.71 | 0.97 | 25.67 | 18.7 | 1000 | 1074.8 | -| mathstral-7B-v0.1 | greedy | 9 | 30 | 0.83 | 20.42 | 36 | 1000 | 1148.16 | -| Mixtral-8x7B-Instruct-v0.1 | greedy | 8.7 | 28.93 | 0.83 | 26.47 | 20.3 | 1000 | 1177.21 | -| Qwen2-7B-Instruct | greedy | 8.4 | 29.29 | 0.28 | 22.06 | 24.4 | 1000 | 1473.23 | -| Phi-3.5-mini-instruct | greedy | 6.4 | 21.79 | 0.42 | 5.98 | 80.6 | 1000 | 718.43 | -| Qwen2.5-3B-Instruct | greedy | 4.8 | 17.14 | 0 | 11.44 | 56.7 | 1000 | 906.58 | -| gemma-2-2b-it | greedy | 4.2 | 14.29 | 0.28 | 9.97 | 57.2 | 1000 | 1032.89 | -| Yi-1.5-9B-Chat | greedy | 2.3 | 8.21 | 0 | 7.53 | 11.3 | 1000 | 1592.6 | \ No newline at end of file +| Model | Mode | N_Mode | N_Size | Puzzle Acc | Easy Puzzle Acc | Hard Puzzle Acc | Cell Acc | No answer | Total Puzzles | Reason Lens | +|------------------------|-----------|-----------|----------|--------------|-------------------|-------------------|------------|-------------|-----------------|---------------| +| gpt-4o-mini-2024-07-18 | bon_64 | best_of_n | 64 | 47.9 | 97.14 | 28.75 | 73.42 | 0 | 1000 | 901.94 | +| gpt-4o-mini-2024-07-18 | bon_32 | best_of_n | 32 | 42.7 | 97.5 | 21.39 | 68.86 | 0 | 1000 | 980.51 | +| gpt-4o-mini-2024-07-18 | bon_32_v2 | best_of_n | 32 | 42.6 | 96.79 | 21.53 | 69.39 | 0 | 1000 | 978.38 | \ No newline at end of file diff --git a/src/evaluation/zebra_grid_eval.py b/src/evaluation/zebra_grid_eval.py index d20ca6f..924a10c 100644 --- a/src/evaluation/zebra_grid_eval.py +++ b/src/evaluation/zebra_grid_eval.py @@ -6,6 +6,9 @@ from eval_utils import load_model_results, extract_last_complete_json, model_name_replacement +from collections import Counter +from collections import defaultdict + private_solutions = {} def load_private_solutions(): @@ -17,7 +20,7 @@ def load_private_solutions(): -def eval_model(model, filepath): +def eval_model(model, filepath, mode="best_of_n", max_N=None): global private_solutions with open(filepath, "r") as f: print(f"Processing {filepath}") @@ -51,22 +54,139 @@ def eval_model(model, filepath): this_total_cells += len(columns) - 1 total_cells += this_total_cells - # Read and Parse the prediction from model output - prediction_str = item["output"][0] - prediction_json = extract_last_complete_json(prediction_str) - if prediction_json is None or "solution" not in prediction_json or prediction_json["solution"] is None: - # 
print("-"*100) - # prediction_str = prediction_str.replace("\n", "") - # print([prediction_str]) - # json.loads(prediction_str) + # Read and Parse the predictions from model output + predictions = [extract_last_complete_json(output) for output in item["output"]] + predictions = [p for p in predictions if p is not None and "solution" in p and p["solution"] is not None] + + # if all the predictions are empty, then skip the current puzzle, and add no answer count + if not predictions: no_asnwer += 1 - # print(item["id"]) - continue - reason = prediction_json.get("reasoning", "") - prediction_table = prediction_json["solution"] + continue + + # Limit the number of predictions to max_N if specified + if max_N is not None: + predictions = predictions[:max_N] + + + n_size = len(predictions) # Capture the number of predictions + + if n_size == 1: + mode = "single" + # Single output case + prediction_table = predictions[0]["solution"] + reason = predictions[0].get("reasoning", "") + elif mode == "best_of_n": + # Best of N: Choose the prediction with the maximum number of correct cells + max_correct_cells = 0 + best_prediction = None + for prediction in predictions: + current_correct_cells = 0 + prediction_table = prediction["solution"] + for house in solution_table: + for column in solution_table[house]: + if house in prediction_table and column in prediction_table[house]: + truth_cell = solution_table[house][column].lower().strip() + # Note that prediction_table[house][column] could be None + if prediction_table[house][column] is None: + continue + predicted_cell = prediction_table[house][column].lower().strip() + if truth_cell == predicted_cell: + current_correct_cells += 1 + if current_correct_cells > max_correct_cells or best_prediction is None: + max_correct_cells = current_correct_cells + best_prediction = prediction + prediction_table = best_prediction["solution"] + reason = best_prediction.get("reasoning", "") + + elif mode == "majority_of_n": + # Majority of N: Perform majority voting for each cell + prediction_table = {} + for house in solution_table: + prediction_table[house] = {} + for column in solution_table[house]: + votes = [] + for prediction in predictions: + if house in prediction["solution"] and column in prediction["solution"][house]: + predicted_cell = prediction["solution"][house][column] + if isinstance(predicted_cell, list): + predicted_cell = predicted_cell[0] + # Note that prediction_table[house][column] could be None + if predicted_cell is not None: + votes.append(predicted_cell.lower().strip()) + if votes: + most_common = Counter(votes).most_common(1)[0][0] + prediction_table[house][column] = most_common + else: + prediction_table[house][column] = None + # reason = "" # Reasoning is not applicable for majority voting + # use a random prediction to get the reasoning + reason = predictions[0].get("reasoning", "") + elif mode in ["most_common_of_n", "middle_common_of_n", "least_common_of_n"]: + # Choose the prediction where the cell's value is the most common among all predictions at the same positions + # Specifically, we give each value at each position a score based on its popularity, and the prediction with the highest sum of scores is chosen + # Initialize a dictionary to store scores for each prediction + prediction_scores = defaultdict(int) + + # Iterate over each house and column in the solution table + for house in solution_table: + for column in solution_table[house]: + # Count occurrences of each value at the current position across all predictions + 
value_counter = Counter() + for prediction in predictions: + if house in prediction["solution"] and column in prediction["solution"][house]: + predicted_cell = prediction["solution"][house][column] + if isinstance(predicted_cell, list): + predicted_cell = predicted_cell[0] + if predicted_cell is not None: + value_counter[predicted_cell.lower().strip()] += 1 + + # Assign scores to each prediction based on the popularity of its value at the current position + for idx, prediction in enumerate(predictions): + if house in prediction["solution"] and column in prediction["solution"][house]: + predicted_cell = prediction["solution"][house][column] + if isinstance(predicted_cell, list): + predicted_cell = predicted_cell[0] + if predicted_cell is not None: + prediction_scores[idx] += value_counter[predicted_cell.lower().strip()] + if mode == "most_common_of_n": + # Select the prediction with the highest score + best_index = max(range(len(predictions)), key=lambda idx: prediction_scores[idx]) + best_prediction = predictions[best_index] + prediction_table = best_prediction["solution"] + reason = best_prediction.get("reasoning", "") + elif mode == "middle_common_of_n": + # Select the prediction with the median score + best_index = sorted(range(len(predictions)), key=lambda idx: prediction_scores[idx])[len(predictions) // 2] + best_prediction = predictions[best_index] + prediction_table = best_prediction["solution"] + reason = best_prediction.get("reasoning", "") + elif mode == "least_common_of_n": + # Select the prediction with the lowest score + best_index = min(range(len(predictions)), key=lambda idx: prediction_scores[idx]) + best_prediction = predictions[best_index] + prediction_table = best_prediction["solution"] + reason = best_prediction.get("reasoning", "") - reason_lens.append(len(reason)) + elif mode in ["longest_of_n", "shortest_of_n", "median_of_n"]: + # Collect all predictions with their reasoning lengths + predictions_with_lengths = [(prediction, len(prediction.get("reasoning", ""))) for prediction in predictions] + + # Sort by reasoning length + predictions_with_lengths.sort(key=lambda x: x[1]) + + if mode == "longest_of_n": + best_prediction = predictions_with_lengths[-1][0] # Last element for longest + elif mode == "shortest_of_n": + best_prediction = predictions_with_lengths[0][0] # First element for shortest + elif mode == "median_of_n": + median_index = len(predictions_with_lengths) // 2 + best_prediction = predictions_with_lengths[median_index][0] # Middle element for median + prediction_table = best_prediction["solution"] + reason = best_prediction.get("reasoning", "") + + reason_lens.append(len(reason)) + this_correct_cells = 0 # number in the solution_table for house in solution_table: for column in solution_table[house]: @@ -95,6 +215,7 @@ def eval_model(model, filepath): parsed_item["correct_cells"] = this_correct_cells parsed_item["total_cells"] = this_total_cells parsed_item["solved"] = this_correct_cells == this_total_cells + parsed_results.append(parsed_item) # # print the success rate by size; order the dict by size first @@ -121,16 +242,30 @@ def eval_model(model, filepath): result["Total Puzzles"] = num_total_puzzles result["Reason Lens"] = f"{sum(reason_lens)/len(reason_lens):.2f}" result["Model"] = model_name_replacement(result["Model"]) + result["N_Mode"] = "single" if n_size == 1 else mode + result["N_Size"] = n_size return result, parsed_results # Return parsed_results along with the result def gen_results(run_name_folders): model_results = 
load_model_results(run_name_folders) - columns = ["Model", "Mode", "Puzzle Acc", "Easy Puzzle Acc", "Hard Puzzle Acc", "Cell Acc", "No answer", "Total Puzzles", "Reason Lens"] + columns = ["Model", "Mode", "N_Mode", "N_Size", "Puzzle Acc", "Easy Puzzle Acc", "Hard Puzzle Acc", "Cell Acc", "No answer", "Total Puzzles", "Reason Lens"] rows = [] for model_name, filepath in model_results.items(): - result, parsed_results = eval_model(model_name, filepath) + + # result, parsed_results = eval_model(model_name, filepath, mode="majority_of_n", max_N=32) + result, parsed_results = eval_model(model_name, filepath, mode="best_of_n", max_N=64) + # result, parsed_results = eval_model(model_name, filepath, mode="most_common_of_n", max_N=64) + + # result, parsed_results = eval_model(model_name, filepath, mode="longest_of_n", max_N=32) + # result, parsed_results = eval_model(model_name, filepath, mode="shortest_of_n", max_N=32) + # result, parsed_results = eval_model(model_name, filepath, mode="median_of_n", max_N=32) + + # result, parsed_results = eval_model(model_name, filepath, mode="least_common_of_n", max_N=32) + # result, parsed_results = eval_model(model_name, filepath, mode="middle_common_of_n", max_N=32) + + # Save the parsed_results to the same filepath with a new prefix parsed_results_filepath = filepath.replace("result_dirs", "result_dirs_parsed") # Create folders if they don't exist @@ -159,8 +294,12 @@ def gen_results(run_name_folders): if __name__ == "__main__": run_name_folders = { - "greedy": "result_dirs/zebra-grid", - "sampling": "result_dirs/zebra-grid/sampling", + # "greedy": "result_dirs/zebra-grid", + # "sampling": "result_dirs/zebra-grid/sampling", + "bon_32": "result_dirs/zebra-grid/bon_32", + "bon_32_v2": "result_dirs/zebra-grid/bon_32_v2", + "bon_64": "result_dirs/zebra-grid/bon_64", } load_private_solutions() gen_results(run_name_folders) +
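
Note (not part of the patch): the selection strategies added to eval_model above can be hard to compare in the abstract, so below is a minimal standalone sketch of the two main ones, best_of_n and majority_of_n, on a hypothetical two-house puzzle. The names toy_solution, toy_predictions, and correct_cells are invented for illustration; only Counter comes from the standard library, mirroring the import in the patch.

# Illustrative sketch only -- toy data, not the repository's evaluation code.
from collections import Counter

toy_solution = {"House 1": {"Name": "alice", "Drink": "tea"},
                "House 2": {"Name": "bob", "Drink": "coffee"}}

toy_predictions = [  # three sampled outputs, already parsed to {"solution": ...}
    {"solution": {"House 1": {"Name": "alice", "Drink": "milk"},
                  "House 2": {"Name": "bob", "Drink": "coffee"}}},
    {"solution": {"House 1": {"Name": "alice", "Drink": "tea"},
                  "House 2": {"Name": "bob", "Drink": "coffee"}}},
    {"solution": {"House 1": {"Name": "alice", "Drink": "milk"},
                  "House 2": {"Name": "bob", "Drink": "water"}}},
]

def correct_cells(pred):
    # Count cells matching the ground truth, mirroring the scoring loop in the patch.
    return sum(1 for h in toy_solution for c in toy_solution[h]
               if pred["solution"].get(h, {}).get(c, "").lower().strip()
               == toy_solution[h][c])

# best_of_n: oracle selection, keep the sample with the most correct cells.
best = max(toy_predictions, key=correct_cells)  # -> the second sample (4/4 cells correct)

# majority_of_n: per-cell vote across all samples, independent of the ground truth.
majority = {h: {c: Counter(p["solution"][h][c]
                           for p in toy_predictions).most_common(1)[0][0]
                for c in toy_solution[h]}
            for h in toy_solution}
# majority -> {"House 1": {"Name": "alice", "Drink": "milk"},
#              "House 2": {"Name": "bob", "Drink": "coffee"}}  (3/4 cells correct)

As the sketch suggests, best_of_n consults the ground-truth table when choosing a sample, so it is an oracle-style upper bound, whereas majority_of_n aggregates the samples without seeing the truth. The most_common_of_n / middle_common_of_n / least_common_of_n modes in the patch sit in between: each sample is scored by how popular its cell values are across all samples, and the highest-, median-, or lowest-scoring sample is kept.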