whymath · whymath · Oct 16, 2025 · Oct 7, 2025 · Oct 7, 2025 · Oct 7, 2025
diff --git a/Extrapolate METR.ipynb b/Extrapolate METR.ipynb
diff --git a/Full Timelines Model.ipynb b/Full Timelines Model.ipynb
diff --git a/Simple METR models.ipynb b/Simple METR models.ipynb
@@ -17,11 +17,9 @@
    "source": [
     "import squigglepy as sq\n",
     "from datetime import datetime\n",
-    "from libs import run_model, calculate_doubling_time\n",
-    "\n",
     "\n",
-    "O3_LAUNCH_DATE = datetime(2025, 4, 16)\n",
-    "CLAUDE_3P7_LAUNCH_DATE = datetime(2025, 2, 24)\n",
+    "from libs import run_model, calculate_doubling_time\n",
+    "from model_data import model_data\n",
     "\n",
     "print(\"Loaded libraries\")"
    ]
@@ -36,43 +34,43 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████████████████████████████████████████████████████████████████████| 100000/100000 [00:05<00:00, 16989.81it/s]\n",
-      "100%|██████████████████████████████████████████████████████████████████████████| 100000/100000 [00:07<00:00, 12581.74it/s]\n"
+      "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100000/100000 [00:06<00:00, 15637.69it/s]\n",
+      "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100000/100000 [00:07<00:00, 13032.56it/s]\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "{1: 1102,\n",
-      " 5: 1252,\n",
-      " 10: 1343,\n",
+      "{1: 1098,\n",
+      " 5: 1254,\n",
+      " 10: 1344,\n",
       " 20: 1460,\n",
-      " 30: 1552,\n",
+      " 30: 1551,\n",
       " 40: 1634,\n",
-      " 50: 1716,\n",
+      " 50: 1714,\n",
       " 60: 1800,\n",
-      " 70: 1897,\n",
+      " 70: 1895,\n",
       " 80: 2013,\n",
-      " 90: 2189,\n",
-      " 95: 2348,\n",
-      " 99: 2666}\n",
+      " 90: 2193,\n",
+      " 95: 2347,\n",
+      " 99: 2671}\n",
       "\n",
       "-\n",
       "\n",
-      "{1: '2028 Mar 02',\n",
-      " 5: '2028 Jul 31',\n",
-      " 10: '2028 Oct 29',\n",
+      "{1: '2028 Feb 28',\n",
+      " 5: '2028 Aug 02',\n",
+      " 10: '2028 Oct 30',\n",
       " 20: '2029 Feb 23',\n",
       " 30: '2029 May 26',\n",
-      " 40: '2029 Aug 17',\n",
-      " 50: '2029 Nov 06',\n",
+      " 40: '2029 Aug 16',\n",
+      " 50: '2029 Nov 05',\n",
       " 60: '2030 Jan 29',\n",
-      " 70: '2030 May 07',\n",
+      " 70: '2030 May 05',\n",
       " 80: '2030 Aug 30',\n",
-      " 90: '2031 Feb 23',\n",
-      " 95: '2031 Jul 31',\n",
-      " 99: '2032 Jun 14'}\n"
+      " 90: '2031 Feb 26',\n",
+      " 95: '2031 Jul 30',\n",
+      " 99: '2032 Jun 18'}\n"
      ]
     }
    ],
@@ -87,9 +85,7 @@
     "    return days * measurement_error_variance\n",
     "\n",
     "\n",
-    "_ = run_model(\n",
-    "    metr_model, index_date=CLAUDE_3P7_LAUNCH_DATE\n",
-    ")  # Results should look similar to Figure 12"
+    "_ = run_model(metr_model, index_date=model_data['claude_3p7_sonnet']['launch_date'])  # Results should look similar to Figure 12"
    ]
   },
   {
@@ -102,58 +98,58 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████████████████████████████████████████████████████████████████████| 100000/100000 [00:06<00:00, 16320.64it/s]\n",
-      "100%|██████████████████████████████████████████████████████████████████████████| 100000/100000 [00:07<00:00, 12565.94it/s]\n"
+      "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100000/100000 [00:06<00:00, 15426.34it/s]\n",
+      "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100000/100000 [00:07<00:00, 12857.84it/s]\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "{1: 546,\n",
-      " 5: 621,\n",
-      " 10: 666,\n",
-      " 20: 724,\n",
+      "{1: 547,\n",
+      " 5: 620,\n",
+      " 10: 665,\n",
+      " 20: 723,\n",
       " 30: 769,\n",
-      " 40: 811,\n",
+      " 40: 810,\n",
       " 50: 851,\n",
       " 60: 893,\n",
       " 70: 940,\n",
       " 80: 998,\n",
-      " 90: 1086,\n",
-      " 95: 1164,\n",
-      " 99: 1324}\n",
+      " 90: 1085,\n",
+      " 95: 1163,\n",
+      " 99: 1323}\n",
       "\n",
       "-\n",
       "\n",
-      "{1: '2026 Oct 14',\n",
-      " 5: '2026 Dec 28',\n",
-      " 10: '2027 Feb 11',\n",
-      " 20: '2027 Apr 11',\n",
+      "{1: '2026 Oct 16',\n",
+      " 5: '2026 Dec 27',\n",
+      " 10: '2027 Feb 10',\n",
+      " 20: '2027 Apr 10',\n",
       " 30: '2027 May 26',\n",
       " 40: '2027 Jul 06',\n",
       " 50: '2027 Aug 15',\n",
-      " 60: '2027 Sep 27',\n",
-      " 70: '2027 Nov 12',\n",
+      " 60: '2027 Sep 26',\n",
+      " 70: '2027 Nov 13',\n",
       " 80: '2028 Jan 10',\n",
-      " 90: '2028 Apr 07',\n",
-      " 95: '2028 Jun 24',\n",
+      " 90: '2028 Apr 06',\n",
+      " 95: '2028 Jun 23',\n",
       " 99: '2028 Nov 30'}\n"
      ]
     }
    ],
    "source": [
     "def metr_model_with_o3():\n",
     "    days = calculate_doubling_time(\n",
-    "        start_task_length=1.75, agi_task_length=167, doubling_time=118, acceleration=1\n",
+    "        start_task_length=model_data['o3']['performance_50p'],\n",
+    "        agi_task_length=167,\n",
+    "        doubling_time=118,\n",
+    "        acceleration=1\n",
     "    )  # Use o3 task length, o3 launch date, and the 2024-2025 doubling time\n",
-    "    measurement_error_variance = sq.invlognorm(\n",
-    "        0.8, 1.5\n",
-    "    )  # Add measurement error on tasks: SD fit to trend variance from Figure 12\n",
+    "    measurement_error_variance = sq.invlognorm(0.8, 1.5)  # Add measurement error on tasks: SD fit to trend variance from Figure 12\n",
     "    return days * measurement_error_variance\n",
     "\n",
-    "\n",
-    "_ = run_model(metr_model_with_o3, index_date=O3_LAUNCH_DATE)"
+    "_ = run_model(metr_model_with_o3, index_date=model_data['o3']['launch_date'])"
    ]
   },
   {
@@ -166,8 +162,8 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|█████████████████████████████████████████████████████████████████████████| 100000/100000 [00:00<00:00, 102136.93it/s]\n",
-      "100%|██████████████████████████████████████████████████████████████████████████| 100000/100000 [00:04<00:00, 20427.87it/s]\n"
+      "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100000/100000 [00:01<00:00, 92619.45it/s]\n",
+      "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100000/100000 [00:05<00:00, 19037.96it/s]\n"
      ]
     },
     {
@@ -219,7 +215,7 @@
     "    return days - shift\n",
     "\n",
     "\n",
-    "_ = run_model(simple_model, index_date=O3_LAUNCH_DATE)"
+    "_ = run_model(simple_model, index_date=model_data['o3']['launch_date'])"
    ]
   }
  ],

diff --git a/Track Acceleration.ipynb b/Track Acceleration.ipynb
@@ -16,14 +16,14 @@
    ],
    "source": [
     "from datetime import datetime\n",
-    "from typing import List, Tuple\n",
     "\n",
     "from libs import (\n",
     "    test_acceleration,\n",
     "    print_estimation,\n",
     "    bootstrap_growth_parameters,\n",
     "    sliding_window_analysis,\n",
     ")\n",
+    "from model_data import model_data\n",
     "\n",
     "print(\"Loaded libraries\")"
    ]
@@ -42,46 +42,31 @@
       "GPT‑2 to GPT5 (50%): (316, 0.908)\n",
       "GPT-3 to GPT5 (50%): (296, 0.9)\n",
       "GPT-4 to GPT5 (50%): (253, 0.9)\n",
-      "Claude 3 Opus to GPT5 (50%): (116, 1.0)\n",
+      "Claude 3 Opus to GPT5 (50%): (116, 1.0)\n",
       "GPT-3 to Claude 4.1 Opus (50%): (296, 0.9)\n",
       "GPT-4 to Claude 4.1 Opus (50%): (256, 0.9)\n",
-      "Claude 3 Opus to Claude 4.1 Opus (50%): (117, 1.0)\n",
-      "Claude 3 Opus to Claude 4.1 Opus (50%): (117, 1.0)\n",
+      "Claude 3 Opus to Claude 4.1 Opus (50%): (117, 1.0)\n",
+      "Claude 3 Opus to Claude 4.1 Opus (50%): (117, 1.0)\n",
       "\n",
       "=== 80% Reliability ===\n",
       "GPT‑2 to GPT5 (80%): (204, 0.974)\n",
       "GPT-3 to GPT5 (80%): (302, 0.9)\n",
       "GPT-4 to GPT5 (80%): (239, 0.9)\n",
-      "Claude 3 Opus to GPT5 (80%): (109, 1.0)\n",
+      "Claude 3 Opus to GPT5 (80%): (109, 1.0)\n",
       "GPT-3 to Claude 4.1 Opus (80%): (302, 0.9)\n",
       "GPT-4 to Claude 4.1 Opus (80%): (241, 0.9)\n",
-      "Claude 3 Opus to Claude 4.1 Opus (80%): (108, 1.0)\n",
-      "Claude 3 Opus to Claude 4.1 Opus (80%): (108, 1.0)\n"
+      "Claude 3 Opus to Claude 4.1 Opus (80%): (108, 1.0)\n",
+      "Claude 3 Opus to Claude 4.1 Opus (80%): (108, 1.0)\n"
      ]
     }
    ],
    "source": [
-    "observed_models: List[Tuple[str, datetime, float, float]] = [\n",
-    "    # model                       # release date         # task length at 50%  # task length at 80% (in hrs)\n",
-    "    (\"GPT‑2\", datetime(2019, 2, 14), 2 / 3600, 0.1 / 3600),\n",
-    "    (\"GPT-3\", datetime(2020, 5, 28), 9 / 3600, 2 / 3600),\n",
-    "    (\"GPT‑3.5 Turbo\", datetime(2023, 3, 1), 36 / 3600, 10 / 3600),\n",
-    "    (\"GPT-4\", datetime(2023, 3, 14), 6 / 60, 1 / 60),\n",
-    "    (\"GPT-4-Nov23\", datetime(2023, 11, 6), 8 / 60, 1 / 60),\n",
-    "    (\"Claude 3 Opus\", datetime(2024, 3, 4), 6 / 60, 1 / 60),\n",
-    "    (\"GPT‑4o\", datetime(2024, 5, 13), 9 / 60, 2 / 60),\n",
-    "    (\"Claude 3.5 Sonnet (old)\", datetime(2024, 6, 20), 18 / 60, 3 / 60),\n",
-    "    (\"o1 preview\", datetime(2024, 9, 12), 22 / 60, 4 / 60),\n",
-    "    (\"Claude 3.5 Sonnet (new)\", datetime(2024, 10, 22), 28 / 60, 5 / 60),\n",
-    "    (\"o1\", datetime(2024, 12, 5), 39 / 60, 6 / 60),\n",
-    "    (\"Claude 3.7 Sonnet\", datetime(2025, 2, 24), 59 / 60, 15 / 60),\n",
-    "    (\"o3\", datetime(2025, 4, 16), 1 + 45 / 60, 20 / 60),\n",
-    "    (\"Claude 4 Sonnet\", datetime(2025, 5, 22), 1 + 7 / 60, 16 / 60),\n",
-    "    (\"Claude 4 Opus\", datetime(2025, 5, 22), 1 + 19 / 60, 20 / 60),\n",
-    "    (\"Gemini 2.5 Pro\", datetime(2025, 6, 5), 39 / 60, 9 / 60),\n",
-    "    (\"Grok 4\", datetime(2025, 7, 9), 1 + 50 / 60, 15 / 60),\n",
-    "    (\"Claude 4.1 Opus\", datetime(2025, 8, 5), 1 + 45 / 60, 21 / 60),\n",
-    "    (\"GPT5\", datetime(2025, 8, 7), 2 + 17 / 60, 25 / 60),\n",
+    "# Construct observed_models from model_data\n",
+    "# Format: (model_name, release_date, task_length_50%, task_length_80%) in hours\n",
+    "observed_models = [\n",
+    "    (model['name'], model['launch_date'], model['performance_50p'], model['performance_80p'])\n",
+    "    for model in model_data.values()\n",
+    "    if model['performance_50p'] is not None  # Exclude models without data\n",
     "]\n",
     "\n",
     "print(\"=== 50% Reliability ===\")\n",
@@ -118,15 +103,15 @@
      "output_type": "stream",
      "text": [
       "=== Bootstrap Analysis ===\n",
-      "Current date: 2025-09-09\n",
+      "Current date: 2025-10-07\n",
       "\n",
       "50% Reliability:\n",
-      "Full dataset: (294, 0.903) (95% CI: {'doubling_time': (110, 333), 'acceleration': (0.9, 1.0)})\n",
-      "2024+ models: (127, 0.942) (95% CI: {'doubling_time': (110, 171), 'acceleration': (0.9, 1.0)})\n",
+      "Full dataset: (297, 0.901) (95% CI: {'doubling_time': (110, 333), 'acceleration': (0.9, 1.0)})\n",
+      "2024+ models: (125, 0.956) (95% CI: {'doubling_time': (110, 172), 'acceleration': (0.9, 1.0)})\n",
       "\n",
       "80% Reliability:\n",
-      "Full dataset: (233, 0.941) (95% CI: {'doubling_time': (119, 306), 'acceleration': (0.9, 1.0)})\n",
-      "2024+ models: (126, 0.927) (95% CI: {'doubling_time': (104, 159), 'acceleration': (0.9, 1.0)})\n",
+      "Full dataset: (231, 0.947) (95% CI: {'doubling_time': (121, 306), 'acceleration': (0.9, 1.0)})\n",
+      "2024+ models: (127, 0.922) (95% CI: {'doubling_time': (105, 157), 'acceleration': (0.9, 1.0)})\n",
       "\n",
       "=== Parameter stability by time window ===\n",
       "       doubling_time             acceleration          \n",
@@ -200,11 +185,11 @@
    ],
    "source": [
     "test_acceleration(\n",
-    "    start_task_length=2 / 60 / 60,  # GPT2\n",
+    "    start_task_length=model_data['gpt2']['performance_50p'],\n",
     "    agi_task_length=167,\n",
     "    initial_doubling_time=260,\n",
     "    acceleration=0.95,\n",
-    "    start_date=\"2019-02-14\",  # GPT2\n",
+    "    start_date=model_data['gpt2']['launch_date'].strftime('%Y-%m-%d'),\n",
     ")"
    ]
   }

diff --git a/Untitled.ipynb b/Untitled.ipynb