From fd00d552b152f454bd5c2a778cbbc83d2d36f639 Mon Sep 17 00:00:00 2001
From: Olivier Sprangers <45119856+elephaint@users.noreply.github.com>
Date: Thu, 16 Jan 2025 19:20:53 +0100
Subject: [PATCH] fix(pandas): use arrays for values and indices in
 time_features (#143)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: José Morales <jmoralz92@gmail.com>
---
 nbs/feature_engineering.ipynb        | 64 ++++++++++++++++++----------
 settings.ini                         |  2 +-
 utilsforecast/__init__.py            |  2 +-
 utilsforecast/feature_engineering.py |  4 +-
 4 files changed, 45 insertions(+), 27 deletions(-)
diff --git a/nbs/feature_engineering.ipynb b/nbs/feature_engineering.ipynb
index f3cc951..a630404 100644
--- a/nbs/feature_engineering.ipynb
+++ b/nbs/feature_engineering.ipynb
@@ -849,7 +849,7 @@
     "        if isinstance(times, pd.DatetimeIndex):\n",
     "            if feature in (\"week\", \"weekofyear\"):\n",
     "                times = times.isocalendar()\n",
-    "            feat_vals = getattr(times, feature)\n",
+    "            feat_vals = getattr(times, feature).to_numpy()\n",
     "        else:\n",
     "            feat_vals = getattr(times.dt, feature)()\n",
     "    return feat_name, feat_vals\n",
@@ -864,7 +864,7 @@
     "    if isinstance(df, pd.DataFrame):\n",
     "        times = pd.Index(unique_times)\n",
     "        time2pos = {time: i for i, time in enumerate(times)}\n",
-    "        restore_idxs = df[time_col].map(time2pos)\n",
+    "        restore_idxs = df[time_col].map(time2pos).to_numpy()\n",
     "        for feature in features:\n",
     "            name, vals = _compute_time_feature(times, feature)\n",
     "            df[name] = vals[restore_idxs]\n",
@@ -971,6 +971,7 @@
        "      <th>y</th>\n",
        "      <th>month</th>\n",
        "      <th>day</th>\n",
+       "      <th>week</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
@@ -981,6 +982,7 @@
        "      <td>0.428973</td>\n",
        "      <td>10</td>\n",
        "      <td>5</td>\n",
+       "      <td>40</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
@@ -989,6 +991,7 @@
        "      <td>1.423626</td>\n",
        "      <td>10</td>\n",
        "      <td>6</td>\n",
+       "      <td>40</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
@@ -997,6 +1000,7 @@
        "      <td>2.311782</td>\n",
        "      <td>10</td>\n",
        "      <td>7</td>\n",
+       "      <td>40</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
@@ -1005,6 +1009,7 @@
        "      <td>3.192191</td>\n",
        "      <td>10</td>\n",
        "      <td>8</td>\n",
+       "      <td>40</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
@@ -1013,6 +1018,7 @@
        "      <td>4.148767</td>\n",
        "      <td>10</td>\n",
        "      <td>9</td>\n",
+       "      <td>41</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>...</th>\n",
@@ -1021,6 +1027,7 @@
        "      <td>...</td>\n",
        "      <td>...</td>\n",
        "      <td>...</td>\n",
+       "      <td>...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1096</th>\n",
@@ -1029,6 +1036,7 @@
        "      <td>4.058910</td>\n",
        "      <td>5</td>\n",
        "      <td>10</td>\n",
+       "      <td>19</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1097</th>\n",
@@ -1037,6 +1045,7 @@
        "      <td>5.178157</td>\n",
        "      <td>5</td>\n",
        "      <td>11</td>\n",
+       "      <td>19</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1098</th>\n",
@@ -1045,6 +1054,7 @@
        "      <td>6.133142</td>\n",
        "      <td>5</td>\n",
        "      <td>12</td>\n",
+       "      <td>19</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1099</th>\n",
@@ -1053,6 +1063,7 @@
        "      <td>0.403709</td>\n",
        "      <td>5</td>\n",
        "      <td>13</td>\n",
+       "      <td>19</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1100</th>\n",
@@ -1061,27 +1072,28 @@
        "      <td>1.081779</td>\n",
        "      <td>5</td>\n",
        "      <td>14</td>\n",
+       "      <td>20</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
-       "<p>1101 rows × 5 columns</p>\n",
+       "<p>1101 rows × 6 columns</p>\n",
        "</div>"
       ],
       "text/plain": [
-       "     unique_id         ds         y  month  day\n",
-       "0            0 2000-10-05  0.428973     10    5\n",
-       "1            0 2000-10-06  1.423626     10    6\n",
-       "2            0 2000-10-07  2.311782     10    7\n",
-       "3            0 2000-10-08  3.192191     10    8\n",
-       "4            0 2000-10-09  4.148767     10    9\n",
-       "...        ...        ...       ...    ...  ...\n",
-       "1096         4 2001-05-10  4.058910      5   10\n",
-       "1097         4 2001-05-11  5.178157      5   11\n",
-       "1098         4 2001-05-12  6.133142      5   12\n",
-       "1099         4 2001-05-13  0.403709      5   13\n",
-       "1100         4 2001-05-14  1.081779      5   14\n",
+       "     unique_id         ds         y  month  day  week\n",
+       "0            0 2000-10-05  0.428973     10    5    40\n",
+       "1            0 2000-10-06  1.423626     10    6    40\n",
+       "2            0 2000-10-07  2.311782     10    7    40\n",
+       "3            0 2000-10-08  3.192191     10    8    40\n",
+       "4            0 2000-10-09  4.148767     10    9    41\n",
+       "...        ...        ...       ...    ...  ...   ...\n",
+       "1096         4 2001-05-10  4.058910      5   10    19\n",
+       "1097         4 2001-05-11  5.178157      5   11    19\n",
+       "1098         4 2001-05-12  6.133142      5   12    19\n",
+       "1099         4 2001-05-13  0.403709      5   13    19\n",
+       "1100         4 2001-05-14  1.081779      5   14    20\n",
        "\n",
-       "[1101 rows x 5 columns]"
+       "[1101 rows x 6 columns]"
       ]
      },
      "execution_count": null,
@@ -1090,7 +1102,7 @@
     }
    ],
    "source": [
-    "transformed_df, future_df = time_features(series, freq='D', features=['month', 'day'], h=1)\n",
+    "transformed_df, future_df = time_features(series, freq='D', features=['month', 'day', 'week'], h=1)\n",
     "transformed_df"
    ]
   },
@@ -1125,6 +1137,7 @@
        "      <th>ds</th>\n",
        "      <th>month</th>\n",
        "      <th>day</th>\n",
+       "      <th>week</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
@@ -1134,6 +1147,7 @@
        "      <td>2001-05-15</td>\n",
        "      <td>5</td>\n",
        "      <td>15</td>\n",
+       "      <td>20</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
@@ -1141,6 +1155,7 @@
        "      <td>2001-05-15</td>\n",
        "      <td>5</td>\n",
        "      <td>15</td>\n",
+       "      <td>20</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
@@ -1148,6 +1163,7 @@
        "      <td>2001-05-15</td>\n",
        "      <td>5</td>\n",
        "      <td>15</td>\n",
+       "      <td>20</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
@@ -1155,6 +1171,7 @@
        "      <td>2001-05-15</td>\n",
        "      <td>5</td>\n",
        "      <td>15</td>\n",
+       "      <td>20</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
@@ -1162,18 +1179,19 @@
        "      <td>2001-05-15</td>\n",
        "      <td>5</td>\n",
        "      <td>15</td>\n",
+       "      <td>20</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "text/plain": [
-       "  unique_id         ds  month  day\n",
-       "0         0 2001-05-15      5   15\n",
-       "1         1 2001-05-15      5   15\n",
-       "2         2 2001-05-15      5   15\n",
-       "3         3 2001-05-15      5   15\n",
-       "4         4 2001-05-15      5   15"
+       "  unique_id         ds  month  day  week\n",
+       "0         0 2001-05-15      5   15    20\n",
+       "1         1 2001-05-15      5   15    20\n",
+       "2         2 2001-05-15      5   15    20\n",
+       "3         3 2001-05-15      5   15    20\n",
+       "4         4 2001-05-15      5   15    20"
       ]
      },
      "execution_count": null,
diff --git a/settings.ini b/settings.ini
index 1132ea8..92978fc 100644
--- a/settings.ini
+++ b/settings.ini
@@ -1,7 +1,7 @@
 [DEFAULT]
 repo = utilsforecast
 lib_name = utilsforecast
-version = 0.2.10
+version = 0.2.11
 min_python = 3.8
 license = apache2
 black_formatting = True
diff --git a/utilsforecast/__init__.py b/utilsforecast/__init__.py
index 6232f7a..5635676 100644
--- a/utilsforecast/__init__.py
+++ b/utilsforecast/__init__.py
@@ -1 +1 @@
-__version__ = "0.2.10"
+__version__ = "0.2.11"
diff --git a/utilsforecast/feature_engineering.py b/utilsforecast/feature_engineering.py
index 9502136..bb3c085 100644
--- a/utilsforecast/feature_engineering.py
+++ b/utilsforecast/feature_engineering.py
@@ -212,7 +212,7 @@ def _compute_time_feature(
         if isinstance(times, pd.DatetimeIndex):
             if feature in ("week", "weekofyear"):
                 times = times.isocalendar()
-            feat_vals = getattr(times, feature)
+            feat_vals = getattr(times, feature).to_numpy()
         else:
             feat_vals = getattr(times.dt, feature)()
     return feat_name, feat_vals
@@ -228,7 +228,7 @@ def _add_time_features(
     if isinstance(df, pd.DataFrame):
         times = pd.Index(unique_times)
         time2pos = {time: i for i, time in enumerate(times)}
-        restore_idxs = df[time_col].map(time2pos)
+        restore_idxs = df[time_col].map(time2pos).to_numpy()
         for feature in features:
             name, vals = _compute_time_feature(times, feature)
             df[name] = vals[restore_idxs]