Skip to content

Commit

Permalink
fix(pandas): use arrays for values and indices in time_features (#143)
Browse files (browse the repository at this point in the history)
Co-authored-by: José Morales <[email protected]>
  • Loading branch information
elephaint and jmoralez authored Jan 16, 2025
1 parent ebdba72 commit fd00d55
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 27 deletions.
64 changes: 41 additions & 23 deletions nbs/feature_engineering.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -849,7 +849,7 @@
" if isinstance(times, pd.DatetimeIndex):\n",
" if feature in (\"week\", \"weekofyear\"):\n",
" times = times.isocalendar()\n",
" feat_vals = getattr(times, feature)\n",
" feat_vals = getattr(times, feature).to_numpy()\n",
" else:\n",
" feat_vals = getattr(times.dt, feature)()\n",
" return feat_name, feat_vals\n",
Expand All @@ -864,7 +864,7 @@
" if isinstance(df, pd.DataFrame):\n",
" times = pd.Index(unique_times)\n",
" time2pos = {time: i for i, time in enumerate(times)}\n",
" restore_idxs = df[time_col].map(time2pos)\n",
" restore_idxs = df[time_col].map(time2pos).to_numpy()\n",
" for feature in features:\n",
" name, vals = _compute_time_feature(times, feature)\n",
" df[name] = vals[restore_idxs]\n",
Expand Down Expand Up @@ -971,6 +971,7 @@
" <th>y</th>\n",
" <th>month</th>\n",
" <th>day</th>\n",
" <th>week</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
Expand All @@ -981,6 +982,7 @@
" <td>0.428973</td>\n",
" <td>10</td>\n",
" <td>5</td>\n",
" <td>40</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
Expand All @@ -989,6 +991,7 @@
" <td>1.423626</td>\n",
" <td>10</td>\n",
" <td>6</td>\n",
" <td>40</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
Expand All @@ -997,6 +1000,7 @@
" <td>2.311782</td>\n",
" <td>10</td>\n",
" <td>7</td>\n",
" <td>40</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
Expand All @@ -1005,6 +1009,7 @@
" <td>3.192191</td>\n",
" <td>10</td>\n",
" <td>8</td>\n",
" <td>40</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
Expand All @@ -1013,6 +1018,7 @@
" <td>4.148767</td>\n",
" <td>10</td>\n",
" <td>9</td>\n",
" <td>41</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
Expand All @@ -1021,6 +1027,7 @@
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1096</th>\n",
Expand All @@ -1029,6 +1036,7 @@
" <td>4.058910</td>\n",
" <td>5</td>\n",
" <td>10</td>\n",
" <td>19</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1097</th>\n",
Expand All @@ -1037,6 +1045,7 @@
" <td>5.178157</td>\n",
" <td>5</td>\n",
" <td>11</td>\n",
" <td>19</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1098</th>\n",
Expand All @@ -1045,6 +1054,7 @@
" <td>6.133142</td>\n",
" <td>5</td>\n",
" <td>12</td>\n",
" <td>19</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1099</th>\n",
Expand All @@ -1053,6 +1063,7 @@
" <td>0.403709</td>\n",
" <td>5</td>\n",
" <td>13</td>\n",
" <td>19</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1100</th>\n",
Expand All @@ -1061,27 +1072,28 @@
" <td>1.081779</td>\n",
" <td>5</td>\n",
" <td>14</td>\n",
" <td>20</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1101 rows × 5 columns</p>\n",
"<p>1101 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" unique_id ds y month day\n",
"0 0 2000-10-05 0.428973 10 5\n",
"1 0 2000-10-06 1.423626 10 6\n",
"2 0 2000-10-07 2.311782 10 7\n",
"3 0 2000-10-08 3.192191 10 8\n",
"4 0 2000-10-09 4.148767 10 9\n",
"... ... ... ... ... ...\n",
"1096 4 2001-05-10 4.058910 5 10\n",
"1097 4 2001-05-11 5.178157 5 11\n",
"1098 4 2001-05-12 6.133142 5 12\n",
"1099 4 2001-05-13 0.403709 5 13\n",
"1100 4 2001-05-14 1.081779 5 14\n",
" unique_id ds y month day week\n",
"0 0 2000-10-05 0.428973 10 5 40\n",
"1 0 2000-10-06 1.423626 10 6 40\n",
"2 0 2000-10-07 2.311782 10 7 40\n",
"3 0 2000-10-08 3.192191 10 8 40\n",
"4 0 2000-10-09 4.148767 10 9 41\n",
"... ... ... ... ... ... ...\n",
"1096 4 2001-05-10 4.058910 5 10 19\n",
"1097 4 2001-05-11 5.178157 5 11 19\n",
"1098 4 2001-05-12 6.133142 5 12 19\n",
"1099 4 2001-05-13 0.403709 5 13 19\n",
"1100 4 2001-05-14 1.081779 5 14 20\n",
"\n",
"[1101 rows x 5 columns]"
"[1101 rows x 6 columns]"
]
},
"execution_count": null,
Expand All @@ -1090,7 +1102,7 @@
}
],
"source": [
"transformed_df, future_df = time_features(series, freq='D', features=['month', 'day'], h=1)\n",
"transformed_df, future_df = time_features(series, freq='D', features=['month', 'day', 'week'], h=1)\n",
"transformed_df"
]
},
Expand Down Expand Up @@ -1125,6 +1137,7 @@
" <th>ds</th>\n",
" <th>month</th>\n",
" <th>day</th>\n",
" <th>week</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
Expand All @@ -1134,46 +1147,51 @@
" <td>2001-05-15</td>\n",
" <td>5</td>\n",
" <td>15</td>\n",
" <td>20</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>2001-05-15</td>\n",
" <td>5</td>\n",
" <td>15</td>\n",
" <td>20</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2</td>\n",
" <td>2001-05-15</td>\n",
" <td>5</td>\n",
" <td>15</td>\n",
" <td>20</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3</td>\n",
" <td>2001-05-15</td>\n",
" <td>5</td>\n",
" <td>15</td>\n",
" <td>20</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>4</td>\n",
" <td>2001-05-15</td>\n",
" <td>5</td>\n",
" <td>15</td>\n",
" <td>20</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" unique_id ds month day\n",
"0 0 2001-05-15 5 15\n",
"1 1 2001-05-15 5 15\n",
"2 2 2001-05-15 5 15\n",
"3 3 2001-05-15 5 15\n",
"4 4 2001-05-15 5 15"
" unique_id ds month day week\n",
"0 0 2001-05-15 5 15 20\n",
"1 1 2001-05-15 5 15 20\n",
"2 2 2001-05-15 5 15 20\n",
"3 3 2001-05-15 5 15 20\n",
"4 4 2001-05-15 5 15 20"
]
},
"execution_count": null,
Expand Down
2 changes: 1 addition & 1 deletion settings.ini
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[DEFAULT]
repo = utilsforecast
lib_name = utilsforecast
version = 0.2.10
version = 0.2.11
min_python = 3.8
license = apache2
black_formatting = True
Expand Down
2 changes: 1 addition & 1 deletion utilsforecast/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.2.10"
__version__ = "0.2.11"
4 changes: 2 additions & 2 deletions utilsforecast/feature_engineering.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ def _compute_time_feature(
if isinstance(times, pd.DatetimeIndex):
if feature in ("week", "weekofyear"):
times = times.isocalendar()
feat_vals = getattr(times, feature)
feat_vals = getattr(times, feature).to_numpy()
else:
feat_vals = getattr(times.dt, feature)()
return feat_name, feat_vals
Expand All @@ -228,7 +228,7 @@ def _add_time_features(
if isinstance(df, pd.DataFrame):
times = pd.Index(unique_times)
time2pos = {time: i for i, time in enumerate(times)}
restore_idxs = df[time_col].map(time2pos)
restore_idxs = df[time_col].map(time2pos).to_numpy()
for feature in features:
name, vals = _compute_time_feature(times, feature)
df[name] = vals[restore_idxs]
Expand Down

0 comments on commit fd00d55

Please sign in to comment.