Skip to content

Commit

Permalink
Only flush column if not empty to avoid extraneous columnar_update ca…
Browse files Browse the repository at this point in the history
…lls (#1538)

## Description

This change has little effect on standard metrics, but we currently make an
extraneous call to columnar_update when converting to a view even if the
cache is empty, which leads to a surprising number of calls and updates
being invoked with empty data.

## Changes

- Check if the cache is empty before flushing the cache in column
profiles

- [x] I have reviewed the [Guidelines for Contributing](CONTRIBUTING.md)
and the [Code of Conduct](CODE_OF_CONDUCT.md).
  • Loading branch information
jamie256 authored Jun 25, 2024
1 parent d06b869 commit 7515acd
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 19 deletions.
36 changes: 18 additions & 18 deletions python/examples/advanced/Custom_Metrics.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 3,
"metadata": {
"id": "HlFNH-H6-qKg"
},
Expand All @@ -195,7 +195,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 4,
"metadata": {},
"outputs": [
{
Expand All @@ -219,16 +219,16 @@
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>histogram/n</th>\n",
" <th>histogram/max</th>\n",
" <th>histogram/median</th>\n",
" <th>histogram/min</th>\n",
" <th>histogram/n</th>\n",
" <th>histogram/q_10</th>\n",
" <th>histogram/q_25</th>\n",
" <th>histogram/median</th>\n",
" <th>histogram/q_75</th>\n",
" <th>histogram/q_90</th>\n",
" <th>struct/x</th>\n",
" <th>struct/s</th>\n",
" <th>struct/x</th>\n",
" <th>type</th>\n",
" </tr>\n",
" <tr>\n",
Expand All @@ -249,37 +249,37 @@
" <tbody>\n",
" <tr>\n",
" <th>col1</th>\n",
" <td>1</td>\n",
" <td>1.2</td>\n",
" <td>1.2</td>\n",
" <td>1.2</td>\n",
" <td>1</td>\n",
" <td>1.2</td>\n",
" <td>1.2</td>\n",
" <td>1.2</td>\n",
" <td>1.2</td>\n",
" <td>2</td>\n",
" <td>aa</td>\n",
" <td>a</td>\n",
" <td>1</td>\n",
" <td>SummaryType.COLUMN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" histogram/n histogram/max histogram/min histogram/q_10 \\\n",
"column \n",
"col1 1 1.2 1.2 1.2 \n",
" histogram/max histogram/median histogram/min histogram/n \\\n",
"column \n",
"col1 1.2 1.2 1.2 1 \n",
"\n",
" histogram/q_25 histogram/median histogram/q_75 histogram/q_90 \\\n",
"column \n",
"col1 1.2 1.2 1.2 1.2 \n",
" histogram/q_10 histogram/q_25 histogram/q_75 histogram/q_90 \\\n",
"column \n",
"col1 1.2 1.2 1.2 1.2 \n",
"\n",
" struct/x struct/s type \n",
" struct/s struct/x type \n",
"column \n",
"col1 2 aa SummaryType.COLUMN "
"col1 a 1 SummaryType.COLUMN "
]
},
"execution_count": 5,
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -310,7 +310,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
"version": "3.9.19"
},
"vscode": {
"interpreter": {
Expand Down
3 changes: 2 additions & 1 deletion python/whylogs/core/column_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ def flush(self) -> None:
logger.debug("Flushing out the cache for col: %s. Cache size: %s", self._name, self._cache_size)
old_cache = self._cache
self._cache = []
self.track_column(old_cache)
if old_cache:
self.track_column(old_cache)

def _process_extracted_column(self, extracted_column: PreprocessedColumn) -> None:
for metric in self._metrics.values():
Expand Down

0 comments on commit 7515acd

Please sign in to comment.