From 57c373e51321d5625df38bf9e16f09f170cbc27e Mon Sep 17 00:00:00 2001 From: Tristan Nixon Date: Thu, 18 Apr 2024 08:59:23 -0700 Subject: [PATCH 1/2] fix timestamp col type checking (#398) --- python/tempo/tsdf.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/tempo/tsdf.py b/python/tempo/tsdf.py index b8859353..2ede43c2 100644 --- a/python/tempo/tsdf.py +++ b/python/tempo/tsdf.py @@ -13,6 +13,7 @@ from pyspark.sql import SparkSession from pyspark.sql.column import Column from pyspark.sql.dataframe import DataFrame +from pyspark.sql.types import TimestampType from pyspark.sql.window import Window, WindowSpec from scipy.fft import fft, fftfreq # type: ignore @@ -1102,7 +1103,7 @@ def withRangeStats( ] # build window - if str(self.df.schema[self.ts_col].dataType) == "TimestampType": + if isinstance(self.df.schema[self.ts_col].dataType, TimestampType): self.df = self.__add_double_ts() prohibited_cols.extend(["double_ts"]) w = self.__rangeBetweenWindow( From 96913f5c63b9154c79fc4554f62a343af5a77adc Mon Sep 17 00:00:00 2001 From: Tristan Nixon Date: Fri, 19 Apr 2024 15:07:41 -0700 Subject: [PATCH 2/2] Issue397 with range stats not recog timestamp (#400) * fix timestamp col type checking * fixing another instance of a bad runtime type check --- python/tempo/tsdf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/tempo/tsdf.py b/python/tempo/tsdf.py index 2ede43c2..8ae1d44f 100644 --- a/python/tempo/tsdf.py +++ b/python/tempo/tsdf.py @@ -13,7 +13,7 @@ from pyspark.sql import SparkSession from pyspark.sql.column import Column from pyspark.sql.dataframe import DataFrame -from pyspark.sql.types import TimestampType +from pyspark.sql.types import StringType, TimestampType from pyspark.sql.window import Window, WindowSpec from scipy.fft import fft, fftfreq # type: ignore @@ -64,7 +64,7 @@ def __init__( # this if clause seems unneeded. Perhaps we should check for non-valid # Timestamp string matching then do some pattern matching to extract # the time stamp. - if df.schema[ts_col].dataType == "StringType": # pragma: no cover + if isinstance(df.schema[ts_col].dataType, StringType): # pragma: no cover sample_ts = df.limit(1).collect()[0][0] self.__validate_ts_string(sample_ts) self.df = self.__add_double_ts().withColumnRenamed("double_ts", self.ts_col)