From fe523d5848b635361452ed6849fa6a72b999a693 Mon Sep 17 00:00:00 2001
From: orellabac
Date: Wed, 18 Oct 2023 16:31:45 -0600
Subject: [PATCH] adding to_utc_timestamp

---
 CHANGE_LOG.txt                              |  5 ++
 README.md                                   |  1 +
 setup.py                                    |  2 +-
 snowpark_extensions/functions_extensions.py | 68 ++++++++++++++++++++-
 tests/test_functions.py                     |  8 +++
 5 files changed, 81 insertions(+), 3 deletions(-)

diff --git a/CHANGE_LOG.txt b/CHANGE_LOG.txt
index 41d99a9..914a284 100644
--- a/CHANGE_LOG.txt
+++ b/CHANGE_LOG.txt
@@ -177,3 +177,8 @@ Version 0.0.34
 --------------
 - explode have been removed from this library as it is supported natively by snowpark.
 - updated README providing information on how to use default `connections.toml`
+
+
+Version 0.0.35
+--------------
+- added functions.to_utc_timestamp extension
diff --git a/README.md b/README.md
index df51d60..161e357 100644
--- a/README.md
+++ b/README.md
@@ -321,6 +321,7 @@ That will return:
 | functions.array_sort | sorts the input array in ascending order or descending order. The elements of the input array must be orderable. Null elements will be placed at the end of the returned array. |
 | ~~functions.unix_timestamp~~ | ~~returns the UNIX timestamp of current time.~~ **Available in snowpark-python >= 1.1.0** |
 | ~~functions.from_unixtimestamp~~ | ~~can be used to convert UNIX time to Snowflake timestamp~~ **Available in snowpark-python >= 1.1.0** |
+| functions.to_utc_timestamp | converts a timezone-agnostic timestamp to a timezone-aware timestamp in the provided timezone before rendering that timestamp in UTC |
 | functions.format_number | formats numbers using the specified number of decimal places |
 | ~~functions.reverse~~ | ~~returns a reversed string~~ **Available in snowpark-python >= 1.2.0** |
 | ~~functions.explode~~ | ~~returns a new row for each element in the given array~~ **Available in snowpark-python >= 1.4.0** |
diff --git a/setup.py b/setup.py
index 929d082..886b1ba 100644
--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,7 @@ this_directory = Path(__file__).parent
 long_description = (this_directory / "README.md").read_text()
 
 
-VERSION = '0.0.34'
+VERSION = '0.0.35'
 
 setup(name='snowpark_extensions',
       version=VERSION,
diff --git a/snowpark_extensions/functions_extensions.py b/snowpark_extensions/functions_extensions.py
index 6dd6385..4bbec0b 100644
--- a/snowpark_extensions/functions_extensions.py
+++ b/snowpark_extensions/functions_extensions.py
@@ -335,8 +335,71 @@ def map_values(obj:dict)->list:
             F._map_values_udf = map_values
         return F._map_values_udf(col)
 
+    # Short zone IDs mapped to the zone value handed to F.convert_timezone.
+    # NOTE(review): the table appears to mirror java.time.ZoneId.SHORT_IDS; EST/HST/MST
+    # resolve to fixed offsets - confirm Snowflake accepts offset strings as a time zone.
+    timezoneMap = {
+        'EST': '-05:00',
+        'HST': '-10:00',
+        'MST': '-07:00',
+        'ACT': 'Australia/Darwin',
+        'AET': 'Australia/Sydney',
+        'AGT': 'America/Argentina/Buenos_Aires',
+        'ART': 'Africa/Cairo',
+        'AST': 'America/Anchorage',
+        'BET': 'America/Sao_Paulo',
+        'BST': 'Asia/Dhaka',
+        'CAT': 'Africa/Harare',
+        'CNT': 'America/St_Johns',
+        'CST': 'America/Chicago',
+        'CTT': 'Asia/Shanghai',
+        'EAT': 'Africa/Addis_Ababa',
+        'ECT': 'Europe/Paris',
+        'IET': 'America/Indiana/Indianapolis',
+        'IST': 'Asia/Kolkata',
+        'JST': 'Asia/Tokyo',
+        'MIT': 'Pacific/Apia',
+        'NET': 'Asia/Yerevan',
+        'NST': 'Pacific/Auckland',
+        'PLT': 'Asia/Karachi',
+        'PNT': 'America/Phoenix',
+        'PRT': 'America/Puerto_Rico',
+        'PST': 'America/Los_Angeles',
+        'SST': 'Pacific/Guadalcanal',
+        'VST': 'Asia/Ho_Chi_Minh'
+    }
+
+    def map_timestamp(tz_col):
+        """
+        Build a CASE expression that swaps a short zone ID held in tz_col for its
+        timezoneMap entry and leaves every other value unchanged.
+        """
+        case_expr = None
+        for tz, zone in timezoneMap.items():
+            matches = tz_col == F.lit(tz)
+            if case_expr is None:
+                case_expr = F.when(matches, F.lit(zone))
+            else:
+                case_expr = case_expr.when(matches, F.lit(zone))
+        return tz_col if case_expr is None else case_expr.otherwise(tz_col)
+
+    def to_utc_timestamp(timestamp:ColumnOrName, tz:ColumnOrLiteral):
+        """
+        The function converts a timezone-agnostic timestamp to a timezone-aware timestamp in the provided timezone before rendering that timestamp in UTC.
 
-    
+        Arguments:
+            timestamp: Column, or name of the column, holding the timestamp to convert
+            tz: time zone ID as a string or as a Column; IDs listed in timezoneMap are translated
+                first, and a value of any other type is passed to F.convert_timezone unchanged
+        Returns:
+            the result of F.convert_timezone(F.lit('UTC'), timestamp, tz)
+        """
+        if isinstance(tz, str):
+            tz = F.lit(timezoneMap.get(tz, tz))
+        elif isinstance(tz, Column):
+            tz = map_timestamp(tz)
+        timestamp = _to_col_if_str(timestamp,"to_utc_timestamp")
+        return F.convert_timezone(F.lit('UTC'),timestamp,tz)
 
 
 
@@ -351,4 +414,5 @@ def map_values(obj:dict)->list:
     F.flatten = _array_flatten
     F.map_values = _map_values
     F.regexp_split = _regexp_split
-    F.sort_array = _sort_array
\ No newline at end of file
+    F.sort_array = _sort_array
+    F.to_utc_timestamp = to_utc_timestamp
\ No newline at end of file
diff --git a/tests/test_functions.py b/tests/test_functions.py
index b474541..9c3b464 100644
--- a/tests/test_functions.py
+++ b/tests/test_functions.py
@@ -362,3 +362,11 @@ def test_regexp_split():
     res = df.select(regexp_split(df.s, '".+?"', 4).alias('s')).collect()
     assert res[0].S == '[\n ""\n]'
 
+def test_to_utc_timestamp():
+    session = Session.builder.from_snowsql().config("schema","PUBLIC").getOrCreate()
+    from snowflake.snowpark.functions import to_utc_timestamp
+    df = session.createDataFrame([('1997-02-28 10:30:00', 'JST')], ['ts', 'tz'])
+    res = df.select(to_utc_timestamp(df.ts, "PST").alias('utc_time')).collect()
+    assert res[0][0] == datetime.datetime(1997, 2, 28, 18, 30)
+    res = df.select(to_utc_timestamp(df.ts, df.tz).alias('utc_time')).collect()
+    assert res[0][0] == datetime.datetime(1997, 2, 28, 1, 30)
\ No newline at end of file