Skip to content

Commit

Permalink
Merge pull request #33 from sot/performance
Browse files Browse the repository at this point in the history
Add generic fast format converters for better performance
  • Loading branch information
taldcroft authored Jul 7, 2023
2 parents 717bed5 + dc28991 commit 733aa18
Show file tree
Hide file tree
Showing 8 changed files with 632 additions and 200 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/flake8.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@ jobs:

steps:
- uses: actions/checkout@v3
- name: Set up Python 3.8
- name: Set up Python 3.10
uses: actions/setup-python@v4
with:
python-version: 3.8
python-version: "3.10"
- name: Lint with flake8
run: |
pip install flake8
Expand Down
3 changes: 2 additions & 1 deletion cxotime/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
from astropy import units # noqa
from astropy.time import TimeDelta # noqa

from .cxotime import CxoTime, CxoTimeLike, date2secs, secs2date # noqa
from .convert import * # noqa
from .cxotime import CxoTime, CxoTimeLike # noqa: F401

__version__ = ska_helpers.get_version(__package__)

Expand Down
326 changes: 326 additions & 0 deletions cxotime/convert.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,326 @@
import datetime
import re
import sys

import erfa
import numpy as np
from astropy.time.formats import TIME_FORMATS, TimeYearDayTime, _parse_times

from .cxotime import CxoTime, TimeGreta, TimeMaude

__all__ = ["print_time_conversions", "convert_time_format"]


def print_time_conversions():
"""Interface to entry_point script ``cxotime`` to print time conversions"""
date = None if len(sys.argv) == 1 else sys.argv[1]
date = CxoTime(date)
date.print_conversions()


def convert_time_format(val, fmt_out, *, fmt_in=None):
"""
Convert a time to a different format.
Parameters
----------
val : CxoTimeLike
Time value
fmt_out : str
Output format
fmt_in : str
Input format (default is to guess)
Returns
-------
val_out : str
Time string in output format
"""
jd1, jd2 = None, None
if fmt_in is None:
# Get the format. For some formats the jd1/jd2 values are generated as a
# byproduct so we can capture them to avoid re-calculating later.
fmt_in, jd1, jd2 = get_format(val)

try:
converter_in = globals()[f"convert_{fmt_in}_to_jd1_jd2"]
converter_out = globals()[f"convert_jd1_jd2_to_{fmt_out}"]
except KeyError:
# Don't have a converter for this format, so use full CxoTime guessing
kwargs = {} if fmt_in is None else {"format": fmt_in}
tm = CxoTime(val, **kwargs)
out = getattr(tm, fmt_out)
return out

if jd1 is None or jd2 is None:
jd1, jd2 = converter_in(val)
out = converter_out(jd1, jd2)

# If the output is a scalar ndarray then return a scalar pure Python type
if isinstance(out, np.ndarray) and out.shape == ():
out = out.item()

return out


def get_format(val):
"""Get time format of ``val`` and return jd1, jd2 if available.
This infers the time format (e.g. 'secs', 'date', 'greta', 'maude') based on the
input ``val``. In some cases inferring the format has the side effect of computing
the jd1, jd2 values, so those are returned if available.
The jd1, jd2 values are the internal representation of time used in the astropy Time
class, with jd1 being the integer part and jd2 being the fractional remainder of the
date as a float Julian Date.
:param val: str, float, obj
Time value
:returns: tuple
(format, jd1, jd2)
"""
fmt_in = None
jd1 = None
jd2 = None

if val is None:
return fmt_in, jd1, jd2

val = np.asarray(val)

# First check for a number or array of numbers, which implies CXC seconds
if issubclass(val.dtype.type, np.number):
fmt_in = "secs"
jd1, jd2 = convert_secs_to_jd1_jd2(val)
return fmt_in, jd1, jd2

if val.dtype.kind not in ("U", "S"):
# Not a number or string (could be CxoTime obj or None), so push this
# back for generic conversion by CxoTime
return fmt_in, jd1, jd2

convert_funcs = [
convert_date_to_jd1_jd2,
convert_greta_to_jd1_jd2,
convert_maude_to_jd1_jd2,
]
for convert_func in convert_funcs:
try:
jd1, jd2 = convert_func(val)
fmt_in = convert_func.__name__.split("_")[1]
break
except Exception:
pass

return fmt_in, jd1, jd2


def convert_jd1_jd2_to_secs(jd1, jd2):
# Transform to TT via TAI
jd1, jd2, _ = erfa.ufunc.utctai(jd1, jd2)
jd1, jd2, _ = erfa.ufunc.taitt(jd1, jd2)

# Fixed offsets taken from CxoTime(0.0).tt.jd1,2
time_from_epoch1 = (jd1 - 2450814.0) * 86400.0
time_from_epoch2 = (jd2 - 0.5) * 86400.0

secs = time_from_epoch1 + time_from_epoch2
return secs


def convert_date_to_jd1_jd2(date):
# Performance note: using isinstance(date, np.ndarray) is a few times faster than
# date = np.asarrray(date). (64 ns vs 226 ns).
if not isinstance(date, np.ndarray):
date = np.array(date)
jd1, jd2 = convert_string_to_jd1_jd2(date, TimeYearDayTime)
return jd1, jd2


def convert_greta_to_jd1_jd2(date):
if not isinstance(date, np.ndarray):
date = np.array(date)

# Allow for numeric input but reformat as string
if date.dtype.kind in ("f", "i"):
date = np.array([f"{x:.9f}" for x in date.flat]).reshape(date.shape)

jd1, jd2 = convert_string_to_jd1_jd2(date, TimeGreta)
return jd1, jd2


def convert_maude_to_jd1_jd2(date):
if not isinstance(date, np.ndarray):
date = np.array(date)

if date.dtype.kind == "i":
date = date.astype("S")

jd1, jd2 = convert_string_to_jd1_jd2(date, TimeMaude)
return jd1, jd2


def convert_jd1_jd2_to_greta(jd1, jd2):
date = convert_jd1_jd2_to_date(jd1, jd2)
# Convert '1997:365:23:58:57.816' to '1997365.235857816'
# 012345678901234567890
if isinstance(date, np.ndarray):
out = np.array(
[
x[:4] + x[5:8] + "." + x[9:11] + x[12:14] + x[15:17] + x[18:21]
for x in date.flat
]
)
out.shape = date.shape
else:
x = date # 15 ns
out = x[:4] + x[5:8] + "." + x[9:11] + x[12:14] + x[15:17] + x[18:21] # 660 ns
return out


def convert_jd1_jd2_to_maude(jd1, jd2):
date = convert_jd1_jd2_to_date(jd1, jd2)
if isinstance(date, np.ndarray):
out = np.array(
[
x[:4] + x[5:8] + x[9:11] + x[12:14] + x[15:17] + x[18:21]
for x in date.flat
]
)
out = out.astype("i8")
out.shape = date.shape
else:
x = date
out = int(x[:4] + x[5:8] + x[9:11] + x[12:14] + x[15:17] + x[18:21])
return out


def convert_jd1_jd2_to_jd(jd1, jd2):
jd = jd1 + jd2
return jd


def convert_jd_to_jd1_jd2(jd):
jd1 = np.asarray(jd)
jd2 = 0.0
return jd1, jd2


def convert_string_to_jd1_jd2(date, time_format_cls):
if date.dtype.kind == "U":
# This assumes the input is pure ASCII.
val1_uint32 = date.view((np.uint32, date.dtype.itemsize // 4))
chars = val1_uint32.astype(_parse_times.dt_u1)
else:
chars = date.view((_parse_times.dt_u1, date.dtype.itemsize))

# Call the fast parsing ufunc.
time_struct = time_format_cls._fast_parser(chars)

# In these ERFA calls ignore the return value since we know jd1, jd2 are OK.
# Checking the return value via np.any nearly doubles the function time.

# Convert time ISO date to jd1, jd2
jd1, jd2, _ = erfa.ufunc.dtf2d(
b"UTC",
time_struct["year"],
time_struct["month"],
time_struct["day"],
time_struct["hour"],
time_struct["minute"],
time_struct["second"],
)

return jd1, jd2


def convert_jd1_jd2_to_date(jd1, jd2):
iys, ims, ids, ihmsfs = erfa.d2dtf(b"TT", 3, jd1, jd2)
ihrs = ihmsfs["h"]
imins = ihmsfs["m"]
isecs = ihmsfs["s"]
ifracs = ihmsfs["f"]

if isinstance(jd1, np.ndarray):
dates = []
for iy, im, id, ihr, imin, isec, ifracsec in np.nditer(
[iys, ims, ids, ihrs, imins, isecs, ifracs], flags=["zerosize_ok"]
):
yday = datetime.datetime(iy, im, id).timetuple().tm_yday
date = f"{iy:4d}:{yday:03d}:{ihr:02d}:{imin:02d}:{isec:02d}.{ifracsec:03d}"
dates.append(date)

out = np.array(dates).reshape(jd1.shape)
else:
yday = datetime.datetime(iys, ims, ids).timetuple().tm_yday
out = f"{iys:4d}:{yday:03d}:{ihrs:02d}:{imins:02d}:{isecs:02d}.{ifracs:03d}"

return out


def convert_secs_to_jd1_jd2(secs):
jd2 = 0.0
try:
jd1 = secs / 86400.0 + 2450814.5
except TypeError:
# For a list input
jd1 = np.array(secs, dtype=np.float64) / 86400.0 + 2450814.5

# In these ERFA calls ignore the return value since we know jd1, jd2 are OK.
# Checking the return value via np.any is quite slow.
# Transform TT to UTC via TAI
jd1, jd2, _ = erfa.ufunc.tttai(jd1, jd2)
jd1, jd2, _ = erfa.ufunc.taiutc(jd1, jd2)
return jd1, jd2


def make_docstring(fmt_in, fmt_out):
import textwrap

fmt_in_cls = TIME_FORMATS[fmt_in]
doc_in = fmt_in_cls.convert_doc
fmt_out_cls = TIME_FORMATS[fmt_out]
doc_out = fmt_out_cls.convert_doc
equiv = f"CxoTime({doc_in['input_name']}, format='{fmt_in_cls.name}').{fmt_out_cls.name}"
out = f"""\
Convert {doc_in['descr_short']} to {doc_out['descr_short']}.
This is equivalent to ``{equiv}`` but potentially 10x faster.
Format in: {doc_in['input_format']}
Format out: {doc_out['output_format']}
Parameters
----------
{doc_in['input_name']} : {doc_in['input_type']}
{doc_in['descr_short']}
Returns
-------
{doc_out['input_name']} : {doc_out['output_type']}
{doc_out['descr_short']}
"""
return textwrap.dedent(out)


# Define shortcuts for converters like date2secs or greta2date.
# Accept each value of globals if it matches the pattern convert_jd1_jd2_to_...
CONVERT_FORMATS = [
m.group(1)
for fmt in list(globals())
if (m := re.match(r"convert_jd1_jd2_to_(\w+)", fmt))
]


for fmt1 in CONVERT_FORMATS:
input_name = TIME_FORMATS[fmt1].convert_doc["input_name"]
for fmt2 in CONVERT_FORMATS:
if fmt1 != fmt2:
name = f"{fmt1}2{fmt2}"
func_str = (
f"lambda {input_name}: "
f"convert_time_format({input_name}, fmt_in='{fmt1}', fmt_out='{fmt2}')"
)
func = globals()[name] = eval(func_str)
func.__doc__ = make_docstring(fmt1, fmt2)
__all__.append(name)
Loading

0 comments on commit 733aa18

Please sign in to comment.