Skip to content

Commit

Permalink
GH-44679: [C++][Python] Fix Flight Timestamp precision, revert workar…
Browse files Browse the repository at this point in the history
…ound from #43537 (#44681)

### What changes are included in this PR?
Make `arrow.flight.Timestamp` nanoseconds precision and OS-independent.

Use the `CTimePoint` for `FlightEndpoint.expiration_time` to have OS-independent nanoseconds precision.

https://github.com/apache/arrow/blob/093655c60783321c786a8e69632f185d37520f4d/python/pyarrow/includes/libarrow_flight.pxd#L133-L139

This reverts workaround for `expiration_time` from  #43537.

### Are these changes tested?
Existing unit tests are adjusted to the new precision.

### Are there any user-facing changes?

**This PR includes breaking changes to public APIs.**

The values of `FlightEndpoint.expiration_time` now have nanoseconds precision on any operating system.

* GitHub Issue: #44679

Authored-by: Enrico Minack <[email protected]>
Signed-off-by: David Li <[email protected]>
  • Loading branch information
EnricoMi authored Nov 14, 2024
1 parent d7bc378 commit d534e77
Show file tree
Hide file tree
Showing 8 changed files with 16 additions and 79 deletions.
18 changes: 6 additions & 12 deletions cpp/src/arrow/flight/flight_internals_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -184,12 +184,9 @@ TEST(FlightTypes, FlightDescriptor) {
TEST(FlightTypes, FlightEndpoint) {
ASSERT_OK_AND_ASSIGN(auto location1, Location::ForGrpcTcp("localhost", 1024));
ASSERT_OK_AND_ASSIGN(auto location2, Location::ForGrpcTls("localhost", 1024));
// 2023-06-19 03:14:06.004330100
// We must use microsecond resolution here for portability.
// std::chrono::system_clock::time_point may not provide nanosecond
// resolution on some platforms such as Windows.
// 2023-06-19 03:14:06.004339123
const auto expiration_time_duration =
std::chrono::seconds{1687144446} + std::chrono::nanoseconds{4339000};
std::chrono::seconds{1687144446} + std::chrono::nanoseconds{4339123};
Timestamp expiration_time(
std::chrono::duration_cast<Timestamp::duration>(expiration_time_duration));
std::vector<FlightEndpoint> values = {
Expand All @@ -210,7 +207,7 @@ TEST(FlightTypes, FlightEndpoint) {
"<FlightEndpoint ticket=<Ticket ticket='bar'> locations=[] "
"expiration_time=null app_metadata='DEADBEEF'>",
"<FlightEndpoint ticket=<Ticket ticket='foo'> locations=[] "
"expiration_time=2023-06-19 03:14:06.004339000 app_metadata=''>",
"expiration_time=2023-06-19 03:14:06.004339123 app_metadata=''>",
"<FlightEndpoint ticket=<Ticket ticket='foo'> locations="
"[grpc+tcp://localhost:1024] expiration_time=null app_metadata=''>",
"<FlightEndpoint ticket=<Ticket ticket='bar'> locations="
Expand Down Expand Up @@ -271,12 +268,9 @@ TEST(FlightTypes, PollInfo) {
auto desc = FlightDescriptor::Command("foo");
auto endpoint = FlightEndpoint{Ticket{"foo"}, {}, std::nullopt, ""};
auto info = MakeFlightInfo(schema, desc, {endpoint}, -1, 42, true, "");
// 2023-06-19 03:14:06.004330100
// We must use microsecond resolution here for portability.
// std::chrono::system_clock::time_point may not provide nanosecond
// resolution on some platforms such as Windows.
// 2023-06-19 03:14:06.004339123
const auto expiration_time_duration =
std::chrono::seconds{1687144446} + std::chrono::nanoseconds{4339000};
std::chrono::seconds{1687144446} + std::chrono::nanoseconds{4339123};
Timestamp expiration_time(
std::chrono::duration_cast<Timestamp::duration>(expiration_time_duration));
std::vector<PollInfo> values = {
Expand All @@ -292,7 +286,7 @@ TEST(FlightTypes, PollInfo) {
"progress=null expiration_time=null>",
"<PollInfo info=" + info.ToString() +
" descriptor=<FlightDescriptor cmd='poll'> "
"progress=0.1 expiration_time=2023-06-19 03:14:06.004339000>",
"progress=0.1 expiration_time=2023-06-19 03:14:06.004339123>",
"<PollInfo info=null descriptor=null progress=null expiration_time=null>",
};

Expand Down
3 changes: 2 additions & 1 deletion cpp/src/arrow/flight/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,8 @@ class FlightServerBase;
/// > all minutes are 60 seconds long, i.e. leap seconds are "smeared"
/// > so that no leap second table is needed for interpretation. Range
/// > is from 0001-01-01T00:00:00Z to 9999-12-31T23:59:59.999999999Z.
using Timestamp = std::chrono::system_clock::time_point;
using Timestamp =
std::chrono::time_point<std::chrono::system_clock, std::chrono::nanoseconds>;

/// \brief A Flight-specific status code. Used to encode some
/// additional status codes into an Arrow Status.
Expand Down
13 changes: 3 additions & 10 deletions python/pyarrow/_flight.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -750,11 +750,8 @@ cdef class FlightEndpoint(_Weakrefable):

if expiration_time is not None:
if isinstance(expiration_time, lib.TimestampScalar):
# Convert into OS-dependent std::chrono::system_clock::time_point from
# std::chrono::time_point<std::chrono::system_clock, std::chrono::nanoseconds>
# See Timestamp in cpp/src/arrow/flight/types.h
self.endpoint.expiration_time = TimePoint_to_system_time(TimePoint_from_ns(
expiration_time.cast(timestamp("ns")).value))
self.endpoint.expiration_time = TimePoint_from_ns(
expiration_time.cast(timestamp("ns")).value)
else:
raise TypeError("Argument expiration_time must be a TimestampScalar, "
"not '{}'".format(type(expiration_time)))
Expand Down Expand Up @@ -786,11 +783,7 @@ cdef class FlightEndpoint(_Weakrefable):
cdef:
int64_t time_since_epoch
if self.endpoint.expiration_time.has_value():
time_since_epoch = TimePoint_to_ns(
# Convert from OS-dependent std::chrono::system_clock::time_point into
# std::chrono::time_point<std::chrono::system_clock, std::chrono::nanoseconds>
# See Timestamp in cpp/src/arrow/flight/types.h
TimePoint_from_system_time(self.endpoint.expiration_time.value()))
time_since_epoch = TimePoint_to_ns(self.endpoint.expiration_time.value())
return lib.scalar(time_since_epoch, timestamp("ns", "UTC"))
return None

Expand Down
23 changes: 0 additions & 23 deletions python/pyarrow/includes/chrono.pxd

This file was deleted.

4 changes: 2 additions & 2 deletions python/pyarrow/includes/libarrow_flight.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

from pyarrow.includes.common cimport *
from pyarrow.includes.libarrow cimport *
from pyarrow.includes.chrono cimport time_point
from pyarrow.includes.libarrow_python cimport CTimePoint


cdef extern from "arrow/flight/api.h" namespace "arrow" nogil:
Expand Down Expand Up @@ -135,7 +135,7 @@ cdef extern from "arrow/flight/api.h" namespace "arrow" nogil:

CTicket ticket
vector[CLocation] locations
optional[time_point] expiration_time
optional[CTimePoint] expiration_time
c_string app_metadata

bint operator==(CFlightEndpoint)
Expand Down
4 changes: 0 additions & 4 deletions python/pyarrow/includes/libarrow_python.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@

# distutils: language = c++

from pyarrow.includes.chrono cimport time_point
from pyarrow.includes.common cimport *
from pyarrow.includes.libarrow cimport *

Expand Down Expand Up @@ -245,9 +244,6 @@ cdef extern from "arrow/python/api.h" namespace "arrow::py::internal" nogil:
CTimePoint TimePoint_from_s(double val)
CTimePoint TimePoint_from_ns(int64_t val)

CTimePoint TimePoint_from_system_time(time_point val)
time_point TimePoint_to_system_time(CTimePoint val)

CResult[c_string] TzinfoToString(PyObject* pytzinfo)
CResult[PyObject*] StringToTzinfo(c_string)

Expand Down
14 changes: 0 additions & 14 deletions python/pyarrow/src/arrow/python/datetime.h
Original file line number Diff line number Diff line change
Expand Up @@ -144,20 +144,6 @@ inline TimePoint TimePoint_from_ns(int64_t val) {
return TimePoint(TimePoint::duration(val));
}

ARROW_PYTHON_EXPORT
// Note: Needed by FlightEndpoint.expiration_time, which is an OS-dependent
// std::chrono::system_clock::time_point
inline std::chrono::system_clock::time_point TimePoint_to_system_time(TimePoint val) {
return std::chrono::time_point_cast<std::chrono::system_clock::duration>(val);
}

ARROW_PYTHON_EXPORT
// Note: Needed by FlightEndpoint.expiration_time, which is an OS-dependent
// std::chrono::system_clock::time_point
inline TimePoint TimePoint_from_system_time(std::chrono::system_clock::time_point val) {
return std::chrono::time_point_cast<TimePoint::duration>(val);
}

ARROW_PYTHON_EXPORT
inline int64_t PyDelta_to_s(PyDateTime_Delta* pytimedelta) {
return (PyDateTime_DELTA_GET_DAYS(pytimedelta) * 86400LL +
Expand Down
16 changes: 3 additions & 13 deletions python/pyarrow/tests/test_flight.py
Original file line number Diff line number Diff line change
Expand Up @@ -1179,19 +1179,9 @@ def test_flight_get_info():
assert info.endpoints[0].expiration_time is None
assert info.endpoints[0].app_metadata == b""
assert info.endpoints[0].locations[0] == flight.Location('grpc://test')
# on macOS, system_clock::duration is milliseconds
# on Windows, system_clock::duration is 100 nanoseconds
# on Linux, system_clock::duration is nanoseconds
ts = None
if pa._platform.system() == 'Darwin':
ts = "2023-04-05T12:34:56.789012000+00:00"
elif pa._platform.system() == 'Windows':
ts = "2023-04-05T12:34:56.789012300+00:00"
elif pa._platform.system() == 'Linux':
ts = "2023-04-05T12:34:56.789012345+00:00"
if ts is not None:
assert info.endpoints[1].expiration_time == \
pa.scalar(ts).cast(pa.timestamp("ns", "UTC"))
assert info.endpoints[1].expiration_time == \
pa.scalar("2023-04-05T12:34:56.789012345+00:00") \
.cast(pa.timestamp("ns", "UTC"))
assert info.endpoints[1].app_metadata == b"endpoint app metadata"
assert info.endpoints[1].locations[0] == \
flight.Location.for_grpc_tcp('localhost', 5005)
Expand Down

0 comments on commit d534e77

Please sign in to comment.