From 6eb86278743c787c57341c4434edb2080b15004e Mon Sep 17 00:00:00 2001 From: Andres Rios Tascon Date: Mon, 12 Aug 2024 09:19:35 -0400 Subject: [PATCH] fix: adjust checks in ForthMachine to prevent segfault when num_items is negative (#3209) * Adjusted checks in case num_items is negative * Added tests * Added documentation for new rule * Specify dtype for numpy arrays --- .../src/libawkward/forth/ForthMachine.cpp | 6 ++- docs/reference/awkwardforth.rst | 14 ++++++ ...wardforth_read_negative_number_of_items.py | 45 +++++++++++++++++++ 3 files changed, 63 insertions(+), 2 deletions(-) create mode 100644 tests/test_3209_awkwardforth_read_negative_number_of_items.py diff --git a/awkward-cpp/src/libawkward/forth/ForthMachine.cpp b/awkward-cpp/src/libawkward/forth/ForthMachine.cpp index cffa8f6a91..c6b481cf7c 100644 --- a/awkward-cpp/src/libawkward/forth/ForthMachine.cpp +++ b/awkward-cpp/src/libawkward/forth/ForthMachine.cpp @@ -3075,7 +3075,7 @@ namespace awkward { uint64_t tmp; uint8_t tmpbyte; - if (items_remaining != 0) { + if (items_remaining > 0) { tmpbyte = input->read_byte(current_error_); if (current_error_ != util::ForthError::none) { return; @@ -3087,7 +3087,7 @@ namespace awkward { } data = tmp; } - while (items_remaining != 0) { + while (items_remaining > 0) { if (bits_wnd_r >= 8) { bits_wnd_r -= 8; bits_wnd_l -= 8; @@ -3230,6 +3230,7 @@ namespace awkward { break; \ } + if (num_items < 0) num_items = 0; switch (format) { case READ_BOOL: WRITE_DIRECTLY(bool, bool) case READ_INT8: WRITE_DIRECTLY(int8_t, int8) @@ -3311,6 +3312,7 @@ namespace awkward { break; \ } + if (num_items < 0) num_items = 0; switch (format) { case READ_BOOL: WRITE_TO_STACK(bool) case READ_INT8: WRITE_TO_STACK(int8_t) diff --git a/docs/reference/awkwardforth.rst b/docs/reference/awkwardforth.rst index 954179ba45..ad2bcd489f 100644 --- a/docs/reference/awkwardforth.rst +++ b/docs/reference/awkwardforth.rst @@ -1219,6 +1219,20 @@ and but the second is faster because it involves two Forth instructions and one ``memcpy``. +If the number of items to read is negative then it is interpreted as zero. + +.. code-block:: python + + >>> vm = ForthMachine32(""" + ... input x + ... output y float32 + ... + ... -1000000 x #d-> y + ... """) + >>> vm.run({"x": np.arange(1000000) * 1.1}) + >>> np.asarray(vm["y"]) + array([], dtype=float32) + Type codes """""""""" diff --git a/tests/test_3209_awkwardforth_read_negative_number_of_items.py b/tests/test_3209_awkwardforth_read_negative_number_of_items.py new file mode 100644 index 0000000000..6d9b76571a --- /dev/null +++ b/tests/test_3209_awkwardforth_read_negative_number_of_items.py @@ -0,0 +1,45 @@ +# BSD 3-Clause License; see https://github.com/scikit-hep/awkward/blob/main/LICENSE + +from __future__ import annotations + +import numpy as np + +import awkward as ak + + +def test_read_negative_number_of_items(): + vm = ak.forth.ForthMachine32("input source -5 source #q-> stack") + vm.run({"source": np.array([1, 2, 3, 4, 5], dtype=np.int64)}) + assert vm.stack == [] + + vm = ak.forth.ForthMachine32("input source output sink float64 -5 source #q-> sink") + vm.run({"source": np.array([1, 2, 3, 4, 5], dtype=np.int64)}) + assert vm.output("sink").tolist() == [] + + +def test_read_negative_and_positive_number_of_items(): + vm = ak.forth.ForthMachine32( + "input source -5 source #q-> stack 5 source #q-> stack" + ) + vm.run({"source": np.array([1, 2, 3, 4, 5], dtype=np.int64)}) + assert vm.stack == [1, 2, 3, 4, 5] + + vm = ak.forth.ForthMachine32( + "input source output sink float64 -5 source #q-> sink 5 source #q-> sink" + ) + vm.run({"source": np.array([1, 2, 3, 4, 5], dtype=np.int64)}) + assert vm.output("sink").tolist() == [1, 2, 3, 4, 5] + + +def test_read_positive_and_negative_number_of_items(): + vm = ak.forth.ForthMachine32( + "input source 5 source #q-> stack -5 source #q-> stack" + ) + vm.run({"source": np.array([1, 2, 3, 4, 5], dtype=np.int64)}) + assert vm.stack == [1, 2, 3, 4, 5] + + vm = ak.forth.ForthMachine32( + "input source output sink float64 5 source #q-> sink -5 source #q-> sink" + ) + vm.run({"source": np.array([1, 2, 3, 4, 5], dtype=np.int64)}) + assert vm.output("sink").tolist() == [1, 2, 3, 4, 5]