From 1e5e3ab69de09f72f877b8eb72ac10a646c9d373 Mon Sep 17 00:00:00 2001 From: Marcel Martin Date: Sat, 22 Jan 2022 22:21:41 +0100 Subject: [PATCH 01/29] Implement BytesIO.peek() --- Lib/_pyio.py | 8 +++++++ Lib/test/test_memoryio.py | 17 +++++++++++++++ Modules/_io/bytesio.c | 37 +++++++++++++++++++++++++++++++++ Modules/_io/clinic/bytesio.c.h | 38 +++++++++++++++++++++++++++++++++- 4 files changed, 99 insertions(+), 1 deletion(-) diff --git a/Lib/_pyio.py b/Lib/_pyio.py index 32698abac78d25..339d1aa7e13084 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -978,6 +978,14 @@ def tell(self): raise ValueError("tell on closed file") return self._pos + def peek(self, size=-1): + pos = self.tell() + if size == 0: + size = -1 + b = self.read(size) + self.seek(pos) + return b + def truncate(self, pos=None): if self.closed: raise ValueError("truncate on closed file") diff --git a/Lib/test/test_memoryio.py b/Lib/test/test_memoryio.py index cd2faba1791c77..f77a99f1bbe14d 100644 --- a/Lib/test/test_memoryio.py +++ b/Lib/test/test_memoryio.py @@ -517,6 +517,23 @@ def test_relative_seek(self): memio.seek(1, 1) self.assertEqual(memio.read(), buf[1:]) + def test_peek(self): + buf = self.buftype("1234567890") + memio = self.ioclass(buf) + + self.assertEqual(memio.peek(1), buf[:1]) + self.assertEqual(memio.peek(1), buf[:1]) + self.assertEqual(memio.peek(), buf) + self.assertEqual(memio.peek(0), buf) + memio.read(1) + self.assertEqual(memio.peek(1), buf[1:2]) + self.assertEqual(memio.peek(), buf[1:]) + self.assertEqual(memio.peek(42), buf[1:]) + memio.read() + self.assertEqual(memio.peek(1), self.EOF) + memio.close() + self.assertRaises(ValueError, memio.peek) + def test_unicode(self): memio = self.ioclass() diff --git a/Modules/_io/bytesio.c b/Modules/_io/bytesio.c index f3074203f54ea2..51297f55eee8a1 100644 --- a/Modules/_io/bytesio.c +++ b/Modules/_io/bytesio.c @@ -462,6 +462,42 @@ _io_BytesIO_read1_impl(bytesio *self, Py_ssize_t size) return _io_BytesIO_read_impl(self, size); } + 
+/*[clinic input] +_io.BytesIO.peek + size: Py_ssize_t(accept={int, NoneType}) = -1 + / + +Return bytes from the stream without advancing the position. + +Return an empty bytes object at EOF. +[clinic start generated code]*/ + +static PyObject * +_io_BytesIO_peek_impl(bytesio *self, Py_ssize_t size) +/*[clinic end generated code: output=fa4d8ce28b35db9b input=afc80e71b37e7c59]*/ +{ + Py_ssize_t n; + const char *output; + + CHECK_CLOSED(self); + + /* adjust invalid sizes */ + n = self->string_size - self->pos; + if (size < 1 || size > n) { + size = n; + if (size < 0) + size = 0; + } + + assert(self->buf != NULL); + assert(size <= self->string_size); + output = PyBytes_AS_STRING(self->buf) + self->pos; + return PyBytes_FromStringAndSize(output, size); +} + + + /*[clinic input] _io.BytesIO.readline size: Py_ssize_t(accept={int, NoneType}) = -1 @@ -1019,6 +1055,7 @@ static struct PyMethodDef bytesio_methods[] = { _IO_BYTESIO_READLINE_METHODDEF _IO_BYTESIO_READLINES_METHODDEF _IO_BYTESIO_READ_METHODDEF + _IO_BYTESIO_PEEK_METHODDEF _IO_BYTESIO_GETBUFFER_METHODDEF _IO_BYTESIO_GETVALUE_METHODDEF _IO_BYTESIO_SEEK_METHODDEF diff --git a/Modules/_io/clinic/bytesio.c.h b/Modules/_io/clinic/bytesio.c.h index d42ab48cef2859..ae647d9d809673 100644 --- a/Modules/_io/clinic/bytesio.c.h +++ b/Modules/_io/clinic/bytesio.c.h @@ -232,6 +232,42 @@ _io_BytesIO_read1(bytesio *self, PyObject *const *args, Py_ssize_t nargs) return return_value; } +PyDoc_STRVAR(_io_BytesIO_peek__doc__, +"peek($self, size=-1, /)\n" +"--\n" +"\n" +"Return bytes from the stream without advancing the position.\n" +"\n" +"Return an empty bytes object at EOF."); + +#define _IO_BYTESIO_PEEK_METHODDEF \ + {"peek", _PyCFunction_CAST(_io_BytesIO_peek), METH_FASTCALL, _io_BytesIO_peek__doc__}, + +static PyObject * +_io_BytesIO_peek_impl(bytesio *self, Py_ssize_t size); + +static PyObject * +_io_BytesIO_peek(bytesio *self, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + Py_ssize_t size = 
-1; + + if (!_PyArg_CheckPositional("peek", nargs, 0, 1)) { + goto exit; + } + if (nargs < 1) { + goto skip_optional; + } + if (!_Py_convert_optional_to_ssize_t(args[0], &size)) { + goto exit; + } +skip_optional: + return_value = _io_BytesIO_peek_impl(self, size); + +exit: + return return_value; +} + PyDoc_STRVAR(_io_BytesIO_readline__doc__, "readline($self, size=-1, /)\n" "--\n" @@ -537,4 +573,4 @@ _io_BytesIO___init__(PyObject *self, PyObject *args, PyObject *kwargs) exit: return return_value; } -/*[clinic end generated code: output=b753fdf1ba36c461 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=106d7918b585497c input=a9049054013a1b77]*/ From acdfe2eaba72680abc0dccea2f158860038ff218 Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Sun, 10 Apr 2022 20:11:00 +0000 Subject: [PATCH 02/29] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20b?= =?UTF-8?q?lurb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../NEWS.d/next/Library/2022-04-10-20-10-59.bpo-46375.8j1ogZ.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Library/2022-04-10-20-10-59.bpo-46375.8j1ogZ.rst diff --git a/Misc/NEWS.d/next/Library/2022-04-10-20-10-59.bpo-46375.8j1ogZ.rst b/Misc/NEWS.d/next/Library/2022-04-10-20-10-59.bpo-46375.8j1ogZ.rst new file mode 100644 index 00000000000000..530381efef8746 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-04-10-20-10-59.bpo-46375.8j1ogZ.rst @@ -0,0 +1 @@ +Add a ``peek()`` method to :class:`io.BytesIO`. 
From b3f3c3d108b31e801c20443d64f3a88d5dabdf41 Mon Sep 17 00:00:00 2001 From: Marcel Martin Date: Wed, 9 Nov 2022 22:13:41 +0100 Subject: [PATCH 03/29] Document BytesIO.peek() --- Doc/library/io.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Doc/library/io.rst b/Doc/library/io.rst index 01088879218cb4..6327d32a4d82c7 100644 --- a/Doc/library/io.rst +++ b/Doc/library/io.rst @@ -728,6 +728,12 @@ than raw I/O does. Return :class:`bytes` containing the entire contents of the buffer. + .. method:: peek(size=0, /) + + .. versionadded:: 3.12 + + Return bytes from the current position onwards but without advancing the + position. The number of bytes returned may be less or more than requested. .. method:: read1(size=-1, /) From 25069677314c276f26c0c0eaa18c30840c3e81cd Mon Sep 17 00:00:00 2001 From: Marcel Martin Date: Wed, 9 Nov 2022 22:18:46 +0100 Subject: [PATCH 04/29] Implement with the help of read_bytes() This allows peek() to use the same optimization that read_bytes() has of returning a reference to the buffer when possible (without copying). 
--- Modules/_io/bytesio.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Modules/_io/bytesio.c b/Modules/_io/bytesio.c index 51297f55eee8a1..e6c6634037d752 100644 --- a/Modules/_io/bytesio.c +++ b/Modules/_io/bytesio.c @@ -489,11 +489,11 @@ _io_BytesIO_peek_impl(bytesio *self, Py_ssize_t size) if (size < 0) size = 0; } + Py_ssize_t prev_pos = self->pos; + PyObject* result = read_bytes(self, size); + self->pos = prev_pos; - assert(self->buf != NULL); - assert(size <= self->string_size); - output = PyBytes_AS_STRING(self->buf) + self->pos; - return PyBytes_FromStringAndSize(output, size); + return result; } From a5ac6012019065f19c474c7d9bf258480e55734f Mon Sep 17 00:00:00 2001 From: Marcel Martin Date: Sat, 8 Jul 2023 14:29:35 +0200 Subject: [PATCH 05/29] =?UTF-8?q?Add=20to=20What=E2=80=99s=20New?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Doc/whatsnew/3.13.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index fa24dc072ddefd..23d7007d03ecda 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -151,6 +151,8 @@ and only logged in :ref:`Python Development Mode ` or on :ref:`Python built on debug mode `. (Contributed by Victor Stinner in :gh:`62948`.) +* Add :meth:`~io.BytesIO.peek`. (Contributed by Marcel Martin in :gh:`90533`.) + opcode ------ From 9da7f9ff712faded9a6c3a67f0b1de4302397d37 Mon Sep 17 00:00:00 2001 From: Marcel Martin Date: Sat, 8 Jul 2023 14:33:54 +0200 Subject: [PATCH 06/29] versionadded: 3.12 -> 3.13 --- Doc/library/io.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/io.rst b/Doc/library/io.rst index 6327d32a4d82c7..d89c892e0256c9 100644 --- a/Doc/library/io.rst +++ b/Doc/library/io.rst @@ -730,7 +730,7 @@ than raw I/O does. .. method:: peek(size=0, /) - .. versionadded:: 3.12 + .. 
versionadded:: 3.13 Return bytes from the current position onwards but without advancing the position. The number of bytes returned may be less or more than requested. From 79d503224309596aa2dd54c4bfb2f12f9e390b64 Mon Sep 17 00:00:00 2001 From: Marcel Martin Date: Sat, 8 Jul 2023 14:35:22 +0200 Subject: [PATCH 07/29] Remove unused variable --- Modules/_io/bytesio.c | 1 - 1 file changed, 1 deletion(-) diff --git a/Modules/_io/bytesio.c b/Modules/_io/bytesio.c index e6c6634037d752..8ecf5e5b0719b4 100644 --- a/Modules/_io/bytesio.c +++ b/Modules/_io/bytesio.c @@ -478,7 +478,6 @@ _io_BytesIO_peek_impl(bytesio *self, Py_ssize_t size) /*[clinic end generated code: output=fa4d8ce28b35db9b input=afc80e71b37e7c59]*/ { Py_ssize_t n; - const char *output; CHECK_CLOSED(self); From 2963dab13fc4fa8ee70ecea16a6d3b8f8906a212 Mon Sep 17 00:00:00 2001 From: Marcel Martin Date: Fri, 22 Sep 2023 09:49:24 +0200 Subject: [PATCH 08/29] Test tell() after peek() --- Lib/test/test_memoryio.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_memoryio.py b/Lib/test/test_memoryio.py index f77a99f1bbe14d..d1d351000fe63b 100644 --- a/Lib/test/test_memoryio.py +++ b/Lib/test/test_memoryio.py @@ -520,11 +520,12 @@ def test_relative_seek(self): def test_peek(self): buf = self.buftype("1234567890") memio = self.ioclass(buf) - + pos = memio.tell() self.assertEqual(memio.peek(1), buf[:1]) self.assertEqual(memio.peek(1), buf[:1]) self.assertEqual(memio.peek(), buf) self.assertEqual(memio.peek(0), buf) + self.assertEqual(memio.tell(), pos) memio.read(1) self.assertEqual(memio.peek(1), buf[1:2]) self.assertEqual(memio.peek(), buf[1:]) From 7d8793a8e7c7c8d91139d350280a9a93d958153b Mon Sep 17 00:00:00 2001 From: Marcel Martin Date: Fri, 22 Sep 2023 09:49:55 +0200 Subject: [PATCH 09/29] Update docs, factor out peek_bytes, semantics Semantic change: The default argument for peek is now size=1. 
--- Doc/library/io.rst | 11 +++++++---- Lib/_pyio.py | 13 ++++++------- Lib/test/test_memoryio.py | 5 +++-- Modules/_io/bytesio.c | 23 +++++++++++++---------- Modules/_io/clinic/bytesio.c.h | 20 +++++++++++++++----- 5 files changed, 44 insertions(+), 28 deletions(-) diff --git a/Doc/library/io.rst b/Doc/library/io.rst index d89c892e0256c9..9c6ad31a7b7092 100644 --- a/Doc/library/io.rst +++ b/Doc/library/io.rst @@ -728,12 +728,15 @@ than raw I/O does. Return :class:`bytes` containing the entire contents of the buffer. - .. method:: peek(size=0, /) - - .. versionadded:: 3.13 + .. method:: peek(size=1, /) Return bytes from the current position onwards but without advancing the - position. The number of bytes returned may be less or more than requested. + position. If the size argument is less than one or larger than the number + of available bytes, a copy of the buffer from the current position until + the end is returned. + Return an empty bytes object at EOF. + + .. versionadded:: 3.13 .. 
method:: read1(size=-1, /) diff --git a/Lib/_pyio.py b/Lib/_pyio.py index 339d1aa7e13084..e76c404f470f79 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -978,13 +978,12 @@ def tell(self): raise ValueError("tell on closed file") return self._pos - def peek(self, size=-1): - pos = self.tell() - if size == 0: - size = -1 - b = self.read(size) - self.seek(pos) - return b + def peek(self, size=1): + if self.closed: + raise ValueError("peek on closed file") + if size < 1: + size = len(self._buffer) - self._pos + return self._buffer[self._pos : self._pos + size] def truncate(self, pos=None): if self.closed: diff --git a/Lib/test/test_memoryio.py b/Lib/test/test_memoryio.py index d1d351000fe63b..bb5164ae0067da 100644 --- a/Lib/test/test_memoryio.py +++ b/Lib/test/test_memoryio.py @@ -523,12 +523,13 @@ def test_peek(self): pos = memio.tell() self.assertEqual(memio.peek(1), buf[:1]) self.assertEqual(memio.peek(1), buf[:1]) - self.assertEqual(memio.peek(), buf) + self.assertEqual(memio.peek(), buf[:1]) self.assertEqual(memio.peek(0), buf) self.assertEqual(memio.tell(), pos) memio.read(1) self.assertEqual(memio.peek(1), buf[1:2]) - self.assertEqual(memio.peek(), buf[1:]) + self.assertEqual(memio.peek(), buf[1:2]) + self.assertEqual(memio.peek(0), buf[1:]) self.assertEqual(memio.peek(42), buf[1:]) memio.read() self.assertEqual(memio.peek(1), self.EOF) diff --git a/Modules/_io/bytesio.c b/Modules/_io/bytesio.c index 8ecf5e5b0719b4..1b6c7364a0d8ac 100644 --- a/Modules/_io/bytesio.c +++ b/Modules/_io/bytesio.c @@ -394,8 +394,9 @@ _io_BytesIO_tell_impl(bytesio *self) return PyLong_FromSsize_t(self->pos); } +// Read without advancing position static PyObject * -read_bytes(bytesio *self, Py_ssize_t size) +peek_bytes(bytesio *self, Py_ssize_t size) { const char *output; @@ -404,15 +405,20 @@ read_bytes(bytesio *self, Py_ssize_t size) if (size > 1 && self->pos == 0 && size == PyBytes_GET_SIZE(self->buf) && self->exports == 0) { - self->pos += size; return Py_NewRef(self->buf); } output 
= PyBytes_AS_STRING(self->buf) + self->pos; - self->pos += size; return PyBytes_FromStringAndSize(output, size); } +static PyObject * +read_bytes(bytesio *self, Py_ssize_t size) { + PyObject *bytes = peek_bytes(self, size); + self->pos += size; + return bytes; +} + /*[clinic input] _io.BytesIO.read size: Py_ssize_t(accept={int, NoneType}) = -1 @@ -465,17 +471,18 @@ _io_BytesIO_read1_impl(bytesio *self, Py_ssize_t size) /*[clinic input] _io.BytesIO.peek - size: Py_ssize_t(accept={int, NoneType}) = -1 + size: Py_ssize_t = 1 / Return bytes from the stream without advancing the position. +If the size argument is zero or negative, read until EOF is reached. Return an empty bytes object at EOF. [clinic start generated code]*/ static PyObject * _io_BytesIO_peek_impl(bytesio *self, Py_ssize_t size) -/*[clinic end generated code: output=fa4d8ce28b35db9b input=afc80e71b37e7c59]*/ +/*[clinic end generated code: output=fa4d8ce28b35db9b input=cb06614a3ed0496e]*/ { Py_ssize_t n; @@ -488,11 +495,7 @@ _io_BytesIO_peek_impl(bytesio *self, Py_ssize_t size) if (size < 0) size = 0; } - Py_ssize_t prev_pos = self->pos; - PyObject* result = read_bytes(self, size); - self->pos = prev_pos; - - return result; + return peek_bytes(self, size); } diff --git a/Modules/_io/clinic/bytesio.c.h b/Modules/_io/clinic/bytesio.c.h index ae647d9d809673..d384c74d84aa15 100644 --- a/Modules/_io/clinic/bytesio.c.h +++ b/Modules/_io/clinic/bytesio.c.h @@ -233,11 +233,12 @@ _io_BytesIO_read1(bytesio *self, PyObject *const *args, Py_ssize_t nargs) } PyDoc_STRVAR(_io_BytesIO_peek__doc__, -"peek($self, size=-1, /)\n" +"peek($self, size=1, /)\n" "--\n" "\n" "Return bytes from the stream without advancing the position.\n" "\n" +"If the size argument is zero or negative, read until EOF is reached.\n" "Return an empty bytes object at EOF."); #define _IO_BYTESIO_PEEK_METHODDEF \ @@ -250,7 +251,7 @@ static PyObject * _io_BytesIO_peek(bytesio *self, PyObject *const *args, Py_ssize_t nargs) { PyObject *return_value = 
NULL; - Py_ssize_t size = -1; + Py_ssize_t size = 1; if (!_PyArg_CheckPositional("peek", nargs, 0, 1)) { goto exit; @@ -258,8 +259,17 @@ _io_BytesIO_peek(bytesio *self, PyObject *const *args, Py_ssize_t nargs) if (nargs < 1) { goto skip_optional; } - if (!_Py_convert_optional_to_ssize_t(args[0], &size)) { - goto exit; + { + Py_ssize_t ival = -1; + PyObject *iobj = _PyNumber_Index(args[0]); + if (iobj != NULL) { + ival = PyLong_AsSsize_t(iobj); + Py_DECREF(iobj); + } + if (ival == -1 && PyErr_Occurred()) { + goto exit; + } + size = ival; } skip_optional: return_value = _io_BytesIO_peek_impl(self, size); @@ -573,4 +583,4 @@ _io_BytesIO___init__(PyObject *self, PyObject *args, PyObject *kwargs) exit: return return_value; } -/*[clinic end generated code: output=106d7918b585497c input=a9049054013a1b77]*/ +/*[clinic end generated code: output=9fe27d8d0d4d695b input=a9049054013a1b77]*/ From d4d5a55ec7e6d1daa7d20c28c8308b086a1e5f26 Mon Sep 17 00:00:00 2001 From: Marcel Martin Date: Fri, 29 Sep 2023 00:49:27 +0200 Subject: [PATCH 10/29] Update Misc/NEWS.d/next/Library/2022-04-10-20-10-59.bpo-46375.8j1ogZ.rst Co-authored-by: Erlend E. Aasland --- .../next/Library/2022-04-10-20-10-59.bpo-46375.8j1ogZ.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2022-04-10-20-10-59.bpo-46375.8j1ogZ.rst b/Misc/NEWS.d/next/Library/2022-04-10-20-10-59.bpo-46375.8j1ogZ.rst index 530381efef8746..c11f101b749c17 100644 --- a/Misc/NEWS.d/next/Library/2022-04-10-20-10-59.bpo-46375.8j1ogZ.rst +++ b/Misc/NEWS.d/next/Library/2022-04-10-20-10-59.bpo-46375.8j1ogZ.rst @@ -1 +1 @@ -Add a ``peek()`` method to :class:`io.BytesIO`. +Add :meth:`io.BytesIO.peek`. From 9cdd2319fed63fcc6749d5a327f2b2396b39c7da Mon Sep 17 00:00:00 2001 From: Marcel Martin Date: Fri, 29 Sep 2023 00:50:46 +0200 Subject: [PATCH 11/29] Update Modules/_io/bytesio.c Co-authored-by: Erlend E. 
Aasland --- Modules/_io/bytesio.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Modules/_io/bytesio.c b/Modules/_io/bytesio.c index 1b6c7364a0d8ac..92f79b1290f78d 100644 --- a/Modules/_io/bytesio.c +++ b/Modules/_io/bytesio.c @@ -413,7 +413,8 @@ peek_bytes(bytesio *self, Py_ssize_t size) } static PyObject * -read_bytes(bytesio *self, Py_ssize_t size) { +read_bytes(bytesio *self, Py_ssize_t size) +{ PyObject *bytes = peek_bytes(self, size); self->pos += size; return bytes; From d9948c86cd74a2bf564afd958088bbd4251ee107 Mon Sep 17 00:00:00 2001 From: Marcel Martin Date: Fri, 29 Sep 2023 00:51:19 +0200 Subject: [PATCH 12/29] Update Modules/_io/bytesio.c Co-authored-by: Erlend E. Aasland --- Modules/_io/bytesio.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Modules/_io/bytesio.c b/Modules/_io/bytesio.c index 92f79b1290f78d..3a70987ead9817 100644 --- a/Modules/_io/bytesio.c +++ b/Modules/_io/bytesio.c @@ -493,8 +493,9 @@ _io_BytesIO_peek_impl(bytesio *self, Py_ssize_t size) n = self->string_size - self->pos; if (size < 1 || size > n) { size = n; - if (size < 0) + if (size < 0) { size = 0; + } } return peek_bytes(self, size); } From 69ddb4f5e9351c41af5033de6b0b42be375ff9b2 Mon Sep 17 00:00:00 2001 From: Marcel Martin Date: Fri, 29 Sep 2023 00:25:06 +0200 Subject: [PATCH 13/29] Use SemBr Co-authored-by: Erlend E. Aasland --- Doc/library/io.rst | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Doc/library/io.rst b/Doc/library/io.rst index 9c6ad31a7b7092..07627712828cbe 100644 --- a/Doc/library/io.rst +++ b/Doc/library/io.rst @@ -730,10 +730,9 @@ than raw I/O does. .. method:: peek(size=1, /) - Return bytes from the current position onwards but without advancing the - position. If the size argument is less than one or larger than the number - of available bytes, a copy of the buffer from the current position until - the end is returned. 
+ Return bytes from the current position onwards without advancing the position. + If the size argument is less than one or larger than the number of available bytes, + a copy of the buffer from the current position until the end is returned. Return an empty bytes object at EOF. .. versionadded:: 3.13 From 58a5d580081e0b9ef82daf69140a3c20a2458c44 Mon Sep 17 00:00:00 2001 From: Marcel Martin Date: Fri, 29 Sep 2023 00:54:10 +0200 Subject: [PATCH 14/29] Update Doc/whatsnew/3.13.rst Co-authored-by: Erlend E. Aasland --- Doc/whatsnew/3.13.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 8cfd74df46f32a..e741bc3656c2f5 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -151,7 +151,7 @@ and only logged in :ref:`Python Development Mode ` or on :ref:`Python built on debug mode `. (Contributed by Victor Stinner in :gh:`62948`.) -* Add :meth:`~io.BytesIO.peek`. (Contributed by Marcel Martin in :gh:`90533`.) +* Add :meth:`io.BytesIO.peek`. (Contributed by Marcel Martin in :gh:`90533`.) 
opcode ------ From 3d21011f573ff2f5a07ebf88549b66ad43fd1143 Mon Sep 17 00:00:00 2001 From: Marcel Martin Date: Fri, 29 Sep 2023 00:55:39 +0200 Subject: [PATCH 15/29] Apply suggestions from code review Co-authored-by: Victor Stinner --- Lib/test/test_memoryio.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_memoryio.py b/Lib/test/test_memoryio.py index bb5164ae0067da..a9bf8c08f827f2 100644 --- a/Lib/test/test_memoryio.py +++ b/Lib/test/test_memoryio.py @@ -520,12 +520,12 @@ def test_relative_seek(self): def test_peek(self): buf = self.buftype("1234567890") memio = self.ioclass(buf) - pos = memio.tell() + self.assertEqual(memio.tell(), 0) self.assertEqual(memio.peek(1), buf[:1]) self.assertEqual(memio.peek(1), buf[:1]) self.assertEqual(memio.peek(), buf[:1]) self.assertEqual(memio.peek(0), buf) - self.assertEqual(memio.tell(), pos) + self.assertEqual(memio.tell(), 0) memio.read(1) self.assertEqual(memio.peek(1), buf[1:2]) self.assertEqual(memio.peek(), buf[1:2]) From 4f4999fb3059225dd1728c4eb99d20c13c38c4c4 Mon Sep 17 00:00:00 2001 From: Marcel Martin Date: Fri, 29 Sep 2023 00:58:42 +0200 Subject: [PATCH 16/29] Use a context manager around memio in test_peek --- Lib/test/test_memoryio.py | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/Lib/test/test_memoryio.py b/Lib/test/test_memoryio.py index a9bf8c08f827f2..1b852d31091299 100644 --- a/Lib/test/test_memoryio.py +++ b/Lib/test/test_memoryio.py @@ -519,21 +519,20 @@ def test_relative_seek(self): def test_peek(self): buf = self.buftype("1234567890") - memio = self.ioclass(buf) - self.assertEqual(memio.tell(), 0) - self.assertEqual(memio.peek(1), buf[:1]) - self.assertEqual(memio.peek(1), buf[:1]) - self.assertEqual(memio.peek(), buf[:1]) - self.assertEqual(memio.peek(0), buf) - self.assertEqual(memio.tell(), 0) - memio.read(1) - self.assertEqual(memio.peek(1), buf[1:2]) - self.assertEqual(memio.peek(), buf[1:2]) - 
self.assertEqual(memio.peek(0), buf[1:]) - self.assertEqual(memio.peek(42), buf[1:]) - memio.read() - self.assertEqual(memio.peek(1), self.EOF) - memio.close() + with self.ioclass(buf) as memio: + self.assertEqual(memio.tell(), 0) + self.assertEqual(memio.peek(1), buf[:1]) + self.assertEqual(memio.peek(1), buf[:1]) + self.assertEqual(memio.peek(), buf[:1]) + self.assertEqual(memio.peek(0), buf) + self.assertEqual(memio.tell(), 0) + memio.read(1) + self.assertEqual(memio.peek(1), buf[1:2]) + self.assertEqual(memio.peek(), buf[1:2]) + self.assertEqual(memio.peek(0), buf[1:]) + self.assertEqual(memio.peek(42), buf[1:]) + memio.read() + self.assertEqual(memio.peek(1), self.EOF) self.assertRaises(ValueError, memio.peek) def test_unicode(self): From 56fbee311c5febe44005ce1ee1871e11f3d173ea Mon Sep 17 00:00:00 2001 From: Marcel Martin Date: Fri, 29 Sep 2023 01:01:41 +0200 Subject: [PATCH 17/29] Add more tests for tell() after peek() --- Lib/test/test_memoryio.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Lib/test/test_memoryio.py b/Lib/test/test_memoryio.py index 1b852d31091299..df8e11b9e702c2 100644 --- a/Lib/test/test_memoryio.py +++ b/Lib/test/test_memoryio.py @@ -527,12 +527,16 @@ def test_peek(self): self.assertEqual(memio.peek(0), buf) self.assertEqual(memio.tell(), 0) memio.read(1) + self.assertEqual(memio.tell(), 1) self.assertEqual(memio.peek(1), buf[1:2]) self.assertEqual(memio.peek(), buf[1:2]) self.assertEqual(memio.peek(0), buf[1:]) self.assertEqual(memio.peek(42), buf[1:]) + self.assertEqual(memio.tell(), 1) memio.read() + self.assertEqual(memio.tell(), len(buf)) self.assertEqual(memio.peek(1), self.EOF) + self.assertEqual(memio.tell(), len(buf)) self.assertRaises(ValueError, memio.peek) def test_unicode(self): From 4c3c9083aad4bfd97b49bf4b013879b92daeecf0 Mon Sep 17 00:00:00 2001 From: Marcel Martin Date: Fri, 29 Sep 2023 01:05:17 +0200 Subject: [PATCH 18/29] Document why size < 0 can happen --- Modules/_io/bytesio.c | 1 + 1 file changed, 1 
insertion(+) diff --git a/Modules/_io/bytesio.c b/Modules/_io/bytesio.c index 3a70987ead9817..94bdd69ef3a1aa 100644 --- a/Modules/_io/bytesio.c +++ b/Modules/_io/bytesio.c @@ -493,6 +493,7 @@ _io_BytesIO_peek_impl(bytesio *self, Py_ssize_t size) n = self->string_size - self->pos; if (size < 1 || size > n) { size = n; + /* size can be negative after truncate() */ if (size < 0) { size = 0; } From 1f2b5c54fdad8047773ebc65dc0b896195ead3e3 Mon Sep 17 00:00:00 2001 From: Marcel Martin Date: Fri, 29 Sep 2023 09:18:17 +0200 Subject: [PATCH 19/29] Update Modules/_io/bytesio.c Co-authored-by: Erlend E. Aasland --- Modules/_io/bytesio.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Modules/_io/bytesio.c b/Modules/_io/bytesio.c index 94bdd69ef3a1aa..071c8694bcd843 100644 --- a/Modules/_io/bytesio.c +++ b/Modules/_io/bytesio.c @@ -485,12 +485,10 @@ static PyObject * _io_BytesIO_peek_impl(bytesio *self, Py_ssize_t size) /*[clinic end generated code: output=fa4d8ce28b35db9b input=cb06614a3ed0496e]*/ { - Py_ssize_t n; - CHECK_CLOSED(self); /* adjust invalid sizes */ - n = self->string_size - self->pos; + Py_ssize_t n = self->string_size - self->pos; if (size < 1 || size > n) { size = n; /* size can be negative after truncate() */ From a1504a7474f38b8d1e9ec91002361928827c6a42 Mon Sep 17 00:00:00 2001 From: Marcel Martin Date: Fri, 29 Sep 2023 09:17:09 +0200 Subject: [PATCH 20/29] Do not update pos if peek_bytes failed --- Modules/_io/bytesio.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Modules/_io/bytesio.c b/Modules/_io/bytesio.c index 071c8694bcd843..e90fedd2d8ae0b 100644 --- a/Modules/_io/bytesio.c +++ b/Modules/_io/bytesio.c @@ -416,7 +416,9 @@ static PyObject * read_bytes(bytesio *self, Py_ssize_t size) { PyObject *bytes = peek_bytes(self, size); - self->pos += size; + if (bytes != NULL) { + self->pos += size; + } return bytes; } From 827a785a926f57755d65af458381c76469cd8b74 Mon Sep 17 00:00:00 2001 From: Marcel Martin Date: 
Fri, 29 Sep 2023 09:17:42 +0200 Subject: [PATCH 21/29] Size can be negative after truncate or seek --- Modules/_io/bytesio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/_io/bytesio.c b/Modules/_io/bytesio.c index e90fedd2d8ae0b..2ba9676056f46e 100644 --- a/Modules/_io/bytesio.c +++ b/Modules/_io/bytesio.c @@ -493,7 +493,7 @@ _io_BytesIO_peek_impl(bytesio *self, Py_ssize_t size) Py_ssize_t n = self->string_size - self->pos; if (size < 1 || size > n) { size = n; - /* size can be negative after truncate() */ + /* size can be negative after truncate() or seek() */ if (size < 0) { size = 0; } From eebd2894f07320a71bb061be3680878ccb6d29c3 Mon Sep 17 00:00:00 2001 From: Marcel Martin Date: Fri, 29 Sep 2023 10:39:34 +0200 Subject: [PATCH 22/29] Test with size<0 and size>len(buf) --- Lib/test/test_memoryio.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_memoryio.py b/Lib/test/test_memoryio.py index df8e11b9e702c2..1eaf85a7e18ff1 100644 --- a/Lib/test/test_memoryio.py +++ b/Lib/test/test_memoryio.py @@ -525,13 +525,17 @@ def test_peek(self): self.assertEqual(memio.peek(1), buf[:1]) self.assertEqual(memio.peek(), buf[:1]) self.assertEqual(memio.peek(0), buf) + self.assertEqual(memio.peek(len(buf) + 100), buf) + self.assertEqual(memio.peek(-1), buf) self.assertEqual(memio.tell(), 0) memio.read(1) self.assertEqual(memio.tell(), 1) self.assertEqual(memio.peek(1), buf[1:2]) + self.assertEqual(memio.peek(1), buf[1:2]) self.assertEqual(memio.peek(), buf[1:2]) self.assertEqual(memio.peek(0), buf[1:]) - self.assertEqual(memio.peek(42), buf[1:]) + self.assertEqual(memio.peek(len(buf) + 100), buf[1:]) + self.assertEqual(memio.peek(-1), buf[1:]) self.assertEqual(memio.tell(), 1) memio.read() self.assertEqual(memio.tell(), len(buf)) From 48551896ac03b105b95327accc4b75128b2afa80 Mon Sep 17 00:00:00 2001 From: Marcel Martin Date: Fri, 29 Sep 2023 11:48:57 +0200 Subject: [PATCH 23/29] Test peek() after write() --- 
Lib/test/test_memoryio.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Lib/test/test_memoryio.py b/Lib/test/test_memoryio.py index 1eaf85a7e18ff1..3ddf1b769f41a6 100644 --- a/Lib/test/test_memoryio.py +++ b/Lib/test/test_memoryio.py @@ -541,6 +541,15 @@ def test_peek(self): self.assertEqual(memio.tell(), len(buf)) self.assertEqual(memio.peek(1), self.EOF) self.assertEqual(memio.tell(), len(buf)) + # Peeking works after writing + abc = self.buftype("abc") + memio.write(abc) + self.assertEqual(memio.peek(), self.EOF) + memio.seek(len(buf)) + self.assertEqual(memio.peek(), abc[:1]) + self.assertEqual(memio.peek(-1), abc) + self.assertEqual(memio.peek(len(abc) + 100), abc) + self.assertEqual(memio.tell(), len(buf)) self.assertRaises(ValueError, memio.peek) def test_unicode(self): From fd85b46830f0015a39d41965ecbe42cb83f7fc54 Mon Sep 17 00:00:00 2001 From: Marcel Martin Date: Fri, 29 Sep 2023 15:53:11 +0200 Subject: [PATCH 24/29] Document BufferedReader.peek and BytesIO.peek similarly --- Doc/library/io.rst | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/Doc/library/io.rst b/Doc/library/io.rst index 07627712828cbe..991b41a4335453 100644 --- a/Doc/library/io.rst +++ b/Doc/library/io.rst @@ -731,9 +731,10 @@ than raw I/O does. .. method:: peek(size=1, /) Return bytes from the current position onwards without advancing the position. + At least one byte of data is returned if not at EOF. + Return an empty :class:`bytes` object at EOF. If the size argument is less than one or larger than the number of available bytes, a copy of the buffer from the current position until the end is returned. - Return an empty bytes object at EOF. .. versionadded:: 3.13 @@ -769,9 +770,12 @@ than raw I/O does. .. method:: peek(size=0, /) - Return bytes from the stream without advancing the position. At most one - single read on the raw stream is done to satisfy the call. The number of - bytes returned may be less or more than requested. 
+ Return bytes from the current position onwards without advancing the position. + At least one byte of data is returned if not at EOF. + Return an empty :class:`bytes` object at EOF. + At most one single read on the underlying raw stream is done to satisfy the call. + The exact number of bytes returned is unspecified + (*size* is ignored). .. method:: read(size=-1, /) From 5733d5a5b285926945510f4398a612ef5ce3b95d Mon Sep 17 00:00:00 2001 From: Marcel Martin Date: Fri, 29 Sep 2023 16:02:09 +0200 Subject: [PATCH 25/29] Comment --- Lib/_pyio.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Lib/_pyio.py b/Lib/_pyio.py index e76c404f470f79..0fa89983a99bee 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -981,6 +981,9 @@ def tell(self): def peek(self, size=1): if self.closed: raise ValueError("peek on closed file") + # Due to slicing semantics, this works correctly + # even if the size is greater than the buffer length or + # the position is beyond the end of the buffer if size < 1: size = len(self._buffer) - self._pos return self._buffer[self._pos : self._pos + size] From 4d832e0576d79f495e641c144868eae18d17c5d7 Mon Sep 17 00:00:00 2001 From: Marcel Martin Date: Fri, 29 Sep 2023 16:45:54 +0200 Subject: [PATCH 26/29] Make it more explicit that size is ignored --- Doc/library/io.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/library/io.rst b/Doc/library/io.rst index 991b41a4335453..c7c067b52ff24f 100644 --- a/Doc/library/io.rst +++ b/Doc/library/io.rst @@ -774,8 +774,8 @@ than raw I/O does. At least one byte of data is returned if not at EOF. Return an empty :class:`bytes` object at EOF. At most one single read on the underlying raw stream is done to satisfy the call. - The exact number of bytes returned is unspecified - (*size* is ignored). + The *size* argument is ignored. + The number of read bytes depends on the buffer size and the current position in the internal buffer. .. 
method:: read(size=-1, /) From 1d793e3df9b072053ed3df79505bdc62b16448a0 Mon Sep 17 00:00:00 2001 From: Marcel Martin Date: Mon, 23 Oct 2023 08:47:15 +0200 Subject: [PATCH 27/29] Return an empty bytes object for size=0 --- Doc/library/io.rst | 2 +- Lib/_pyio.py | 2 +- Lib/test/test_memoryio.py | 5 +++-- Modules/_io/bytesio.c | 6 +++--- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/Doc/library/io.rst b/Doc/library/io.rst index 4f7c980585215b..d1e7dfdc471b6c 100644 --- a/Doc/library/io.rst +++ b/Doc/library/io.rst @@ -733,7 +733,7 @@ than raw I/O does. Return bytes from the current position onwards without advancing the position. At least one byte of data is returned if not at EOF. Return an empty :class:`bytes` object at EOF. - If the size argument is less than one or larger than the number of available bytes, + If the size argument is negative or larger than the number of available bytes, a copy of the buffer from the current position until the end is returned. .. versionadded:: 3.13 diff --git a/Lib/_pyio.py b/Lib/_pyio.py index 0fa89983a99bee..f4d3d11b791ffe 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -984,7 +984,7 @@ def peek(self, size=1): # Due to slicing semantics, this works correctly # even if the size is greater than the buffer length or # the position is beyond the end of the buffer - if size < 1: + if size < 0: size = len(self._buffer) - self._pos return self._buffer[self._pos : self._pos + size] diff --git a/Lib/test/test_memoryio.py b/Lib/test/test_memoryio.py index 3ddf1b769f41a6..55d08d952cc9b8 100644 --- a/Lib/test/test_memoryio.py +++ b/Lib/test/test_memoryio.py @@ -524,7 +524,7 @@ def test_peek(self): self.assertEqual(memio.peek(1), buf[:1]) self.assertEqual(memio.peek(1), buf[:1]) self.assertEqual(memio.peek(), buf[:1]) - self.assertEqual(memio.peek(0), buf) + self.assertEqual(memio.peek(0), b"") self.assertEqual(memio.peek(len(buf) + 100), buf) self.assertEqual(memio.peek(-1), buf) self.assertEqual(memio.tell(), 0) @@ -533,13 
+533,14 @@ def test_peek(self): self.assertEqual(memio.peek(1), buf[1:2]) self.assertEqual(memio.peek(1), buf[1:2]) self.assertEqual(memio.peek(), buf[1:2]) - self.assertEqual(memio.peek(0), buf[1:]) + self.assertEqual(memio.peek(0), b"") self.assertEqual(memio.peek(len(buf) + 100), buf[1:]) self.assertEqual(memio.peek(-1), buf[1:]) self.assertEqual(memio.tell(), 1) memio.read() self.assertEqual(memio.tell(), len(buf)) self.assertEqual(memio.peek(1), self.EOF) + self.assertEqual(memio.peek(0), b"") self.assertEqual(memio.tell(), len(buf)) # Peeking works after writing abc = self.buftype("abc") diff --git a/Modules/_io/bytesio.c b/Modules/_io/bytesio.c index 2ba9676056f46e..5c7d436416b387 100644 --- a/Modules/_io/bytesio.c +++ b/Modules/_io/bytesio.c @@ -479,7 +479,7 @@ _io.BytesIO.peek Return bytes from the stream without advancing the position. -If the size argument is zero or negative, read until EOF is reached. +If the size argument is negative, read until EOF is reached. Return an empty bytes object at EOF. 
[clinic start generated code]*/ @@ -491,9 +491,9 @@ _io_BytesIO_peek_impl(bytesio *self, Py_ssize_t size) /* adjust invalid sizes */ Py_ssize_t n = self->string_size - self->pos; - if (size < 1 || size > n) { + if (size < 0 || size > n) { size = n; - /* size can be negative after truncate() or seek() */ + /* n can be negative after truncate() or seek() */ if (size < 0) { size = 0; } From 9b0b04fce52316fff2ab91f57280f703d8395921 Mon Sep 17 00:00:00 2001 From: Marcel Martin Date: Mon, 23 Oct 2023 09:10:13 +0200 Subject: [PATCH 28/29] Simplify --- Lib/_pyio.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/Lib/_pyio.py b/Lib/_pyio.py index f4d3d11b791ffe..801f0447447a21 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -981,12 +981,9 @@ def tell(self): def peek(self, size=1): if self.closed: raise ValueError("peek on closed file") - # Due to slicing semantics, this works correctly - # even if the size is greater than the buffer length or - # the position is beyond the end of the buffer if size < 0: - size = len(self._buffer) - self._pos - return self._buffer[self._pos : self._pos + size] + return self._buffer[self._pos:] + return self._buffer[self._pos:self._pos + size] def truncate(self, pos=None): if self.closed: From bb6447d860e81234594ea5e0615a175eaa898901 Mon Sep 17 00:00:00 2001 From: Marcel Martin Date: Mon, 23 Oct 2023 09:11:33 +0200 Subject: [PATCH 29/29] Test peek(3) and peek(5) --- Lib/test/test_memoryio.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Lib/test/test_memoryio.py b/Lib/test/test_memoryio.py index 55d08d952cc9b8..9564f85cfa5689 100644 --- a/Lib/test/test_memoryio.py +++ b/Lib/test/test_memoryio.py @@ -523,6 +523,8 @@ def test_peek(self): self.assertEqual(memio.tell(), 0) self.assertEqual(memio.peek(1), buf[:1]) self.assertEqual(memio.peek(1), buf[:1]) + self.assertEqual(memio.peek(3), buf[:3]) + self.assertEqual(memio.peek(5), buf[:5]) self.assertEqual(memio.peek(), buf[:1]) self.assertEqual(memio.peek(0), 
b"") self.assertEqual(memio.peek(len(buf) + 100), buf) @@ -532,6 +534,8 @@ def test_peek(self): self.assertEqual(memio.tell(), 1) self.assertEqual(memio.peek(1), buf[1:2]) self.assertEqual(memio.peek(1), buf[1:2]) + self.assertEqual(memio.peek(3), buf[1:4]) + self.assertEqual(memio.peek(5), buf[1:6]) self.assertEqual(memio.peek(), buf[1:2]) self.assertEqual(memio.peek(0), b"") self.assertEqual(memio.peek(len(buf) + 100), buf[1:]) @@ -540,6 +544,8 @@ def test_peek(self): memio.read() self.assertEqual(memio.tell(), len(buf)) self.assertEqual(memio.peek(1), self.EOF) + self.assertEqual(memio.peek(3), self.EOF) + self.assertEqual(memio.peek(5), self.EOF) self.assertEqual(memio.peek(0), b"") self.assertEqual(memio.tell(), len(buf)) # Peeking works after writing