Skip to content

Commit

Permalink
Add PyUnicodeWriter API (#95)
Browse files Browse the repository at this point in the history
  • Loading branch information
vstinner authored Jul 18, 2024
1 parent ea1f7f6 commit 4094c64
Show file tree
Hide file tree
Showing 3 changed files with 312 additions and 0 deletions.
13 changes: 13 additions & 0 deletions docs/changelog.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,19 @@
Changelog
=========

* 2024-07-18: Add functions:

* ``PyUnicodeWriter_Create()``
* ``PyUnicodeWriter_Discard()``
* ``PyUnicodeWriter_Finish()``
* ``PyUnicodeWriter_WriteChar()``
* ``PyUnicodeWriter_WriteUTF8()``
* ``PyUnicodeWriter_WriteStr()``
* ``PyUnicodeWriter_WriteRepr()``
* ``PyUnicodeWriter_WriteSubstring()``
* ``PyUnicodeWriter_WriteWideChar()``
* ``PyUnicodeWriter_Format()``

* 2024-06-03: Add ``PyLong_GetSign()``.
* 2024-04-23: Drop Python 3.5 support. It cannot be tested anymore (pip fails).
* 2024-04-02: Add ``PyDict_SetDefaultRef()`` function.
Expand Down
153 changes: 153 additions & 0 deletions pythoncapi_compat.h
Original file line number Diff line number Diff line change
Expand Up @@ -1338,6 +1338,159 @@ PyDict_SetDefaultRef(PyObject *d, PyObject *key, PyObject *default_value,
}
#endif

#if PY_VERSION_HEX < 0x030E0000 && PY_VERSION_HEX >= 0x03060000 && !defined(PYPY_VERSION)
typedef struct PyUnicodeWriter PyUnicodeWriter;

static inline void PyUnicodeWriter_Discard(PyUnicodeWriter *writer)
{
_PyUnicodeWriter_Dealloc((_PyUnicodeWriter*)writer);
PyMem_Free(writer);
}

static inline PyUnicodeWriter* PyUnicodeWriter_Create(Py_ssize_t length)
{
if (length < 0) {
PyErr_SetString(PyExc_ValueError,
"length must be positive");
return NULL;
}

const size_t size = sizeof(_PyUnicodeWriter);
PyUnicodeWriter *pub_writer = (PyUnicodeWriter *)PyMem_Malloc(size);
if (pub_writer == _Py_NULL) {
PyErr_NoMemory();
return _Py_NULL;
}
_PyUnicodeWriter *writer = (_PyUnicodeWriter *)pub_writer;

_PyUnicodeWriter_Init(writer);
if (_PyUnicodeWriter_Prepare(writer, length, 127) < 0) {
PyUnicodeWriter_Discard(pub_writer);
return NULL;
}
writer->overallocate = 1;
return pub_writer;
}

static inline PyObject* PyUnicodeWriter_Finish(PyUnicodeWriter *writer)
{
PyObject *str = _PyUnicodeWriter_Finish((_PyUnicodeWriter*)writer);
assert(((_PyUnicodeWriter*)writer)->buffer == NULL);
PyMem_Free(writer);
return str;
}

static inline int
PyUnicodeWriter_WriteChar(PyUnicodeWriter *writer, Py_UCS4 ch)
{
if (ch > 0x10ffff) {
PyErr_SetString(PyExc_ValueError,
"character must be in range(0x110000)");
return -1;
}

return _PyUnicodeWriter_WriteChar((_PyUnicodeWriter*)writer, ch);
}

int
PyUnicodeWriter_WriteStr(PyUnicodeWriter *writer, PyObject *obj)
{
PyObject *str = PyObject_Str(obj);
if (str == NULL) {
return -1;
}

int res = _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, str);
Py_DECREF(str);
return res;
}

int
PyUnicodeWriter_WriteRepr(PyUnicodeWriter *writer, PyObject *obj)
{
PyObject *str = PyObject_Repr(obj);
if (str == NULL) {
return -1;
}

int res = _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, str);
Py_DECREF(str);
return res;
}

static inline int
PyUnicodeWriter_WriteUTF8(PyUnicodeWriter *writer,
const char *str, Py_ssize_t size)
{
if (size < 0) {
size = (Py_ssize_t)strlen(str);
}

PyObject *str_obj = PyUnicode_FromStringAndSize(str, size);
if (str_obj == _Py_NULL) {
return -1;
}

int res = _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, str_obj);
Py_DECREF(str_obj);
return res;
}

static inline int
PyUnicodeWriter_WriteWideChar(PyUnicodeWriter *writer,
const wchar_t *str, Py_ssize_t size)
{
if (size < 0) {
size = (Py_ssize_t)wcslen(str);
}

PyObject *str_obj = PyUnicode_FromWideChar(str, size);
if (str_obj == _Py_NULL) {
return -1;
}

int res = _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, str_obj);
Py_DECREF(str_obj);
return res;
}

static inline int
PyUnicodeWriter_WriteSubstring(PyUnicodeWriter *writer, PyObject *str,
Py_ssize_t start, Py_ssize_t end)
{
if (!PyUnicode_Check(str)) {
PyErr_Format(PyExc_TypeError, "expect str, not %T", str);
return -1;
}
if (start < 0 || start > end) {
PyErr_Format(PyExc_ValueError, "invalid start argument");
return -1;
}
if (end > PyUnicode_GET_LENGTH(str)) {
PyErr_Format(PyExc_ValueError, "invalid end argument");
return -1;
}

return _PyUnicodeWriter_WriteSubstring((_PyUnicodeWriter*)writer, str,
start, end);
}

static inline int
PyUnicodeWriter_Format(PyUnicodeWriter *writer, const char *format, ...)
{
va_list vargs;
va_start(vargs, format);
PyObject *str = PyUnicode_FromFormatV(format, vargs);
va_end(vargs);
if (str == _Py_NULL) {
return -1;
}

int res = _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, str);
Py_DECREF(str);
return res;
}
#endif // PY_VERSION_HEX < 0x030E0000

// gh-116560 added PyLong_GetSign() to Python 3.14.0a0
#if PY_VERSION_HEX < 0x030E00A0
Expand Down
146 changes: 146 additions & 0 deletions tests/test_pythoncapi_compat_cext.c
Original file line number Diff line number Diff line change
Expand Up @@ -1733,6 +1733,147 @@ test_get_constant(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args))
}


#if PY_VERSION_HEX < 0x030E0000 && PY_VERSION_HEX >= 0x03060000 && !defined(PYPY_VERSION)
#define TEST_UNICODEWRITER 1

static PyObject *
test_unicodewriter(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(args))
{
PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
if (writer == NULL) {
return NULL;
}
int ret;

// test PyUnicodeWriter_WriteStr()
PyObject *str = PyUnicode_FromString("var");
if (str == NULL) {
goto error;
}
ret = PyUnicodeWriter_WriteStr(writer, str);
Py_CLEAR(str);
if (ret < 0) {
goto error;
}

// test PyUnicodeWriter_WriteChar()
if (PyUnicodeWriter_WriteChar(writer, '=') < 0) {
goto error;
}

// test PyUnicodeWriter_WriteSubstring()
str = PyUnicode_FromString("[long]");
if (str == NULL) {
goto error;
}
ret = PyUnicodeWriter_WriteSubstring(writer, str, 1, 5);
Py_CLEAR(str);
if (ret < 0) {
goto error;
}

// test PyUnicodeWriter_WriteUTF8()
if (PyUnicodeWriter_WriteUTF8(writer, " valu\xC3\xA9", -1) < 0) {
goto error;
}
if (PyUnicodeWriter_WriteChar(writer, ' ') < 0) {
goto error;
}

// test PyUnicodeWriter_WriteRepr()
str = PyUnicode_FromString("repr");
if (str == NULL) {
goto error;
}
if (PyUnicodeWriter_WriteRepr(writer, str) < 0) {
goto error;
}
Py_CLEAR(str);

{
PyObject *result = PyUnicodeWriter_Finish(writer);
if (result == NULL) {
return NULL;
}
assert(PyUnicode_EqualToUTF8(result, "var=long valu\xC3\xA9 'repr'"));
Py_DECREF(result);
}

Py_RETURN_NONE;

error:
PyUnicodeWriter_Discard(writer);
return NULL;
}


static PyObject *
test_unicodewriter_widechar(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(args))
{
PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
if (writer == NULL) {
return NULL;
}

// test PyUnicodeWriter_WriteWideChar()
int ret = PyUnicodeWriter_WriteWideChar(writer, L"euro=\u20AC", -1);
if (ret < 0) {
goto error;
}

{
PyObject *result = PyUnicodeWriter_Finish(writer);
if (result == NULL) {
return NULL;
}
assert(PyUnicode_EqualToUTF8(result, "euro=\xe2\x82\xac"));
Py_DECREF(result);
}

Py_RETURN_NONE;

error:
PyUnicodeWriter_Discard(writer);
return NULL;
}


static PyObject *
test_unicodewriter_format(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(args))
{
PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
if (writer == NULL) {
return NULL;
}

// test PyUnicodeWriter_Format()
if (PyUnicodeWriter_Format(writer, "%s %i", "Hello", 123) < 0) {
goto error;
}

// test PyUnicodeWriter_WriteChar()
if (PyUnicodeWriter_WriteChar(writer, '.') < 0) {
goto error;
}

{
PyObject *result = PyUnicodeWriter_Finish(writer);
if (result == NULL) {
return NULL;
}
assert(PyUnicode_EqualToUTF8(result, "Hello 123."));
Py_DECREF(result);
}

Py_RETURN_NONE;

error:
PyUnicodeWriter_Discard(writer);
return NULL;
}
#endif


static struct PyMethodDef methods[] = {
{"test_object", test_object, METH_NOARGS, _Py_NULL},
{"test_py_is", test_py_is, METH_NOARGS, _Py_NULL},
Expand Down Expand Up @@ -1771,6 +1912,11 @@ static struct PyMethodDef methods[] = {
{"test_time", test_time, METH_NOARGS, _Py_NULL},
#endif
{"test_get_constant", test_get_constant, METH_NOARGS, _Py_NULL},
#ifdef TEST_UNICODEWRITER
{"test_unicodewriter", test_unicodewriter, METH_NOARGS, _Py_NULL},
{"test_unicodewriter_widechar", test_unicodewriter_widechar, METH_NOARGS, _Py_NULL},
{"test_unicodewriter_format", test_unicodewriter_format, METH_NOARGS, _Py_NULL},
#endif
{_Py_NULL, _Py_NULL, 0, _Py_NULL}
};

Expand Down

0 comments on commit 4094c64

Please sign in to comment.