2023-05-22 00:32:39 +03:00
|
|
|
#include <stddef.h> // ptrdiff_t
|
|
|
|
|
2022-08-10 09:10:25 +02:00
|
|
|
#include "parts.h"
|
2023-09-01 21:42:42 +03:00
|
|
|
#include "util.h"
|
2022-08-10 09:10:25 +02:00
|
|
|
|
2024-03-19 13:30:39 +01:00
|
|
|
/* Test PyUnicode_New() */
|
2022-08-10 09:10:25 +02:00
|
|
|
static PyObject *
|
2024-03-19 13:30:39 +01:00
|
|
|
unicode_new(PyObject *self, PyObject *args)
|
2022-08-10 09:10:25 +02:00
|
|
|
{
|
2024-03-19 13:30:39 +01:00
|
|
|
Py_ssize_t size;
|
|
|
|
unsigned int maxchar;
|
|
|
|
PyObject *result;
|
2022-08-10 09:10:25 +02:00
|
|
|
|
2024-03-19 13:30:39 +01:00
|
|
|
if (!PyArg_ParseTuple(args, "nI", &size, &maxchar)) {
|
2022-08-10 09:10:25 +02:00
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2024-03-19 13:30:39 +01:00
|
|
|
result = PyUnicode_New(size, (Py_UCS4)maxchar);
|
|
|
|
if (!result) {
|
2022-08-10 09:10:25 +02:00
|
|
|
return NULL;
|
|
|
|
}
|
2024-03-19 13:30:39 +01:00
|
|
|
if (size > 0 && maxchar <= 0x10ffff &&
|
|
|
|
PyUnicode_Fill(result, 0, size, (Py_UCS4)maxchar) < 0)
|
|
|
|
{
|
|
|
|
Py_DECREF(result);
|
2022-08-10 09:10:25 +02:00
|
|
|
return NULL;
|
|
|
|
}
|
2024-03-19 13:30:39 +01:00
|
|
|
return result;
|
2022-08-10 09:10:25 +02:00
|
|
|
}
|
|
|
|
|
2022-11-29 09:59:56 +02:00
|
|
|
|
2023-05-04 18:25:09 +03:00
|
|
|
/* Helper: return a new reference to a freshly allocated copy of `unicode`.
 *
 * - NULL is passed through as NULL (no exception is set by this helper).
 * - An object that fails PyUnicode_Check() is returned as-is, with its
 *   reference count incremented.
 * - Otherwise a new string of the same length and maximum character value is
 *   created and the characters are copied into it.
 *
 * Returns NULL with an exception set if the allocation or the copy fails.
 */
static PyObject *
unicode_copy(PyObject *unicode)
{
    if (unicode == NULL) {
        return NULL;
    }
    if (!PyUnicode_Check(unicode)) {
        Py_INCREF(unicode);
        return unicode;
    }

    Py_ssize_t length = PyUnicode_GET_LENGTH(unicode);
    PyObject *copy = PyUnicode_New(length, PyUnicode_MAX_CHAR_VALUE(unicode));
    if (copy == NULL) {
        return NULL;
    }

    if (PyUnicode_CopyCharacters(copy, 0, unicode, 0, length) < 0) {
        Py_DECREF(copy);
        return NULL;
    }
    return copy;
}
|
|
|
|
|
|
|
|
|
|
|
|
/* Test PyUnicode_Fill() */
|
|
|
|
static PyObject *
|
|
|
|
unicode_fill(PyObject *self, PyObject *args)
|
|
|
|
{
|
|
|
|
PyObject *to, *to_copy;
|
|
|
|
Py_ssize_t start, length, filled;
|
|
|
|
unsigned int fill_char;
|
|
|
|
|
|
|
|
if (!PyArg_ParseTuple(args, "OnnI", &to, &start, &length, &fill_char)) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
NULLABLE(to);
|
|
|
|
if (!(to_copy = unicode_copy(to)) && to) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
filled = PyUnicode_Fill(to_copy, start, length, (Py_UCS4)fill_char);
|
|
|
|
if (filled == -1 && PyErr_Occurred()) {
|
|
|
|
Py_DECREF(to_copy);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
return Py_BuildValue("(Nn)", to_copy, filled);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* Test PyUnicode_FromKindAndData() */
|
|
|
|
static PyObject *
|
|
|
|
unicode_fromkindanddata(PyObject *self, PyObject *args)
|
|
|
|
{
|
|
|
|
int kind;
|
|
|
|
void *buffer;
|
|
|
|
Py_ssize_t bsize;
|
|
|
|
Py_ssize_t size = -100;
|
|
|
|
|
|
|
|
if (!PyArg_ParseTuple(args, "iz#|n", &kind, &buffer, &bsize, &size)) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (size == -100) {
|
|
|
|
size = bsize;
|
|
|
|
}
|
|
|
|
if (kind && size % kind) {
|
|
|
|
PyErr_SetString(PyExc_AssertionError,
|
|
|
|
"invalid size in unicode_fromkindanddata()");
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
return PyUnicode_FromKindAndData(kind, buffer, kind ? size / kind : 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2024-03-19 13:30:39 +01:00
|
|
|
// Test PyUnicode_AsUCS4().
|
|
|
|
// Part of the limited C API, but the test needs PyUnicode_FromKindAndData().
|
2022-08-10 09:10:25 +02:00
|
|
|
static PyObject *
|
|
|
|
unicode_asucs4(PyObject *self, PyObject *args)
|
|
|
|
{
|
|
|
|
PyObject *unicode, *result;
|
|
|
|
Py_UCS4 *buffer;
|
|
|
|
int copy_null;
|
|
|
|
Py_ssize_t str_len, buf_len;
|
|
|
|
|
2023-05-04 18:25:09 +03:00
|
|
|
if (!PyArg_ParseTuple(args, "Onp:unicode_asucs4", &unicode, &str_len, ©_null)) {
|
2022-08-10 09:10:25 +02:00
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2023-05-04 18:25:09 +03:00
|
|
|
NULLABLE(unicode);
|
2022-08-10 09:10:25 +02:00
|
|
|
buf_len = str_len + 1;
|
|
|
|
buffer = PyMem_NEW(Py_UCS4, buf_len);
|
|
|
|
if (buffer == NULL) {
|
|
|
|
return PyErr_NoMemory();
|
|
|
|
}
|
|
|
|
memset(buffer, 0, sizeof(Py_UCS4)*buf_len);
|
|
|
|
buffer[str_len] = 0xffffU;
|
|
|
|
|
|
|
|
if (!PyUnicode_AsUCS4(unicode, buffer, buf_len, copy_null)) {
|
|
|
|
PyMem_Free(buffer);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
result = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, buffer, buf_len);
|
|
|
|
PyMem_Free(buffer);
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2024-03-19 13:30:39 +01:00
|
|
|
|
|
|
|
// Test PyUnicode_AsUCS4Copy().
|
|
|
|
// Part of the limited C API, but the test needs PyUnicode_FromKindAndData().
|
2022-08-10 09:10:25 +02:00
|
|
|
static PyObject *
|
2023-05-04 18:25:09 +03:00
|
|
|
unicode_asucs4copy(PyObject *self, PyObject *args)
|
2022-08-10 09:10:25 +02:00
|
|
|
{
|
|
|
|
PyObject *unicode;
|
2023-05-04 18:25:09 +03:00
|
|
|
Py_UCS4 *buffer;
|
|
|
|
PyObject *result;
|
2022-08-10 09:10:25 +02:00
|
|
|
|
2023-05-04 18:25:09 +03:00
|
|
|
if (!PyArg_ParseTuple(args, "O", &unicode)) {
|
2022-08-10 09:10:25 +02:00
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2023-05-04 18:25:09 +03:00
|
|
|
NULLABLE(unicode);
|
|
|
|
buffer = PyUnicode_AsUCS4Copy(unicode);
|
2022-08-10 09:10:25 +02:00
|
|
|
if (buffer == NULL) {
|
|
|
|
return NULL;
|
|
|
|
}
|
2023-05-04 18:25:09 +03:00
|
|
|
result = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
|
|
|
|
buffer,
|
|
|
|
PyUnicode_GET_LENGTH(unicode) + 1);
|
|
|
|
PyMem_FREE(buffer);
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* Test PyUnicode_AsUTF8() */
|
|
|
|
static PyObject *
|
|
|
|
unicode_asutf8(PyObject *self, PyObject *args)
|
|
|
|
{
|
|
|
|
PyObject *unicode;
|
|
|
|
Py_ssize_t buflen;
|
|
|
|
const char *s;
|
|
|
|
|
|
|
|
if (!PyArg_ParseTuple(args, "On", &unicode, &buflen))
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
NULLABLE(unicode);
|
|
|
|
s = PyUnicode_AsUTF8(unicode);
|
|
|
|
if (s == NULL)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
return PyBytes_FromStringAndSize(s, buflen);
|
2022-08-10 09:10:25 +02:00
|
|
|
}
|
|
|
|
|
2022-11-29 09:59:56 +02:00
|
|
|
|
|
|
|
/* Test PyUnicode_CopyCharacters() */
|
2022-08-10 09:10:25 +02:00
|
|
|
static PyObject *
|
|
|
|
unicode_copycharacters(PyObject *self, PyObject *args)
|
|
|
|
{
|
|
|
|
PyObject *from, *to, *to_copy;
|
|
|
|
Py_ssize_t from_start, to_start, how_many, copied;
|
|
|
|
|
2023-05-04 18:25:09 +03:00
|
|
|
if (!PyArg_ParseTuple(args, "UnOnn", &to, &to_start,
|
2022-08-10 09:10:25 +02:00
|
|
|
&from, &from_start, &how_many)) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2023-05-04 18:25:09 +03:00
|
|
|
NULLABLE(from);
|
2022-08-10 09:10:25 +02:00
|
|
|
if (!(to_copy = PyUnicode_New(PyUnicode_GET_LENGTH(to),
|
|
|
|
PyUnicode_MAX_CHAR_VALUE(to)))) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
if (PyUnicode_Fill(to_copy, 0, PyUnicode_GET_LENGTH(to_copy), 0U) < 0) {
|
|
|
|
Py_DECREF(to_copy);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2023-05-04 18:25:09 +03:00
|
|
|
copied = PyUnicode_CopyCharacters(to_copy, to_start, from,
|
|
|
|
from_start, how_many);
|
|
|
|
if (copied == -1 && PyErr_Occurred()) {
|
2022-08-10 09:10:25 +02:00
|
|
|
Py_DECREF(to_copy);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return Py_BuildValue("(Nn)", to_copy, copied);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2024-06-17 17:10:52 +02:00
|
|
|
// Exercise the basic PyUnicodeWriter write functions (WriteUTF8, WriteChar,
// WriteSubstring, WriteStr, WriteRepr) and check the finished string.
// Returns None on success, NULL with an exception set on failure.
static PyObject *
test_unicodewriter(PyObject *self, PyObject *Py_UNUSED(args))
{
    PyObject *str = NULL;
    int ret;

    PyUnicodeWriter *writer = PyUnicodeWriter_Create(100);
    if (writer == NULL) {
        return NULL;
    }

    // test PyUnicodeWriter_WriteUTF8(), then PyUnicodeWriter_WriteChar()
    if (PyUnicodeWriter_WriteUTF8(writer, "var", -1) < 0
        || PyUnicodeWriter_WriteChar(writer, '=') < 0)
    {
        goto error;
    }

    // test PyUnicodeWriter_WriteSubstring() with the range [1, 5)
    str = PyUnicode_FromString("[long]");
    if (str == NULL) {
        goto error;
    }
    ret = PyUnicodeWriter_WriteSubstring(writer, str, 1, 5);
    Py_CLEAR(str);
    if (ret < 0) {
        goto error;
    }

    // test PyUnicodeWriter_WriteStr()
    str = PyUnicode_FromString(" value ");
    if (str == NULL) {
        goto error;
    }
    ret = PyUnicodeWriter_WriteStr(writer, str);
    Py_CLEAR(str);
    if (ret < 0) {
        goto error;
    }

    // test PyUnicodeWriter_WriteRepr()
    str = PyUnicode_FromString("repr");
    if (str == NULL) {
        goto error;
    }
    ret = PyUnicodeWriter_WriteRepr(writer, str);
    Py_CLEAR(str);
    if (ret < 0) {
        goto error;
    }

    // The writer is not discarded when PyUnicodeWriter_Finish() fails.
    PyObject *result = PyUnicodeWriter_Finish(writer);
    if (result == NULL) {
        return NULL;
    }
    assert(PyUnicode_EqualToUTF8(result, "var=long value 'repr'"));
    Py_DECREF(result);

    Py_RETURN_NONE;

error:
    PyUnicodeWriter_Discard(writer);
    return NULL;
}
|
|
|
|
|
|
|
|
|
|
|
|
// Test PyUnicodeWriter_WriteUTF8() with ASCII, a 2-byte sequence and a
// 3-byte sequence, separated by characters written with WriteChar().
// Returns None on success, NULL with an exception set on failure.
static PyObject *
test_unicodewriter_utf8(PyObject *self, PyObject *Py_UNUSED(args))
{
    PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
    if (writer == NULL) {
        return NULL;
    }

    // The writes run in order; the first failure short-circuits the chain.
    if (PyUnicodeWriter_WriteUTF8(writer, "ascii", -1) < 0
        || PyUnicodeWriter_WriteChar(writer, '-') < 0
        || PyUnicodeWriter_WriteUTF8(writer, "latin1=\xC3\xA9", -1) < 0
        || PyUnicodeWriter_WriteChar(writer, '-') < 0
        || PyUnicodeWriter_WriteUTF8(writer, "euro=\xE2\x82\xAC", -1) < 0
        || PyUnicodeWriter_WriteChar(writer, '.') < 0)
    {
        PyUnicodeWriter_Discard(writer);
        return NULL;
    }

    PyObject *result = PyUnicodeWriter_Finish(writer);
    if (result == NULL) {
        return NULL;
    }
    assert(PyUnicode_EqualToUTF8(result,
                                 "ascii-latin1=\xC3\xA9-euro=\xE2\x82\xAC."));
    Py_DECREF(result);

    Py_RETURN_NONE;
}
|
|
|
|
|
|
|
|
|
|
|
|
// Check that PyUnicodeWriter_WriteUTF8() fails on an invalid byte (0xFF) and
// that the exception left behind is UnicodeDecodeError.  The exception is
// cleared before returning None.
static PyObject *
test_unicodewriter_invalid_utf8(PyObject *self, PyObject *Py_UNUSED(args))
{
    PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
    if (writer == NULL) {
        return NULL;
    }

    // The write itself lives inside the assertion, as in the other tests of
    // this file.
    assert(PyUnicodeWriter_WriteUTF8(writer, "invalid=\xFF", -1) < 0);
    PyUnicodeWriter_Discard(writer);

    // The exception is inspected after the writer has been discarded.
    assert(PyErr_ExceptionMatches(PyExc_UnicodeDecodeError));
    PyErr_Clear();

    Py_RETURN_NONE;
}
|
|
|
|
|
|
|
|
|
|
|
|
// Check that a writer is still usable after PyUnicodeWriter_WriteUTF8()
// failed: the text written before and after the failed call must both be in
// the final string, and nothing from the failed call.
static PyObject *
test_unicodewriter_recover_error(PyObject *self, PyObject *Py_UNUSED(args))
{
    PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
    if (writer == NULL) {
        return NULL;
    }

    // 1. a successful write
    assert(PyUnicodeWriter_WriteUTF8(writer, "value=", -1) == 0);

    // 2. a write that fails because of the invalid byte 0xFF
    assert(PyUnicodeWriter_WriteUTF8(writer, "invalid\xFF", -1) < 0);
    PyErr_Clear();

    // 3. a second successful write on the same writer
    assert(PyUnicodeWriter_WriteUTF8(writer, "valid", -1) == 0);

    PyObject *result = PyUnicodeWriter_Finish(writer);
    if (result == NULL) {
        return NULL;
    }
    assert(PyUnicode_EqualToUTF8(result, "value=valid"));
    Py_DECREF(result);

    Py_RETURN_NONE;
}
|
|
|
|
|
|
|
|
|
2024-06-21 19:33:15 +02:00
|
|
|
// Test PyUnicodeWriter_DecodeUTF8Stateful() with the "ignore" and "replace"
// error handlers and without a `consumed` output argument (NULL).
// Returns None on success, NULL with an exception set on failure.
static PyObject *
test_unicodewriter_decode_utf8(PyObject *self, PyObject *Py_UNUSED(args))
{
    PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
    if (writer == NULL) {
        return NULL;
    }

    // Invalid byte with "ignore", then invalid byte with "replace", then an
    // incomplete trailing UTF-8 sequence with "replace"; '-' separates them.
    if (PyUnicodeWriter_DecodeUTF8Stateful(writer, "ign\xFFore", -1, "ignore", NULL) < 0
        || PyUnicodeWriter_WriteChar(writer, '-') < 0
        || PyUnicodeWriter_DecodeUTF8Stateful(writer, "replace\xFF", -1, "replace", NULL) < 0
        || PyUnicodeWriter_WriteChar(writer, '-') < 0
        || PyUnicodeWriter_DecodeUTF8Stateful(writer, "incomplete\xC3", -1, "replace", NULL) < 0)
    {
        PyUnicodeWriter_Discard(writer);
        return NULL;
    }

    PyObject *result = PyUnicodeWriter_Finish(writer);
    if (result == NULL) {
        return NULL;
    }
    assert(PyUnicode_EqualToUTF8(result,
                                 "ignore-replace\xef\xbf\xbd"
                                 "-incomplete\xef\xbf\xbd"));
    Py_DECREF(result);

    Py_RETURN_NONE;
}
|
|
|
|
|
|
|
|
|
|
|
|
// Test the `consumed` output argument of PyUnicodeWriter_DecodeUTF8Stateful().
// Before every call `consumed` is reset to a value no call is expected to
// produce (12345), so each assertion proves that the call really stored a
// result.  Returns None on success, NULL with an exception set on failure.
static PyObject *
test_unicodewriter_decode_utf8_consumed(PyObject *self, PyObject *Py_UNUSED(args))
{
    const Py_ssize_t unset = 12345;
    Py_ssize_t consumed;

    PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
    if (writer == NULL) {
        return NULL;
    }

    // valid string
    consumed = unset;
    if (PyUnicodeWriter_DecodeUTF8Stateful(writer, "text", -1, NULL, &consumed) < 0) {
        goto error;
    }
    assert(consumed == 4);
    if (PyUnicodeWriter_WriteChar(writer, '-') < 0) {
        goto error;
    }

    // non-ASCII, with an explicit size of 6 bytes
    consumed = unset;
    if (PyUnicodeWriter_DecodeUTF8Stateful(writer, "\xC3\xA9-\xE2\x82\xAC", 6, NULL, &consumed) < 0) {
        goto error;
    }
    assert(consumed == 6);
    if (PyUnicodeWriter_WriteChar(writer, '-') < 0) {
        goto error;
    }

    // consumed is 0 if write fails
    consumed = unset;
    assert(PyUnicodeWriter_DecodeUTF8Stateful(writer, "invalid\xFF", -1, NULL, &consumed) < 0);
    PyErr_Clear();
    assert(consumed == 0);

    // ignore error handler
    consumed = unset;
    if (PyUnicodeWriter_DecodeUTF8Stateful(writer, "more\xFF", -1, "ignore", &consumed) < 0) {
        goto error;
    }
    assert(consumed == 5);
    if (PyUnicodeWriter_WriteChar(writer, '-') < 0) {
        goto error;
    }

    // incomplete trailing UTF-8 sequence: the dangling lead byte is not
    // counted as consumed
    consumed = unset;
    if (PyUnicodeWriter_DecodeUTF8Stateful(writer, "incomplete\xC3", -1, "ignore", &consumed) < 0) {
        goto error;
    }
    assert(consumed == 10);

    PyObject *result = PyUnicodeWriter_Finish(writer);
    if (result == NULL) {
        return NULL;
    }
    assert(PyUnicode_EqualToUTF8(result,
                                 "text-\xC3\xA9-\xE2\x82\xAC-"
                                 "more-incomplete"));
    Py_DECREF(result);

    Py_RETURN_NONE;

error:
    PyUnicodeWriter_Discard(writer);
    return NULL;
}
|
|
|
|
|
|
|
|
|
2024-06-17 17:10:52 +02:00
|
|
|
// Test PyUnicodeWriter_Format() followed by PyUnicodeWriter_WriteChar().
// Returns None on success, NULL with an exception set on failure.
static PyObject *
test_unicodewriter_format(PyObject *self, PyObject *Py_UNUSED(args))
{
    PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
    if (writer == NULL) {
        return NULL;
    }

    // PyUnicodeWriter_Format() first, then a single '.' via WriteChar();
    // the second call is skipped if the first one fails.
    if (PyUnicodeWriter_Format(writer, "%s %i", "Hello", 123) < 0
        || PyUnicodeWriter_WriteChar(writer, '.') < 0)
    {
        PyUnicodeWriter_Discard(writer);
        return NULL;
    }

    PyObject *result = PyUnicodeWriter_Finish(writer);
    if (result == NULL) {
        return NULL;
    }
    assert(PyUnicode_EqualToUTF8(result, "Hello 123."));
    Py_DECREF(result);

    Py_RETURN_NONE;
}
|
|
|
|
|
|
|
|
|
|
|
|
// Check that a writer is still usable after PyUnicodeWriter_Format() failed:
// the output of the failed call must not appear in the final string.
static PyObject *
test_unicodewriter_format_recover_error(PyObject *self, PyObject *Py_UNUSED(args))
{
    PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
    if (writer == NULL) {
        return NULL;
    }

    // 1. a successful call
    assert(PyUnicodeWriter_Format(writer, "%s ", "Hello") == 0);

    // 2. a call that fails because the format string contains the byte 0xFF
    assert(PyUnicodeWriter_Format(writer, "%s\xff", "World") < 0);
    PyErr_Clear();

    // 3. the same argument again, this time with a valid format string
    assert(PyUnicodeWriter_Format(writer, "%s.", "World") == 0);

    PyObject *result = PyUnicodeWriter_Finish(writer);
    if (result == NULL) {
        return NULL;
    }
    assert(PyUnicode_EqualToUTF8(result, "Hello World."));
    Py_DECREF(result);

    Py_RETURN_NONE;
}
|
|
|
|
|
|
|
|
|
2024-06-21 19:33:15 +02:00
|
|
|
// Test PyUnicodeWriter_WriteWideChar() with an explicit length that cuts the
// input short (8 of the wide characters, so " IGNORED" is left out), with a
// length of 1, and with -1 for a null-terminated wide string.
// Returns None on success, NULL with an exception set on failure.
static PyObject *
test_unicodewriter_widechar(PyObject *self, PyObject *Py_UNUSED(args))
{
    PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
    if (writer == NULL) {
        return NULL;
    }

    // The writes run in order; the first failure short-circuits the chain.
    if (PyUnicodeWriter_WriteWideChar(writer, L"latin1=\xE9 IGNORED", 8) < 0
        || PyUnicodeWriter_WriteWideChar(writer, L"-", 1) < 0
        || PyUnicodeWriter_WriteWideChar(writer, L"euro=\u20AC", -1) < 0
        || PyUnicodeWriter_WriteChar(writer, '.') < 0)
    {
        PyUnicodeWriter_Discard(writer);
        return NULL;
    }

    PyObject *result = PyUnicodeWriter_Finish(writer);
    if (result == NULL) {
        return NULL;
    }
    assert(PyUnicode_EqualToUTF8(result,
                                 "latin1=\xC3\xA9-euro=\xE2\x82\xAC."));
    Py_DECREF(result);

    Py_RETURN_NONE;
}
|
|
|
|
|
|
|
|
|
2022-08-10 09:10:25 +02:00
|
|
|
static PyMethodDef TestMethods[] = {
|
2023-05-04 18:25:09 +03:00
|
|
|
{"unicode_new", unicode_new, METH_VARARGS},
|
|
|
|
{"unicode_fill", unicode_fill, METH_VARARGS},
|
|
|
|
{"unicode_fromkindanddata", unicode_fromkindanddata, METH_VARARGS},
|
2022-08-10 09:10:25 +02:00
|
|
|
{"unicode_asucs4", unicode_asucs4, METH_VARARGS},
|
2023-05-04 18:25:09 +03:00
|
|
|
{"unicode_asucs4copy", unicode_asucs4copy, METH_VARARGS},
|
2022-08-10 09:10:25 +02:00
|
|
|
{"unicode_asutf8", unicode_asutf8, METH_VARARGS},
|
|
|
|
{"unicode_copycharacters", unicode_copycharacters, METH_VARARGS},
|
2024-06-17 17:10:52 +02:00
|
|
|
{"test_unicodewriter", test_unicodewriter, METH_NOARGS},
|
|
|
|
{"test_unicodewriter_utf8", test_unicodewriter_utf8, METH_NOARGS},
|
|
|
|
{"test_unicodewriter_invalid_utf8", test_unicodewriter_invalid_utf8, METH_NOARGS},
|
|
|
|
{"test_unicodewriter_recover_error", test_unicodewriter_recover_error, METH_NOARGS},
|
2024-06-21 19:33:15 +02:00
|
|
|
{"test_unicodewriter_decode_utf8", test_unicodewriter_decode_utf8, METH_NOARGS},
|
|
|
|
{"test_unicodewriter_decode_utf8_consumed", test_unicodewriter_decode_utf8_consumed, METH_NOARGS},
|
2024-06-17 17:10:52 +02:00
|
|
|
{"test_unicodewriter_format", test_unicodewriter_format, METH_NOARGS},
|
|
|
|
{"test_unicodewriter_format_recover_error", test_unicodewriter_format_recover_error, METH_NOARGS},
|
2024-06-21 19:33:15 +02:00
|
|
|
{"test_unicodewriter_widechar", test_unicodewriter_widechar, METH_NOARGS},
|
2022-08-10 09:10:25 +02:00
|
|
|
{NULL},
|
|
|
|
};
|
|
|
|
|
|
|
|
int
|
|
|
|
_PyTestCapi_Init_Unicode(PyObject *m) {
|
|
|
|
if (PyModule_AddFunctions(m, TestMethods) < 0) {
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|