2011-12-18 21:14:16 +02:00
|
|
|
/*
|
|
|
|
* utility functions
|
|
|
|
*
|
|
|
|
* src/pl/plpython/plpy_util.c
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "postgres.h"
|
|
|
|
|
|
|
|
#include "mb/pg_wchar.h"
|
|
|
|
#include "utils/memutils.h"
|
|
|
|
#include "utils/palloc.h"
|
|
|
|
|
|
|
|
#include "plpython.h"
|
|
|
|
|
|
|
|
#include "plpy_util.h"
|
|
|
|
|
|
|
|
#include "plpy_elog.h"
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Convert a Python unicode object to a Python string/bytes object in
|
2014-05-06 12:12:18 -04:00
|
|
|
* PostgreSQL server encoding. Reference ownership is passed to the
|
2011-12-18 21:14:16 +02:00
|
|
|
* caller.
|
|
|
|
*/
|
|
|
|
PyObject *
|
|
|
|
PLyUnicode_Bytes(PyObject *unicode)
|
|
|
|
{
|
2013-05-29 16:58:43 -04:00
|
|
|
PyObject *bytes,
|
|
|
|
*rv;
|
|
|
|
char *utf8string,
|
|
|
|
*encoded;
|
Perform conversion from Python unicode to string/bytes object via UTF-8.
We used to convert the unicode object directly to a string in the server
encoding by calling Python's PyUnicode_AsEncodedString function. In other
words, we used Python's routines to do the encoding. However, that has a
few problems. First of all, it required keeping a mapping table of Python
encoding names and PostgreSQL encodings. But the real killer was that Python
doesn't support EUC_TW and MULE_INTERNAL encodings at all.
Instead, convert the Python unicode object to UTF-8, and use PostgreSQL's
encoding conversion functions to convert from UTF-8 to server encoding. We
were already doing the same in the other direction in PLyUnicode_FromString,
so this is more consistent, too.
Note: This makes SQL_ASCII to behave more leniently. We used to map
SQL_ASCII to Python's 'ascii', which on Python means strict 7-bit ASCII
only, so you got an error if the python string contained anything but pure
ASCII. You no longer get an error; you get the UTF-8 representation of the
string instead.
Backpatch to 9.0, where these conversions were introduced.
Jan Urbański
2012-08-06 13:02:15 +03:00
|
|
|
|
|
|
|
/* First encode the Python unicode object with UTF-8. */
|
|
|
|
bytes = PyUnicode_AsUTF8String(unicode);
|
|
|
|
if (bytes == NULL)
|
|
|
|
PLy_elog(ERROR, "could not convert Python Unicode object to bytes");
|
|
|
|
|
|
|
|
utf8string = PyBytes_AsString(bytes);
|
2013-05-29 16:58:43 -04:00
|
|
|
if (utf8string == NULL)
|
|
|
|
{
|
Perform conversion from Python unicode to string/bytes object via UTF-8.
We used to convert the unicode object directly to a string in the server
encoding by calling Python's PyUnicode_AsEncodedString function. In other
words, we used Python's routines to do the encoding. However, that has a
few problems. First of all, it required keeping a mapping table of Python
encoding names and PostgreSQL encodings. But the real killer was that Python
doesn't support EUC_TW and MULE_INTERNAL encodings at all.
Instead, convert the Python unicode object to UTF-8, and use PostgreSQL's
encoding conversion functions to convert from UTF-8 to server encoding. We
were already doing the same in the other direction in PLyUnicode_FromString,
so this is more consistent, too.
Note: This makes SQL_ASCII to behave more leniently. We used to map
SQL_ASCII to Python's 'ascii', which on Python means strict 7-bit ASCII
only, so you got an error if the python string contained anything but pure
ASCII. You no longer get an error; you get the UTF-8 representation of the
string instead.
Backpatch to 9.0, where these conversions were introduced.
Jan Urbański
2012-08-06 13:02:15 +03:00
|
|
|
Py_DECREF(bytes);
|
|
|
|
PLy_elog(ERROR, "could not extract bytes from encoded string");
|
|
|
|
}
|
2011-12-18 21:14:16 +02:00
|
|
|
|
|
|
|
/*
|
Perform conversion from Python unicode to string/bytes object via UTF-8.
We used to convert the unicode object directly to a string in the server
encoding by calling Python's PyUnicode_AsEncodedString function. In other
words, we used Python's routines to do the encoding. However, that has a
few problems. First of all, it required keeping a mapping table of Python
encoding names and PostgreSQL encodings. But the real killer was that Python
doesn't support EUC_TW and MULE_INTERNAL encodings at all.
Instead, convert the Python unicode object to UTF-8, and use PostgreSQL's
encoding conversion functions to convert from UTF-8 to server encoding. We
were already doing the same in the other direction in PLyUnicode_FromString,
so this is more consistent, too.
Note: This makes SQL_ASCII to behave more leniently. We used to map
SQL_ASCII to Python's 'ascii', which on Python means strict 7-bit ASCII
only, so you got an error if the python string contained anything but pure
ASCII. You no longer get an error; you get the UTF-8 representation of the
string instead.
Backpatch to 9.0, where these conversions were introduced.
Jan Urbański
2012-08-06 13:02:15 +03:00
|
|
|
* Then convert to server encoding if necessary.
|
|
|
|
*
|
|
|
|
* PyUnicode_AsEncodedString could be used to encode the object directly
|
|
|
|
* in the server encoding, but Python doesn't support all the encodings
|
|
|
|
* that PostgreSQL does (EUC_TW and MULE_INTERNAL). UTF-8 is used as an
|
|
|
|
* intermediary in PLyUnicode_FromString as well.
|
2011-12-18 21:14:16 +02:00
|
|
|
*/
|
Perform conversion from Python unicode to string/bytes object via UTF-8.
We used to convert the unicode object directly to a string in the server
encoding by calling Python's PyUnicode_AsEncodedString function. In other
words, we used Python's routines to do the encoding. However, that has a
few problems. First of all, it required keeping a mapping table of Python
encoding names and PostgreSQL encodings. But the real killer was that Python
doesn't support EUC_TW and MULE_INTERNAL encodings at all.
Instead, convert the Python unicode object to UTF-8, and use PostgreSQL's
encoding conversion functions to convert from UTF-8 to server encoding. We
were already doing the same in the other direction in PLyUnicode_FromString,
so this is more consistent, too.
Note: This makes SQL_ASCII to behave more leniently. We used to map
SQL_ASCII to Python's 'ascii', which on Python means strict 7-bit ASCII
only, so you got an error if the python string contained anything but pure
ASCII. You no longer get an error; you get the UTF-8 representation of the
string instead.
Backpatch to 9.0, where these conversions were introduced.
Jan Urbański
2012-08-06 13:02:15 +03:00
|
|
|
if (GetDatabaseEncoding() != PG_UTF8)
|
Fix mapping of PostgreSQL encodings to Python encodings.
Windows encodings, "win1252" and so forth, are named differently in Python,
like "cp1252". Also, if the PyUnicode_AsEncodedString() function call fails
for some reason, use a plain ereport(), not a PLy_elog(), to report that
error. That avoids recursion and crash, if PLy_elog() tries to call
PLyUnicode_Bytes() again.
This fixes bug reported by Asif Naeem. Backpatch down to 9.0, before that
plpython didn't even try these conversions.
Jan Urbański, with minor comment improvements by me.
2012-07-05 21:45:24 +03:00
|
|
|
{
|
Perform conversion from Python unicode to string/bytes object via UTF-8.
We used to convert the unicode object directly to a string in the server
encoding by calling Python's PyUnicode_AsEncodedString function. In other
words, we used Python's routines to do the encoding. However, that has a
few problems. First of all, it required keeping a mapping table of Python
encoding names and PostgreSQL encodings. But the real killer was that Python
doesn't support EUC_TW and MULE_INTERNAL encodings at all.
Instead, convert the Python unicode object to UTF-8, and use PostgreSQL's
encoding conversion functions to convert from UTF-8 to server encoding. We
were already doing the same in the other direction in PLyUnicode_FromString,
so this is more consistent, too.
Note: This makes SQL_ASCII to behave more leniently. We used to map
SQL_ASCII to Python's 'ascii', which on Python means strict 7-bit ASCII
only, so you got an error if the python string contained anything but pure
ASCII. You no longer get an error; you get the UTF-8 representation of the
string instead.
Backpatch to 9.0, where these conversions were introduced.
Jan Urbański
2012-08-06 13:02:15 +03:00
|
|
|
PG_TRY();
|
|
|
|
{
|
2014-02-23 16:59:05 -05:00
|
|
|
encoded = pg_any_to_server(utf8string,
|
|
|
|
strlen(utf8string),
|
|
|
|
PG_UTF8);
|
Perform conversion from Python unicode to string/bytes object via UTF-8.
We used to convert the unicode object directly to a string in the server
encoding by calling Python's PyUnicode_AsEncodedString function. In other
words, we used Python's routines to do the encoding. However, that has a
few problems. First of all, it required keeping a mapping table of Python
encoding names and PostgreSQL encodings. But the real killer was that Python
doesn't support EUC_TW and MULE_INTERNAL encodings at all.
Instead, convert the Python unicode object to UTF-8, and use PostgreSQL's
encoding conversion functions to convert from UTF-8 to server encoding. We
were already doing the same in the other direction in PLyUnicode_FromString,
so this is more consistent, too.
Note: This makes SQL_ASCII to behave more leniently. We used to map
SQL_ASCII to Python's 'ascii', which on Python means strict 7-bit ASCII
only, so you got an error if the python string contained anything but pure
ASCII. You no longer get an error; you get the UTF-8 representation of the
string instead.
Backpatch to 9.0, where these conversions were introduced.
Jan Urbański
2012-08-06 13:02:15 +03:00
|
|
|
}
|
|
|
|
PG_CATCH();
|
|
|
|
{
|
|
|
|
Py_DECREF(bytes);
|
|
|
|
PG_RE_THROW();
|
|
|
|
}
|
|
|
|
PG_END_TRY();
|
Fix mapping of PostgreSQL encodings to Python encodings.
Windows encodings, "win1252" and so forth, are named differently in Python,
like "cp1252". Also, if the PyUnicode_AsEncodedString() function call fails
for some reason, use a plain ereport(), not a PLy_elog(), to report that
error. That avoids recursion and crash, if PLy_elog() tries to call
PLyUnicode_Bytes() again.
This fixes bug reported by Asif Naeem. Backpatch down to 9.0, before that
plpython didn't even try these conversions.
Jan Urbański, with minor comment improvements by me.
2012-07-05 21:45:24 +03:00
|
|
|
}
|
Perform conversion from Python unicode to string/bytes object via UTF-8.
We used to convert the unicode object directly to a string in the server
encoding by calling Python's PyUnicode_AsEncodedString function. In other
words, we used Python's routines to do the encoding. However, that has a
few problems. First of all, it required keeping a mapping table of Python
encoding names and PostgreSQL encodings. But the real killer was that Python
doesn't support EUC_TW and MULE_INTERNAL encodings at all.
Instead, convert the Python unicode object to UTF-8, and use PostgreSQL's
encoding conversion functions to convert from UTF-8 to server encoding. We
were already doing the same in the other direction in PLyUnicode_FromString,
so this is more consistent, too.
Note: This makes SQL_ASCII to behave more leniently. We used to map
SQL_ASCII to Python's 'ascii', which on Python means strict 7-bit ASCII
only, so you got an error if the python string contained anything but pure
ASCII. You no longer get an error; you get the UTF-8 representation of the
string instead.
Backpatch to 9.0, where these conversions were introduced.
Jan Urbański
2012-08-06 13:02:15 +03:00
|
|
|
else
|
|
|
|
encoded = utf8string;
|
|
|
|
|
|
|
|
/* finally, build a bytes object in the server encoding */
|
|
|
|
rv = PyBytes_FromStringAndSize(encoded, strlen(encoded));
|
|
|
|
|
2014-02-23 16:59:05 -05:00
|
|
|
/* if pg_any_to_server allocated memory, free it now */
|
Perform conversion from Python unicode to string/bytes object via UTF-8.
We used to convert the unicode object directly to a string in the server
encoding by calling Python's PyUnicode_AsEncodedString function. In other
words, we used Python's routines to do the encoding. However, that has a
few problems. First of all, it required keeping a mapping table of Python
encoding names and PostgreSQL encodings. But the real killer was that Python
doesn't support EUC_TW and MULE_INTERNAL encodings at all.
Instead, convert the Python unicode object to UTF-8, and use PostgreSQL's
encoding conversion functions to convert from UTF-8 to server encoding. We
were already doing the same in the other direction in PLyUnicode_FromString,
so this is more consistent, too.
Note: This makes SQL_ASCII to behave more leniently. We used to map
SQL_ASCII to Python's 'ascii', which on Python means strict 7-bit ASCII
only, so you got an error if the python string contained anything but pure
ASCII. You no longer get an error; you get the UTF-8 representation of the
string instead.
Backpatch to 9.0, where these conversions were introduced.
Jan Urbański
2012-08-06 13:02:15 +03:00
|
|
|
if (utf8string != encoded)
|
|
|
|
pfree(encoded);
|
Fix mapping of PostgreSQL encodings to Python encodings.
Windows encodings, "win1252" and so forth, are named differently in Python,
like "cp1252". Also, if the PyUnicode_AsEncodedString() function call fails
for some reason, use a plain ereport(), not a PLy_elog(), to report that
error. That avoids recursion and crash, if PLy_elog() tries to call
PLyUnicode_Bytes() again.
This fixes bug reported by Asif Naeem. Backpatch down to 9.0, before that
plpython didn't even try these conversions.
Jan Urbański, with minor comment improvements by me.
2012-07-05 21:45:24 +03:00
|
|
|
|
Perform conversion from Python unicode to string/bytes object via UTF-8.
We used to convert the unicode object directly to a string in the server
encoding by calling Python's PyUnicode_AsEncodedString function. In other
words, we used Python's routines to do the encoding. However, that has a
few problems. First of all, it required keeping a mapping table of Python
encoding names and PostgreSQL encodings. But the real killer was that Python
doesn't support EUC_TW and MULE_INTERNAL encodings at all.
Instead, convert the Python unicode object to UTF-8, and use PostgreSQL's
encoding conversion functions to convert from UTF-8 to server encoding. We
were already doing the same in the other direction in PLyUnicode_FromString,
so this is more consistent, too.
Note: This makes SQL_ASCII to behave more leniently. We used to map
SQL_ASCII to Python's 'ascii', which on Python means strict 7-bit ASCII
only, so you got an error if the python string contained anything but pure
ASCII. You no longer get an error; you get the UTF-8 representation of the
string instead.
Backpatch to 9.0, where these conversions were introduced.
Jan Urbański
2012-08-06 13:02:15 +03:00
|
|
|
Py_DECREF(bytes);
|
2011-12-18 21:14:16 +02:00
|
|
|
return rv;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Convert a Python unicode object to a C string in PostgreSQL server
|
|
|
|
* encoding. No Python object reference is passed out of this
|
|
|
|
* function. The result is palloc'ed.
|
|
|
|
*
|
|
|
|
* Note that this function is disguised as PyString_AsString() when
|
2014-05-06 12:12:18 -04:00
|
|
|
* using Python 3. That function retuns a pointer into the internal
|
2011-12-18 21:14:16 +02:00
|
|
|
* memory of the argument, which isn't exactly the interface of this
|
|
|
|
* function. But in either case you get a rather short-lived
|
|
|
|
* reference that you ought to better leave alone.
|
|
|
|
*/
|
|
|
|
char *
|
|
|
|
PLyUnicode_AsString(PyObject *unicode)
|
|
|
|
{
|
|
|
|
PyObject *o = PLyUnicode_Bytes(unicode);
|
|
|
|
char *rv = pstrdup(PyBytes_AsString(o));
|
|
|
|
|
|
|
|
Py_XDECREF(o);
|
|
|
|
return rv;
|
|
|
|
}
|
|
|
|
|
|
|
|
#if PY_MAJOR_VERSION >= 3
|
|
|
|
/*
|
|
|
|
* Convert a C string in the PostgreSQL server encoding to a Python
|
2014-05-06 12:12:18 -04:00
|
|
|
* unicode object. Reference ownership is passed to the caller.
|
2011-12-18 21:14:16 +02:00
|
|
|
*/
|
|
|
|
PyObject *
|
2015-04-26 10:33:14 -04:00
|
|
|
PLyUnicode_FromStringAndSize(const char *s, Py_ssize_t size)
|
2011-12-18 21:14:16 +02:00
|
|
|
{
|
|
|
|
char *utf8string;
|
|
|
|
PyObject *o;
|
|
|
|
|
2015-04-26 10:33:14 -04:00
|
|
|
utf8string = pg_server_to_any(s, size, PG_UTF8);
|
2011-12-18 21:14:16 +02:00
|
|
|
|
2015-04-26 10:33:14 -04:00
|
|
|
if (utf8string == s)
|
|
|
|
{
|
|
|
|
o = PyUnicode_FromStringAndSize(s, size);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
o = PyUnicode_FromString(utf8string);
|
2011-12-18 21:14:16 +02:00
|
|
|
pfree(utf8string);
|
2015-04-26 10:33:14 -04:00
|
|
|
}
|
2011-12-18 21:14:16 +02:00
|
|
|
|
|
|
|
return o;
|
|
|
|
}
|
2012-06-10 15:20:04 -04:00
|
|
|
|
2015-04-26 10:33:14 -04:00
|
|
|
PyObject *
|
|
|
|
PLyUnicode_FromString(const char *s)
|
|
|
|
{
|
|
|
|
return PLyUnicode_FromStringAndSize(s, strlen(s));
|
|
|
|
}
|
|
|
|
|
2011-12-18 21:14:16 +02:00
|
|
|
#endif /* PY_MAJOR_VERSION >= 3 */
|