/*-------------------------------------------------------------------------
 *
 * json.c
 *		JSON data type support.
 *
 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/utils/adt/json.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/htup_details.h"
#include "access/transam.h"
#include "catalog/pg_type.h"
#include "executor/spi.h"
#include "lib/stringinfo.h"
#include "libpq/pqformat.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "parser/parse_coerce.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/date.h"
#include "utils/datetime.h"
#include "utils/lsyscache.h"
#include "utils/json.h"
#include "utils/jsonapi.h"
#include "utils/typcache.h"
#include "utils/syscache.h"

/*
 * The context of the parser is maintained by the recursive descent
 * mechanism, but is passed explicitly to the error reporting routine
 * for better diagnostics.
 */
typedef enum					/* contexts of JSON parser */
{
	JSON_PARSE_VALUE,			/* expecting a value */
	JSON_PARSE_STRING,			/* expecting a string (for a field name) */
	JSON_PARSE_ARRAY_START,		/* saw '[', expecting value or ']' */
	JSON_PARSE_ARRAY_NEXT,		/* saw array element, expecting ',' or ']' */
	JSON_PARSE_OBJECT_START,	/* saw '{', expecting label or '}' */
	JSON_PARSE_OBJECT_LABEL,	/* saw object label, expecting ':' */
	JSON_PARSE_OBJECT_NEXT,		/* saw object value, expecting ',' or '}' */
	JSON_PARSE_OBJECT_COMMA,	/* saw object ',', expecting next label */
	JSON_PARSE_END				/* saw the end of a document, expect nothing */
} JsonParseContext;

typedef enum					/* type categories for datum_to_json */
{
	JSONTYPE_NULL,				/* null, so we didn't bother to identify */
	JSONTYPE_BOOL,				/* boolean (built-in types only) */
	JSONTYPE_NUMERIC,			/* numeric (ditto) */
	JSONTYPE_DATE,				/* we use special formatting for datetimes */
	JSONTYPE_TIMESTAMP,
	JSONTYPE_TIMESTAMPTZ,
	JSONTYPE_JSON,				/* JSON itself (and JSONB) */
	JSONTYPE_ARRAY,				/* array */
	JSONTYPE_COMPOSITE,			/* composite */
	JSONTYPE_CAST,				/* something with an explicit cast to JSON */
	JSONTYPE_OTHER				/* all else */
} JsonTypeCategory;
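
/*
 * Note added for clarity (not in the original file): json_categorize_type(),
 * declared below, resolves a type OID to one of the categories above plus
 * that type's output function OID, and datum_to_json() then dispatches on
 * that pair.  A sketch of the intended call pattern, using hypothetical
 * variable names:
 *
 *		JsonTypeCategory tcategory;
 *		Oid			outfuncoid;
 *
 *		json_categorize_type(val_type, &tcategory, &outfuncoid);
 *		datum_to_json(val, is_null, result, tcategory, outfuncoid, false);
 */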

typedef struct JsonAggState
{
	StringInfo	str;
	JsonTypeCategory key_category;
	Oid			key_output_func;
	JsonTypeCategory val_category;
	Oid			val_output_func;
} JsonAggState;
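
/*
 * Clarifying note (not in the original file): this appears to be the
 * per-group transition state for the JSON aggregate functions; it carries
 * the output buffer plus the pre-resolved type category and output function
 * for the key and value arguments, so those lookups are done once per
 * aggregation rather than once per input row.
 */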

static inline void json_lex(JsonLexContext *lex);
static inline void json_lex_string(JsonLexContext *lex);
static inline void json_lex_number(JsonLexContext *lex, char *s,
				bool *num_err, int *total_len);
static inline void parse_scalar(JsonLexContext *lex, JsonSemAction *sem);
static void parse_object_field(JsonLexContext *lex, JsonSemAction *sem);
static void parse_object(JsonLexContext *lex, JsonSemAction *sem);
static void parse_array_element(JsonLexContext *lex, JsonSemAction *sem);
static void parse_array(JsonLexContext *lex, JsonSemAction *sem);
static void report_parse_error(JsonParseContext ctx, JsonLexContext *lex);
static void report_invalid_token(JsonLexContext *lex);
static int	report_json_context(JsonLexContext *lex);
static char *extract_mb_char(char *s);
static void composite_to_json(Datum composite, StringInfo result,
				  bool use_line_feeds);
static void array_dim_to_json(StringInfo result, int dim, int ndims, int *dims,
				  Datum *vals, bool *nulls, int *valcount,
				  JsonTypeCategory tcategory, Oid outfuncoid,
				  bool use_line_feeds);
static void array_to_json_internal(Datum array, StringInfo result,
					   bool use_line_feeds);
static void json_categorize_type(Oid typoid,
					 JsonTypeCategory *tcategory,
					 Oid *outfuncoid);
static void datum_to_json(Datum val, bool is_null, StringInfo result,
			  JsonTypeCategory tcategory, Oid outfuncoid,
			  bool key_scalar);
static void add_json(Datum val, bool is_null, StringInfo result,
		 Oid val_type, bool key_scalar);
static text *catenate_stringinfo_string(StringInfo buffer, const char *addon);

/* the null action object used for pure validation */
static JsonSemAction nullSemAction =
{
	NULL, NULL, NULL, NULL, NULL,
	NULL, NULL, NULL, NULL, NULL
};
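
/*
 * Explanatory note added here (not part of the original file): with every
 * callback NULL, pg_parse_json() simply walks the input and raises an error
 * on bad syntax; that is how json_in() and json_recv() below validate their
 * input without building any result structure.
 */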

/* Recursive Descent parser support routines */

/*
 * lex_peek
 *
 * what is the current look_ahead token?
 */
static inline JsonTokenType
lex_peek(JsonLexContext *lex)
{
	return lex->token_type;
}

/*
 * lex_accept
 *
 * accept the look_ahead token and move the lexer to the next token if the
 * look_ahead token matches the token parameter. In that case, and if required,
 * also hand back the de-escaped lexeme.
 *
 * returns true if the token matched, false otherwise.
 */
static inline bool
lex_accept(JsonLexContext *lex, JsonTokenType token, char **lexeme)
{
	if (lex->token_type == token)
	{
		if (lexeme != NULL)
		{
			if (lex->token_type == JSON_TOKEN_STRING)
			{
				if (lex->strval != NULL)
					*lexeme = pstrdup(lex->strval->data);
			}
			else
			{
				int			len = (lex->token_terminator - lex->token_start);
				char	   *tokstr = palloc(len + 1);

				memcpy(tokstr, lex->token_start, len);
				tokstr[len] = '\0';
				*lexeme = tokstr;
			}
		}
		json_lex(lex);
		return true;
	}
	return false;
}

/*
 * lex_expect
 *
 * move the lexer to the next token if the current look_ahead token matches
 * the parameter token. Otherwise, report an error.
 */
static inline void
lex_expect(JsonParseContext ctx, JsonLexContext *lex, JsonTokenType token)
{
	if (!lex_accept(lex, token, NULL))
		report_parse_error(ctx, lex);
}

/* chars to consider as part of an alphanumeric token */
#define JSON_ALPHANUMERIC_CHAR(c)  \
	(((c) >= 'a' && (c) <= 'z') || \
	 ((c) >= 'A' && (c) <= 'Z') || \
	 ((c) >= '0' && (c) <= '9') || \
	 (c) == '_' || \
	 IS_HIGHBIT_SET(c))

/*
 * Utility function to check if a string is a valid JSON number.
 *
 * str is of length len, and need not be null-terminated.
 */
bool
IsValidJsonNumber(const char *str, int len)
{
	bool		numeric_error;
	int			total_len;
	JsonLexContext dummy_lex;

	if (len <= 0)
		return false;

	/*
	 * json_lex_number expects a leading '-' to have been eaten already.
	 *
	 * having to cast away the constness of str is ugly, but there's not much
	 * easy alternative.
	 */
	if (*str == '-')
	{
		dummy_lex.input = (char *) str + 1;
		dummy_lex.input_length = len - 1;
	}
	else
	{
		dummy_lex.input = (char *) str;
		dummy_lex.input_length = len;
	}

	json_lex_number(&dummy_lex, dummy_lex.input, &numeric_error, &total_len);

	return (!numeric_error) && (total_len == dummy_lex.input_length);
}
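
/*
 * Illustrative examples (added note, assuming the RFC 7159 number grammar as
 * implemented by json_lex_number; not part of the original file):
 *
 *		IsValidJsonNumber("-12.5e3", 7)		presumably returns true
 *		IsValidJsonNumber("01", 2)			presumably returns false
 *		IsValidJsonNumber(" 1", 2)			presumably returns false,
 *											since leading whitespace is not
 *											consumed here
 */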

/*
 * Input.
 */
Datum
json_in(PG_FUNCTION_ARGS)
{
	char	   *json = PG_GETARG_CSTRING(0);
	text	   *result = cstring_to_text(json);
	JsonLexContext *lex;

	/* validate it */
	lex = makeJsonLexContext(result, false);
	pg_parse_json(lex, &nullSemAction);

	/* Internal representation is the same as text, for now */
	PG_RETURN_TEXT_P(result);
}

/*
 * Output.
 */
Datum
json_out(PG_FUNCTION_ARGS)
{
	/* we needn't detoast because text_to_cstring will handle that */
	Datum		txt = PG_GETARG_DATUM(0);

	PG_RETURN_CSTRING(TextDatumGetCString(txt));
}

/*
 * Binary send.
 */
Datum
json_send(PG_FUNCTION_ARGS)
{
	text	   *t = PG_GETARG_TEXT_PP(0);
	StringInfoData buf;

	pq_begintypsend(&buf);
	pq_sendtext(&buf, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
	PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
}

/*
 * Binary receive.
 */
Datum
json_recv(PG_FUNCTION_ARGS)
{
	StringInfo	buf = (StringInfo) PG_GETARG_POINTER(0);
	char	   *str;
	int			nbytes;
	JsonLexContext *lex;

	str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);

	/* Validate it. */
	lex = makeJsonLexContextCstringLen(str, nbytes, false);
	pg_parse_json(lex, &nullSemAction);

	PG_RETURN_TEXT_P(cstring_to_text_with_len(str, nbytes));
}

/*
 * makeJsonLexContext
 *
 * lex constructor, with or without StringInfo object for de-escaped lexemes.
 *
 * Without is better as it makes the processing faster, so only make one
 * if really required.
 *
 * If you already have the json as a text* value, use the first of these
 * functions, otherwise use makeJsonLexContextCstringLen().
 */
JsonLexContext *
makeJsonLexContext(text *json, bool need_escapes)
{
	return makeJsonLexContextCstringLen(VARDATA_ANY(json),
										VARSIZE_ANY_EXHDR(json),
										need_escapes);
}

JsonLexContext *
makeJsonLexContextCstringLen(char *json, int len, bool need_escapes)
{
	JsonLexContext *lex = palloc0(sizeof(JsonLexContext));

	lex->input = lex->token_terminator = lex->line_start = json;
	lex->line_number = 1;
	lex->input_length = len;
	if (need_escapes)
		lex->strval = makeStringInfo();
	return lex;
}
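
/*
 * Usage sketch (added for illustration; it simply mirrors json_in() and
 * json_recv() above, with hypothetical variable names):
 *
 *		JsonLexContext *lex = makeJsonLexContextCstringLen(str, len, false);
 *
 *		pg_parse_json(lex, &nullSemAction);		(errors out on invalid JSON)
 *
 * Pass need_escapes = true only when the de-escaped string values are
 * actually needed, since filling lex->strval costs extra work in the lexer.
 */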

/*
 * pg_parse_json
 *
 * Publicly visible entry point for the JSON parser.
 *
 * lex is a lexing context, set up for the json to be processed by calling
 * makeJsonLexContext(). sem is a structure of function pointers to semantic
 * action routines to be called at appropriate spots during parsing, and a
 * pointer to a state object to be passed to those routines.
 */
void
pg_parse_json(JsonLexContext *lex, JsonSemAction *sem)
{
	JsonTokenType tok;

	/* get the initial token */
	json_lex(lex);

	tok = lex_peek(lex);

	/* parse by recursive descent */
	switch (tok)
	{
		case JSON_TOKEN_OBJECT_START:
			parse_object(lex, sem);
			break;
		case JSON_TOKEN_ARRAY_START:
			parse_array(lex, sem);
			break;
		default:
			parse_scalar(lex, sem); /* json can be a bare scalar */
	}

	lex_expect(JSON_PARSE_END, lex, JSON_TOKEN_END);
}
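
/*
 * Illustrative sketch (not part of the original file): a minimal semantic
 * action that counts scalar tokens.  The callback signature is assumed from
 * the call sites in this file (see parse_scalar below), and the field names
 * from the uses of "sem" throughout this file.
 *
 *		static void
 *		count_scalar(void *state, char *token, JsonTokenType tokentype)
 *		{
 *			(*(int *) state)++;
 *		}
 *
 *		...
 *		int			nscalars = 0;
 *		JsonSemAction sem;
 *
 *		memset(&sem, 0, sizeof(sem));
 *		sem.semstate = (void *) &nscalars;
 *		sem.scalar = count_scalar;
 *		pg_parse_json(lex, &sem);
 */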

/*
 * json_count_array_elements
 *
 * Returns number of array elements in lex context at start of array token
 * until end of array token at same nesting level.
 *
 * Designed to be called from array_start routines.
 */
int
json_count_array_elements(JsonLexContext *lex)
{
	JsonLexContext copylex;
	int			count;

	/*
	 * It's safe to do this with a shallow copy because the lexical routines
	 * don't scribble on the input. They do scribble on the other pointers
	 * etc, so doing this with a copy makes that safe.
	 */
	memcpy(&copylex, lex, sizeof(JsonLexContext));
	copylex.strval = NULL;		/* not interested in values here */
	copylex.lex_level++;

	count = 0;
	lex_expect(JSON_PARSE_ARRAY_START, &copylex, JSON_TOKEN_ARRAY_START);
	if (lex_peek(&copylex) != JSON_TOKEN_ARRAY_END)
	{
		do
		{
			count++;
			parse_array_element(&copylex, &nullSemAction);
		}
		while (lex_accept(&copylex, JSON_TOKEN_COMMA, NULL));
	}
	lex_expect(JSON_PARSE_ARRAY_NEXT, &copylex, JSON_TOKEN_ARRAY_END);

	return count;
}
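
/*
 * Note (added): this works by re-lexing the remainder of the array with a
 * throwaway copy of the lexer state, so callers pay for an extra pass over
 * that part of the input; it is best invoked at most once per array.
 */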

/*
 * Recursive Descent parse routines. There is one for each structural
 * element in a json document:
 *   - scalar (string, number, true, false, null)
 *   - array  ( [ ] )
 *   - array element
 *   - object ( { } )
 *   - object field
 */
static inline void
parse_scalar(JsonLexContext *lex, JsonSemAction *sem)
{
	char	   *val = NULL;
	json_scalar_action sfunc = sem->scalar;
	char	  **valaddr;
	JsonTokenType tok = lex_peek(lex);

	valaddr = sfunc == NULL ? NULL : &val;

	/* a scalar must be a string, a number, true, false, or null */
	switch (tok)
	{
		case JSON_TOKEN_TRUE:
			lex_accept(lex, JSON_TOKEN_TRUE, valaddr);
			break;
		case JSON_TOKEN_FALSE:
			lex_accept(lex, JSON_TOKEN_FALSE, valaddr);
			break;
		case JSON_TOKEN_NULL:
			lex_accept(lex, JSON_TOKEN_NULL, valaddr);
			break;
		case JSON_TOKEN_NUMBER:
			lex_accept(lex, JSON_TOKEN_NUMBER, valaddr);
			break;
		case JSON_TOKEN_STRING:
			lex_accept(lex, JSON_TOKEN_STRING, valaddr);
			break;
		default:
			report_parse_error(JSON_PARSE_VALUE, lex);
	}

	if (sfunc != NULL)
		(*sfunc) (sem->semstate, val, tok);
}

static void
parse_object_field(JsonLexContext *lex, JsonSemAction *sem)
{
	/*
	 * An object field is "fieldname" : value where value can be a scalar,
	 * object or array.  Note: in user-facing docs and error messages, we
	 * generally call a field name a "key".
	 */

	char	   *fname = NULL;	/* keep compiler quiet */
	json_ofield_action ostart = sem->object_field_start;
	json_ofield_action oend = sem->object_field_end;
	bool		isnull;
	char	  **fnameaddr = NULL;
	JsonTokenType tok;

	if (ostart != NULL || oend != NULL)
		fnameaddr = &fname;

	if (!lex_accept(lex, JSON_TOKEN_STRING, fnameaddr))
		report_parse_error(JSON_PARSE_STRING, lex);

	lex_expect(JSON_PARSE_OBJECT_LABEL, lex, JSON_TOKEN_COLON);

	tok = lex_peek(lex);
	isnull = tok == JSON_TOKEN_NULL;

	if (ostart != NULL)
		(*ostart) (sem->semstate, fname, isnull);

	switch (tok)
	{
		case JSON_TOKEN_OBJECT_START:
			parse_object(lex, sem);
			break;
		case JSON_TOKEN_ARRAY_START:
			parse_array(lex, sem);
			break;
		default:
			parse_scalar(lex, sem);
	}

	if (oend != NULL)
		(*oend) (sem->semstate, fname, isnull);
}

static void
parse_object(JsonLexContext *lex, JsonSemAction *sem)
{
	/*
	 * an object is a possibly empty sequence of object fields, separated by
	 * commas and surrounded by curly braces.
	 */
	json_struct_action ostart = sem->object_start;
	json_struct_action oend = sem->object_end;
	JsonTokenType tok;

	check_stack_depth();

	if (ostart != NULL)
		(*ostart) (sem->semstate);

	/*
	 * Data inside an object is at a higher nesting level than the object
	 * itself. Note that we increment this after we call the semantic routine
	 * for the object start and restore it before we call the routine for the
	 * object end.
	 */
	lex->lex_level++;

	/* we know this will succeed, just clearing the token */
	lex_expect(JSON_PARSE_OBJECT_START, lex, JSON_TOKEN_OBJECT_START);

	tok = lex_peek(lex);
	switch (tok)
	{
		case JSON_TOKEN_STRING:
			parse_object_field(lex, sem);
			while (lex_accept(lex, JSON_TOKEN_COMMA, NULL))
				parse_object_field(lex, sem);
			break;
		case JSON_TOKEN_OBJECT_END:
			break;
		default:
			/* case of an invalid initial token inside the object */
			report_parse_error(JSON_PARSE_OBJECT_START, lex);
	}

	lex_expect(JSON_PARSE_OBJECT_NEXT, lex, JSON_TOKEN_OBJECT_END);

	lex->lex_level--;

	if (oend != NULL)
		(*oend) (sem->semstate);
}

static void
parse_array_element(JsonLexContext *lex, JsonSemAction *sem)
{
	json_aelem_action astart = sem->array_element_start;
	json_aelem_action aend = sem->array_element_end;
	JsonTokenType tok = lex_peek(lex);

	bool		isnull;

	isnull = tok == JSON_TOKEN_NULL;

	if (astart != NULL)
		(*astart) (sem->semstate, isnull);

	/* an array element is any object, array or scalar */
	switch (tok)
	{
		case JSON_TOKEN_OBJECT_START:
			parse_object(lex, sem);
			break;
		case JSON_TOKEN_ARRAY_START:
			parse_array(lex, sem);
			break;
		default:
			parse_scalar(lex, sem);
	}

	if (aend != NULL)
		(*aend) (sem->semstate, isnull);
}

static void
parse_array(JsonLexContext *lex, JsonSemAction *sem)
{
	/*
	 * an array is a possibly empty sequence of array elements, separated by
	 * commas and surrounded by square brackets.
	 */
	json_struct_action astart = sem->array_start;
	json_struct_action aend = sem->array_end;

	check_stack_depth();

	if (astart != NULL)
		(*astart) (sem->semstate);

	/*
	 * Data inside an array is at a higher nesting level than the array
	 * itself. Note that we increment this after we call the semantic routine
	 * for the array start and restore it before we call the routine for the
	 * array end.
	 */
	lex->lex_level++;

	lex_expect(JSON_PARSE_ARRAY_START, lex, JSON_TOKEN_ARRAY_START);
	if (lex_peek(lex) != JSON_TOKEN_ARRAY_END)
	{
		parse_array_element(lex, sem);

		while (lex_accept(lex, JSON_TOKEN_COMMA, NULL))
			parse_array_element(lex, sem);
	}

	lex_expect(JSON_PARSE_ARRAY_NEXT, lex, JSON_TOKEN_ARRAY_END);

	lex->lex_level--;

	if (aend != NULL)
		(*aend) (sem->semstate);
}

/*
 * Lex one token from the input stream.
 */
static inline void
json_lex(JsonLexContext *lex)
{
	char	   *s;
	int			len;

	/* Skip leading whitespace. */
	s = lex->token_terminator;
	len = s - lex->input;
	while (len < lex->input_length &&
		   (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r'))
	{
		if (*s == '\n')
			++lex->line_number;
		++s;
		++len;
	}
	lex->token_start = s;

	/* Determine token type. */
	if (len >= lex->input_length)
	{
		lex->token_start = NULL;
		lex->prev_token_terminator = lex->token_terminator;
		lex->token_terminator = s;
		lex->token_type = JSON_TOKEN_END;
	}
	else
		switch (*s)
		{
				/* Single-character token, some kind of punctuation mark. */
			case '{':
				lex->prev_token_terminator = lex->token_terminator;
				lex->token_terminator = s + 1;
				lex->token_type = JSON_TOKEN_OBJECT_START;
				break;
			case '}':
				lex->prev_token_terminator = lex->token_terminator;
				lex->token_terminator = s + 1;
				lex->token_type = JSON_TOKEN_OBJECT_END;
				break;
			case '[':
				lex->prev_token_terminator = lex->token_terminator;
				lex->token_terminator = s + 1;
				lex->token_type = JSON_TOKEN_ARRAY_START;
				break;
			case ']':
				lex->prev_token_terminator = lex->token_terminator;
				lex->token_terminator = s + 1;
				lex->token_type = JSON_TOKEN_ARRAY_END;
				break;
			case ',':
				lex->prev_token_terminator = lex->token_terminator;
				lex->token_terminator = s + 1;
				lex->token_type = JSON_TOKEN_COMMA;
				break;
			case ':':
				lex->prev_token_terminator = lex->token_terminator;
				lex->token_terminator = s + 1;
				lex->token_type = JSON_TOKEN_COLON;
				break;
			case '"':
				/* string */
				json_lex_string(lex);
				lex->token_type = JSON_TOKEN_STRING;
				break;
			case '-':
				/* Negative number. */
				json_lex_number(lex, s + 1, NULL, NULL);
				lex->token_type = JSON_TOKEN_NUMBER;
				break;
			case '0':
			case '1':
			case '2':
			case '3':
			case '4':
			case '5':
			case '6':
			case '7':
			case '8':
			case '9':
				/* Positive number. */
				json_lex_number(lex, s, NULL, NULL);
				lex->token_type = JSON_TOKEN_NUMBER;
				break;
			default:
				{
					char	   *p;

					/*
					 * We're not dealing with a string, number, legal
					 * punctuation mark, or end of string.  The only legal
					 * tokens we might find here are true, false, and null,
					 * but for error reporting purposes we scan until we see a
					 * non-alphanumeric character.  That way, we can report
					 * the whole word as an unexpected token, rather than just
					 * some unintuitive prefix thereof.
					 */
					for (p = s; p - s < lex->input_length - len && JSON_ALPHANUMERIC_CHAR(*p); p++)
						 /* skip */ ;

					/*
					 * We got some sort of unexpected punctuation or an
					 * otherwise unexpected character, so just complain about
					 * that one character.
					 */
					if (p == s)
					{
						lex->prev_token_terminator = lex->token_terminator;
						lex->token_terminator = s + 1;
						report_invalid_token(lex);
					}

					/*
					 * We've got a real alphanumeric token here.  If it
					 * happens to be true, false, or null, all is well.  If
					 * not, error out.
					 */
					lex->prev_token_terminator = lex->token_terminator;
					lex->token_terminator = p;
					if (p - s == 4)
					{
						if (memcmp(s, "true", 4) == 0)
							lex->token_type = JSON_TOKEN_TRUE;
						else if (memcmp(s, "null", 4) == 0)
							lex->token_type = JSON_TOKEN_NULL;
						else
							report_invalid_token(lex);
					}
					else if (p - s == 5 && memcmp(s, "false", 5) == 0)
						lex->token_type = JSON_TOKEN_FALSE;
					else
						report_invalid_token(lex);

				}
		}						/* end of switch */
}
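
/*
 * Added note: on return from json_lex(), lex->token_type identifies the
 * token, lex->token_start and lex->token_terminator bracket its text, and
 * lex->prev_token_terminator remembers where the previous token ended.
 */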

/*
 * The next token in the input stream is known to be a string; lex it.
 */
static inline void
json_lex_string(JsonLexContext *lex)
{
    char       *s;
    int         len;
    int         hi_surrogate = -1;

    if (lex->strval != NULL)
        resetStringInfo(lex->strval);

    Assert(lex->input_length > 0);
    s = lex->token_start;
    len = lex->token_start - lex->input;
    for (;;)
    {
        s++;
        len++;
        /* Premature end of the string. */
        if (len >= lex->input_length)
        {
            lex->token_terminator = s;
            report_invalid_token(lex);
        }
        else if (*s == '"')
            break;
        else if ((unsigned char) *s < 32)
        {
            /* Per RFC4627, these characters MUST be escaped. */
            /* Since *s isn't printable, exclude it from the context string */
            lex->token_terminator = s;
            ereport(ERROR,
                    (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                     errmsg("invalid input syntax for type %s", "json"),
                     errdetail("Character with value 0x%02x must be escaped.",
                               (unsigned char) *s),
                     report_json_context(lex)));
        }
        else if (*s == '\\')
        {
            /* OK, we have an escape character. */
            s++;
            len++;
            if (len >= lex->input_length)
            {
                lex->token_terminator = s;
                report_invalid_token(lex);
            }
            else if (*s == 'u')
            {
                int         i;
                int         ch = 0;

                for (i = 1; i <= 4; i++)
                {
                    s++;
                    len++;
                    if (len >= lex->input_length)
                    {
                        lex->token_terminator = s;
                        report_invalid_token(lex);
                    }
                    else if (*s >= '0' && *s <= '9')
                        ch = (ch * 16) + (*s - '0');
                    else if (*s >= 'a' && *s <= 'f')
                        ch = (ch * 16) + (*s - 'a') + 10;
                    else if (*s >= 'A' && *s <= 'F')
                        ch = (ch * 16) + (*s - 'A') + 10;
                    else
                    {
                        lex->token_terminator = s + pg_mblen(s);
                        ereport(ERROR,
                                (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                                 errmsg("invalid input syntax for type %s",
                                        "json"),
                                 errdetail("\"\\u\" must be followed by four hexadecimal digits."),
                                 report_json_context(lex)));
                    }
                }
                if (lex->strval != NULL)
                {
                    char        utf8str[5];
                    int         utf8len;

                    if (ch >= 0xd800 && ch <= 0xdbff)
                    {
                        if (hi_surrogate != -1)
                            ereport(ERROR,
                                    (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                                     errmsg("invalid input syntax for type %s",
                                            "json"),
                                     errdetail("Unicode high surrogate must not follow a high surrogate."),
                                     report_json_context(lex)));
                        hi_surrogate = (ch & 0x3ff) << 10;
                        continue;
                    }
                    else if (ch >= 0xdc00 && ch <= 0xdfff)
                    {
                        if (hi_surrogate == -1)
                            ereport(ERROR,
                                    (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                                     errmsg("invalid input syntax for type %s", "json"),
                                     errdetail("Unicode low surrogate must follow a high surrogate."),
                                     report_json_context(lex)));
                        ch = 0x10000 + hi_surrogate + (ch & 0x3ff);
                        hi_surrogate = -1;
                    }

                    if (hi_surrogate != -1)
                        ereport(ERROR,
                                (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                                 errmsg("invalid input syntax for type %s", "json"),
                                 errdetail("Unicode low surrogate must follow a high surrogate."),
                                 report_json_context(lex)));

                    /*
                     * For UTF8, replace the escape sequence by the actual
                     * utf8 character in lex->strval. Do this also for other
                     * encodings if the escape designates an ASCII character,
                     * otherwise raise an error.
                     */

                    if (ch == 0)
                    {
                        /* We can't allow this, since our TEXT type doesn't */
                        ereport(ERROR,
                                (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
                                 errmsg("unsupported Unicode escape sequence"),
                                 errdetail("\\u0000 cannot be converted to text."),
                                 report_json_context(lex)));
                    }
                    else if (GetDatabaseEncoding() == PG_UTF8)
                    {
                        unicode_to_utf8(ch, (unsigned char *) utf8str);
                        utf8len = pg_utf_mblen((unsigned char *) utf8str);
                        appendBinaryStringInfo(lex->strval, utf8str, utf8len);
                    }
                    else if (ch <= 0x007f)
                    {
                        /*
                         * This is the only way to designate things like a
                         * form feed character in JSON, so it's useful in all
                         * encodings.
                         */
                        appendStringInfoChar(lex->strval, (char) ch);
                    }
                    else
                    {
                        ereport(ERROR,
                                (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
                                 errmsg("unsupported Unicode escape sequence"),
                                 errdetail("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8."),
                                 report_json_context(lex)));
                    }

                }
            }
            else if (lex->strval != NULL)
            {
                if (hi_surrogate != -1)
                    ereport(ERROR,
                            (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                             errmsg("invalid input syntax for type %s",
                                    "json"),
                             errdetail("Unicode low surrogate must follow a high surrogate."),
                             report_json_context(lex)));

                switch (*s)
                {
                    case '"':
                    case '\\':
                    case '/':
                        appendStringInfoChar(lex->strval, *s);
                        break;
                    case 'b':
                        appendStringInfoChar(lex->strval, '\b');
                        break;
                    case 'f':
                        appendStringInfoChar(lex->strval, '\f');
                        break;
                    case 'n':
                        appendStringInfoChar(lex->strval, '\n');
                        break;
                    case 'r':
                        appendStringInfoChar(lex->strval, '\r');
                        break;
                    case 't':
                        appendStringInfoChar(lex->strval, '\t');
                        break;
                    default:
                        /* Not a valid string escape, so error out. */
                        lex->token_terminator = s + pg_mblen(s);
                        ereport(ERROR,
                                (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                                 errmsg("invalid input syntax for type %s",
                                        "json"),
                                 errdetail("Escape sequence \"\\%s\" is invalid.",
                                           extract_mb_char(s)),
                                 report_json_context(lex)));
                }
            }
            else if (strchr("\"\\/bfnrt", *s) == NULL)
            {
                /*
                 * Simpler processing if we're not bothered about de-escaping
                 *
                 * It's very tempting to remove the strchr() call here and
                 * replace it with a switch statement, but testing so far has
                 * shown it's not a performance win.
                 */
                lex->token_terminator = s + pg_mblen(s);
                ereport(ERROR,
                        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                         errmsg("invalid input syntax for type %s", "json"),
                         errdetail("Escape sequence \"\\%s\" is invalid.",
                                   extract_mb_char(s)),
                         report_json_context(lex)));
            }

        }
        else if (lex->strval != NULL)
        {
            if (hi_surrogate != -1)
                ereport(ERROR,
                        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                         errmsg("invalid input syntax for type %s", "json"),
                         errdetail("Unicode low surrogate must follow a high surrogate."),
                         report_json_context(lex)));

            appendStringInfoChar(lex->strval, *s);
        }

    }

    if (hi_surrogate != -1)
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                 errmsg("invalid input syntax for type %s", "json"),
                 errdetail("Unicode low surrogate must follow a high surrogate."),
                 report_json_context(lex)));

    /* Hooray, we found the end of the string! */
    lex->prev_token_terminator = lex->token_terminator;
    lex->token_terminator = s + 1;
}
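
/*
 * Worked example (illustrative only, not from the original file): the \u
 * escape and surrogate-pair arithmetic used in json_lex_string() above,
 * restated as a small value sketch.  For the input sequence \uD83D\uDE00:
 *
 *     first escape:   ch = 0xD83D, which lies in [0xD800, 0xDBFF], so
 *                     hi_surrogate = (0xD83D & 0x3ff) << 10 = 0xF400;
 *     second escape:  ch = 0xDE00, which lies in [0xDC00, 0xDFFF], so
 *                     ch = 0x10000 + 0xF400 + (0xDE00 & 0x3ff) = 0x1F600.
 *
 * The pair therefore decodes to code point U+1F600, which is appended to
 * lex->strval as UTF-8 when the server encoding is UTF8.
 */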

/*
 * The next token in the input stream is known to be a number; lex it.
 *
 * In JSON, a number consists of four parts:
 *
 * (1) An optional minus sign ('-').
 *
 * (2) Either a single '0', or a string of one or more digits that does not
 *     begin with a '0'.
 *
 * (3) An optional decimal part, consisting of a period ('.') followed by
 *     one or more digits.  (Note: While this part can be omitted
 *     completely, it's not OK to have only the decimal point without
 *     any digits afterwards.)
 *
 * (4) An optional exponent part, consisting of 'e' or 'E', optionally
 *     followed by '+' or '-', followed by one or more digits.  (Note:
 *     As with the decimal part, if 'e' or 'E' is present, it must be
 *     followed by at least one digit.)
 *
 * The 's' argument to this function points to the ostensible beginning
 * of part 2 - i.e. the character after any optional minus sign, or the
 * first character of the string if there is none.
 *
 * If num_err is not NULL, we return an error flag to *num_err rather than
 * raising an error for a badly-formed number.  Also, if total_len is not NULL
 * the distance from lex->input to the token end+1 is returned to *total_len.
 */
static inline void
json_lex_number(JsonLexContext *lex, char *s,
                bool *num_err, int *total_len)
{
    bool        error = false;
    int         len = s - lex->input;

    /* Part (1): leading sign indicator. */
    /* Caller already did this for us; so do nothing. */

    /* Part (2): parse main digit string. */
    if (len < lex->input_length && *s == '0')
    {
        s++;
        len++;
    }
    else if (len < lex->input_length && *s >= '1' && *s <= '9')
    {
        do
        {
            s++;
            len++;
        } while (len < lex->input_length && *s >= '0' && *s <= '9');
    }
    else
        error = true;

    /* Part (3): parse optional decimal portion. */
    if (len < lex->input_length && *s == '.')
    {
        s++;
        len++;
        if (len == lex->input_length || *s < '0' || *s > '9')
            error = true;
        else
        {
            do
            {
                s++;
                len++;
            } while (len < lex->input_length && *s >= '0' && *s <= '9');
        }
    }

    /* Part (4): parse optional exponent. */
    if (len < lex->input_length && (*s == 'e' || *s == 'E'))
    {
        s++;
        len++;
        if (len < lex->input_length && (*s == '+' || *s == '-'))
        {
            s++;
            len++;
        }
        if (len == lex->input_length || *s < '0' || *s > '9')
            error = true;
        else
        {
            do
            {
                s++;
                len++;
            } while (len < lex->input_length && *s >= '0' && *s <= '9');
        }
    }

    /*
     * Check for trailing garbage.  As in json_lex(), any alphanumeric stuff
     * here should be considered part of the token for error-reporting
     * purposes.
     */
    for (; len < lex->input_length && JSON_ALPHANUMERIC_CHAR(*s); s++, len++)
        error = true;

    if (total_len != NULL)
        *total_len = len;

    if (num_err != NULL)
    {
        /* let the caller handle any error */
        *num_err = error;
    }
    else
    {
        /* return token endpoint */
        lex->prev_token_terminator = lex->token_terminator;
        lex->token_terminator = s;
        /* handle error if any */
        if (error)
            report_invalid_token(lex);
    }
}
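
/*
 * Worked example (illustrative only, not from the original file): applying
 * the number grammar above, "0", "-1", "3.14", and "2.5e-10" all lex
 * cleanly, while "01" (digits after a leading zero), "1." (decimal point
 * with no digits), and "1e" (exponent marker with no digits) all set the
 * error flag; the trailing-garbage loop likewise rejects forms like "12abc".
 */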

/*
 * Report a parse error.
 *
 * lex->token_start and lex->token_terminator must identify the current token.
 */
static void
report_parse_error(JsonParseContext ctx, JsonLexContext *lex)
{
    char       *token;
    int         toklen;

    /* Handle case where the input ended prematurely. */
    if (lex->token_start == NULL || lex->token_type == JSON_TOKEN_END)
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                 errmsg("invalid input syntax for type %s", "json"),
                 errdetail("The input string ended unexpectedly."),
                 report_json_context(lex)));

    /* Separate out the current token. */
    toklen = lex->token_terminator - lex->token_start;
    token = palloc(toklen + 1);
    memcpy(token, lex->token_start, toklen);
    token[toklen] = '\0';

    /* Complain, with the appropriate detail message. */
    if (ctx == JSON_PARSE_END)
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                 errmsg("invalid input syntax for type %s", "json"),
                 errdetail("Expected end of input, but found \"%s\".",
                           token),
                 report_json_context(lex)));
    else
    {
        switch (ctx)
        {
            case JSON_PARSE_VALUE:
                ereport(ERROR,
                        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                         errmsg("invalid input syntax for type %s", "json"),
                         errdetail("Expected JSON value, but found \"%s\".",
                                   token),
                         report_json_context(lex)));
                break;
            case JSON_PARSE_STRING:
                ereport(ERROR,
                        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                         errmsg("invalid input syntax for type %s", "json"),
                         errdetail("Expected string, but found \"%s\".",
                                   token),
                         report_json_context(lex)));
                break;
            case JSON_PARSE_ARRAY_START:
                ereport(ERROR,
                        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                         errmsg("invalid input syntax for type %s", "json"),
                         errdetail("Expected array element or \"]\", but found \"%s\".",
                                   token),
                         report_json_context(lex)));
                break;
            case JSON_PARSE_ARRAY_NEXT:
                ereport(ERROR,
                        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                         errmsg("invalid input syntax for type %s", "json"),
                         errdetail("Expected \",\" or \"]\", but found \"%s\".",
                                   token),
                         report_json_context(lex)));
                break;
            case JSON_PARSE_OBJECT_START:
                ereport(ERROR,
                        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                         errmsg("invalid input syntax for type %s", "json"),
                         errdetail("Expected string or \"}\", but found \"%s\".",
                                   token),
                         report_json_context(lex)));
                break;
            case JSON_PARSE_OBJECT_LABEL:
                ereport(ERROR,
                        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                         errmsg("invalid input syntax for type %s", "json"),
                         errdetail("Expected \":\", but found \"%s\".",
                                   token),
                         report_json_context(lex)));
                break;
            case JSON_PARSE_OBJECT_NEXT:
                ereport(ERROR,
                        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                         errmsg("invalid input syntax for type %s", "json"),
                         errdetail("Expected \",\" or \"}\", but found \"%s\".",
                                   token),
                         report_json_context(lex)));
                break;
            case JSON_PARSE_OBJECT_COMMA:
                ereport(ERROR,
                        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                         errmsg("invalid input syntax for type %s", "json"),
                         errdetail("Expected string, but found \"%s\".",
                                   token),
                         report_json_context(lex)));
                break;
            default:
                elog(ERROR, "unexpected json parse state: %d", ctx);
        }
    }
}
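
/*
 * Worked example (illustrative only, not from the original file): the detail
 * message chosen above depends on the parse context.  For instance, the
 * input {"a" 1} fails in JSON_PARSE_OBJECT_LABEL with
 * 'Expected ":", but found "1".', while [1,2 3] fails in
 * JSON_PARSE_ARRAY_NEXT with 'Expected "," or "]", but found "3".'.
 */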

/*
 * Report an invalid input token.
 *
 * lex->token_start and lex->token_terminator must identify the token.
 */
static void
report_invalid_token(JsonLexContext *lex)
{
    char       *token;
    int         toklen;

    /* Separate out the offending token. */
    toklen = lex->token_terminator - lex->token_start;
    token = palloc(toklen + 1);
    memcpy(token, lex->token_start, toklen);
    token[toklen] = '\0';

    ereport(ERROR,
            (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
             errmsg("invalid input syntax for type %s", "json"),
             errdetail("Token \"%s\" is invalid.", token),
             report_json_context(lex)));
}

/*
 * Report a CONTEXT line for bogus JSON input.
 *
 * lex->token_terminator must be set to identify the spot where we detected
 * the error.  Note that lex->token_start might be NULL, in case we recognized
 * error at EOF.
 *
 * The return value isn't meaningful, but we make it non-void so that this
 * can be invoked inside ereport().
 */
static int
report_json_context(JsonLexContext *lex)
{
    const char *context_start;
    const char *context_end;
    const char *line_start;
    int         line_number;
    char       *ctxt;
    int         ctxtlen;
    const char *prefix;
    const char *suffix;

    /* Choose boundaries for the part of the input we will display */
    context_start = lex->input;
    context_end = lex->token_terminator;
    line_start = context_start;
    line_number = 1;
    for (;;)
    {
        /* Always advance over newlines */
        if (context_start < context_end && *context_start == '\n')
        {
            context_start++;
            line_start = context_start;
            line_number++;
            continue;
        }
        /* Otherwise, done as soon as we are close enough to context_end */
        if (context_end - context_start < 50)
            break;
        /* Advance to next multibyte character */
        if (IS_HIGHBIT_SET(*context_start))
            context_start += pg_mblen(context_start);
        else
            context_start++;
    }

    /*
     * We add "..." to indicate that the excerpt doesn't start at the
     * beginning of the line ... but if we're within 3 characters of the
     * beginning of the line, we might as well just show the whole line.
     */
    if (context_start - line_start <= 3)
        context_start = line_start;

    /* Get a null-terminated copy of the data to present */
    ctxtlen = context_end - context_start;
    ctxt = palloc(ctxtlen + 1);
    memcpy(ctxt, context_start, ctxtlen);
    ctxt[ctxtlen] = '\0';

    /*
     * Show the context, prefixing "..." if not starting at start of line, and
     * suffixing "..." if not ending at end of line.
     */
    prefix = (context_start > line_start) ? "..." : "";
    suffix = (lex->token_type != JSON_TOKEN_END &&
              context_end - lex->input < lex->input_length &&
              *context_end != '\n' && *context_end != '\r') ? "..." : "";

    return errcontext("JSON data, line %d: %s%s%s",
                      line_number, prefix, ctxt, suffix);
}
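
/*
 * Worked example (illustrative only, not from the original file): an error
 * produced with this context line typically looks like
 *
 *     ERROR:  invalid input syntax for type json
 *     DETAIL:  Token "abc" is invalid.
 *     CONTEXT:  JSON data, line 1: [1, 2, abc...
 *
 * where the trailing "..." is the suffix added because the excerpt stops at
 * the offending token rather than at the end of the line.
 */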

/*
 * Extract a single, possibly multi-byte char from the input string.
 */
static char *
extract_mb_char(char *s)
{
    char       *res;
    int         len;

    len = pg_mblen(s);
    res = palloc(len + 1);
    memcpy(res, s, len);
    res[len] = '\0';

    return res;
}
|
2012-02-03 12:11:16 -05:00
|
|
|
|
|
|
|
/*
|
Get rid of bogus dependency on typcategory in to_json() and friends.
These functions were relying on typcategory to identify arrays and
composites, which is not reliable and not the normal way to do it.
Using typcategory to identify boolean, numeric types, and json itself is
also pretty questionable, though the code in those cases didn't seem to be
at risk of anything worse than wrong output. Instead, use the standard
lsyscache functions to identify arrays and composites, and rely on a direct
check of the type OID for the other cases.
In HEAD, also be sure to look through domains so that a domain is treated
the same as its base type for conversions to JSON. However, this is a
small behavioral change; given the lack of field complaints, we won't
back-patch it.
In passing, refactor so that there's only one copy of the code that decides
which conversion strategy to apply, not multiple copies that could (and
have) gotten out of sync.
2014-05-09 12:55:00 -04:00
|
|
|
* Determine how we want to print values of a given type in datum_to_json.
|
|
|
|
*
|
|
|
|
* Given the datatype OID, return its JsonTypeCategory, as well as the type's
|
|
|
|
* output function OID. If the returned category is JSONTYPE_CAST, we
|
|
|
|
* return the OID of the type->JSON cast function instead.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
json_categorize_type(Oid typoid,
|
|
|
|
JsonTypeCategory *tcategory,
|
|
|
|
Oid *outfuncoid)
|
|
|
|
{
|
|
|
|
bool typisvarlena;
|
|
|
|
|
|
|
|
/* Look through any domain */
|
|
|
|
typoid = getBaseType(typoid);
|
|
|
|
|
2014-12-12 15:31:14 -05:00
|
|
|
*outfuncoid = InvalidOid;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We need to get the output function for everything except date and
|
Fix jsonb Unicode escape processing, and in consequence disallow \u0000.
We've been trying to support \u0000 in JSON values since commit
78ed8e03c67d7333, and have introduced increasingly worse hacks to try to
make it work, such as commit 0ad1a816320a2b53. However, it fundamentally
can't work in the way envisioned, because the stored representation looks
the same as for \\u0000 which is not the same thing at all. It's also
entirely bogus to output \u0000 when de-escaped output is called for.
The right way to do this would be to store an actual 0x00 byte, and then
throw error only if asked to produce de-escaped textual output. However,
getting to that point seems likely to take considerable work and may well
never be practical in the 9.4.x series.
To preserve our options for better behavior while getting rid of the nasty
side-effects of 0ad1a816320a2b53, revert that commit in toto and instead
throw error if \u0000 is used in a context where it needs to be de-escaped.
(These are the same contexts where non-ASCII Unicode escapes throw error
if the database encoding isn't UTF8, so this behavior is by no means
without precedent.)
In passing, make both the \u0000 case and the non-ASCII Unicode case report
ERRCODE_UNTRANSLATABLE_CHARACTER / "unsupported Unicode escape sequence"
rather than claiming there's something wrong with the input syntax.
Back-patch to 9.4, where we have to do something because 0ad1a816320a2b53
broke things for many cases having nothing to do with \u0000. 9.3 also has
bogus behavior, but only for that specific escape value, so given the lack
of field complaints it seems better to leave 9.3 alone.
2015-01-30 14:44:46 -05:00
|
|
|
* timestamp types, array and composite types, booleans, and non-builtin
|
|
|
|
* types where there's a cast to json.
|
2014-12-12 15:31:14 -05:00
|
|
|
*/
|
Get rid of bogus dependency on typcategory in to_json() and friends.
These functions were relying on typcategory to identify arrays and
composites, which is not reliable and not the normal way to do it.
Using typcategory to identify boolean, numeric types, and json itself is
also pretty questionable, though the code in those cases didn't seem to be
at risk of anything worse than wrong output. Instead, use the standard
lsyscache functions to identify arrays and composites, and rely on a direct
check of the type OID for the other cases.
In HEAD, also be sure to look through domains so that a domain is treated
the same as its base type for conversions to JSON. However, this is a
small behavioral change; given the lack of field complaints, we won't
back-patch it.
In passing, refactor so that there's only one copy of the code that decides
which conversion strategy to apply, not multiple copies that could (and
have) gotten out of sync.
2014-05-09 12:55:00 -04:00
|
|
|
|
|
|
|
switch (typoid)
|
|
|
|
{
|
|
|
|
case BOOLOID:
|
|
|
|
*tcategory = JSONTYPE_BOOL;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case INT2OID:
|
|
|
|
case INT4OID:
|
|
|
|
case INT8OID:
|
|
|
|
case FLOAT4OID:
|
|
|
|
case FLOAT8OID:
|
|
|
|
case NUMERICOID:
|
2014-12-12 15:31:14 -05:00
|
|
|
getTypeOutputInfo(typoid, outfuncoid, &typisvarlena);
|
Get rid of bogus dependency on typcategory in to_json() and friends.
These functions were relying on typcategory to identify arrays and
composites, which is not reliable and not the normal way to do it.
Using typcategory to identify boolean, numeric types, and json itself is
also pretty questionable, though the code in those cases didn't seem to be
at risk of anything worse than wrong output. Instead, use the standard
lsyscache functions to identify arrays and composites, and rely on a direct
check of the type OID for the other cases.
In HEAD, also be sure to look through domains so that a domain is treated
the same as its base type for conversions to JSON. However, this is a
small behavioral change; given the lack of field complaints, we won't
back-patch it.
In passing, refactor so that there's only one copy of the code that decides
which conversion strategy to apply, not multiple copies that could (and
have) gotten out of sync.
2014-05-09 12:55:00 -04:00
|
|
|
*tcategory = JSONTYPE_NUMERIC;
|
|
|
|
break;
|
|
|
|
|
2014-08-17 22:57:15 -04:00
|
|
|
case DATEOID:
|
|
|
|
*tcategory = JSONTYPE_DATE;
|
|
|
|
break;
|
|
|
|
|
2014-06-03 13:56:53 -04:00
|
|
|
case TIMESTAMPOID:
|
|
|
|
*tcategory = JSONTYPE_TIMESTAMP;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case TIMESTAMPTZOID:
|
|
|
|
*tcategory = JSONTYPE_TIMESTAMPTZ;
|
|
|
|
break;
|
|
|
|
|
Get rid of bogus dependency on typcategory in to_json() and friends.
These functions were relying on typcategory to identify arrays and
composites, which is not reliable and not the normal way to do it.
Using typcategory to identify boolean, numeric types, and json itself is
also pretty questionable, though the code in those cases didn't seem to be
at risk of anything worse than wrong output. Instead, use the standard
lsyscache functions to identify arrays and composites, and rely on a direct
check of the type OID for the other cases.
In HEAD, also be sure to look through domains so that a domain is treated
the same as its base type for conversions to JSON. However, this is a
small behavioral change; given the lack of field complaints, we won't
back-patch it.
In passing, refactor so that there's only one copy of the code that decides
which conversion strategy to apply, not multiple copies that could (and
have) gotten out of sync.
2014-05-09 12:55:00 -04:00
|
|
|
case JSONOID:
|
|
|
|
case JSONBOID:
|
2014-12-12 15:31:14 -05:00
|
|
|
getTypeOutputInfo(typoid, outfuncoid, &typisvarlena);
|
Get rid of bogus dependency on typcategory in to_json() and friends.
These functions were relying on typcategory to identify arrays and
composites, which is not reliable and not the normal way to do it.
Using typcategory to identify boolean, numeric types, and json itself is
also pretty questionable, though the code in those cases didn't seem to be
at risk of anything worse than wrong output. Instead, use the standard
lsyscache functions to identify arrays and composites, and rely on a direct
check of the type OID for the other cases.
In HEAD, also be sure to look through domains so that a domain is treated
the same as its base type for conversions to JSON. However, this is a
small behavioral change; given the lack of field complaints, we won't
back-patch it.
In passing, refactor so that there's only one copy of the code that decides
which conversion strategy to apply, not multiple copies that could (and
have) gotten out of sync.
2014-05-09 12:55:00 -04:00
|
|
|
*tcategory = JSONTYPE_JSON;
|
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
/* Check for arrays and composites */
|
2017-02-22 11:10:49 -05:00
|
|
|
if (OidIsValid(get_element_type(typoid)) || typoid == ANYARRAYOID
|
|
|
|
|| typoid == RECORDARRAYOID)
|
Get rid of bogus dependency on typcategory in to_json() and friends.
These functions were relying on typcategory to identify arrays and
composites, which is not reliable and not the normal way to do it.
Using typcategory to identify boolean, numeric types, and json itself is
also pretty questionable, though the code in those cases didn't seem to be
at risk of anything worse than wrong output. Instead, use the standard
lsyscache functions to identify arrays and composites, and rely on a direct
check of the type OID for the other cases.
In HEAD, also be sure to look through domains so that a domain is treated
the same as its base type for conversions to JSON. However, this is a
small behavioral change; given the lack of field complaints, we won't
back-patch it.
In passing, refactor so that there's only one copy of the code that decides
which conversion strategy to apply, not multiple copies that could (and
have) gotten out of sync.
2014-05-09 12:55:00 -04:00
|
|
|
*tcategory = JSONTYPE_ARRAY;
|
2017-05-17 16:31:56 -04:00
|
|
|
else if (type_is_rowtype(typoid)) /* includes RECORDOID */
|
Get rid of bogus dependency on typcategory in to_json() and friends.
These functions were relying on typcategory to identify arrays and
composites, which is not reliable and not the normal way to do it.
Using typcategory to identify boolean, numeric types, and json itself is
also pretty questionable, though the code in those cases didn't seem to be
at risk of anything worse than wrong output. Instead, use the standard
lsyscache functions to identify arrays and composites, and rely on a direct
check of the type OID for the other cases.
In HEAD, also be sure to look through domains so that a domain is treated
the same as its base type for conversions to JSON. However, this is a
small behavioral change; given the lack of field complaints, we won't
back-patch it.
In passing, refactor so that there's only one copy of the code that decides
which conversion strategy to apply, not multiple copies that could (and
have) gotten out of sync.
2014-05-09 12:55:00 -04:00
|
|
|
*tcategory = JSONTYPE_COMPOSITE;
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* It's probably the general case ... */
|
|
|
|
*tcategory = JSONTYPE_OTHER;
|
|
|
|
/* but let's look for a cast to json, if it's not built-in */
|
|
|
|
if (typoid >= FirstNormalObjectId)
|
|
|
|
{
|
Fix jsonb Unicode escape processing, and in consequence disallow \u0000.
We've been trying to support \u0000 in JSON values since commit
78ed8e03c67d7333, and have introduced increasingly worse hacks to try to
make it work, such as commit 0ad1a816320a2b53. However, it fundamentally
can't work in the way envisioned, because the stored representation looks
the same as for \\u0000 which is not the same thing at all. It's also
entirely bogus to output \u0000 when de-escaped output is called for.
The right way to do this would be to store an actual 0x00 byte, and then
throw error only if asked to produce de-escaped textual output. However,
getting to that point seems likely to take considerable work and may well
never be practical in the 9.4.x series.
To preserve our options for better behavior while getting rid of the nasty
side-effects of 0ad1a816320a2b53, revert that commit in toto and instead
throw error if \u0000 is used in a context where it needs to be de-escaped.
(These are the same contexts where non-ASCII Unicode escapes throw error
if the database encoding isn't UTF8, so this behavior is by no means
without precedent.)
In passing, make both the \u0000 case and the non-ASCII Unicode case report
ERRCODE_UNTRANSLATABLE_CHARACTER / "unsupported Unicode escape sequence"
rather than claiming there's something wrong with the input syntax.
Back-patch to 9.4, where we have to do something because 0ad1a816320a2b53
broke things for many cases having nothing to do with \u0000. 9.3 also has
bogus behavior, but only for that specific escape value, so given the lack
of field complaints it seems better to leave 9.3 alone.
2015-01-30 14:44:46 -05:00
|
|
|
                    Oid         castfunc;
                    CoercionPathType ctype;

                    ctype = find_coercion_pathway(JSONOID, typoid,
                                                  COERCION_EXPLICIT,
                                                  &castfunc);
                    if (ctype == COERCION_PATH_FUNC && OidIsValid(castfunc))
                    {
                        *tcategory = JSONTYPE_CAST;
                        *outfuncoid = castfunc;
                    }
                    else
                    {
                        /* non builtin type with no cast */
                        getTypeOutputInfo(typoid, outfuncoid, &typisvarlena);
                    }
                }
                else
                {
                    /* any other builtin type */
                    getTypeOutputInfo(typoid, outfuncoid, &typisvarlena);
                }
            }
            break;
    }
}
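
/*
 * Illustrative note, not from the original source: the find_coercion_pathway
 * lookup above means a user-defined type with an explicit cast to json is
 * rendered through that cast.  Assuming a hypothetical type "mytype" with
 *     CREATE CAST (mytype AS json) WITH FUNCTION mytype_to_json(mytype);
 * to_json() of a mytype value would emit whatever mytype_to_json() returns,
 * instead of the type's text output escaped as a JSON string.
 */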

/*
 * Turn a Datum into JSON text, appending the string to "result".
 *
 * tcategory and outfuncoid are from a previous call to json_categorize_type,
 * except that if is_null is true then they can be invalid.
 *
 * If key_scalar is true, the value is being printed as a key, so insist
 * it's of an acceptable type, and force it to be quoted.
 */
static void
datum_to_json(Datum val, bool is_null, StringInfo result,
              JsonTypeCategory tcategory, Oid outfuncoid,
              bool key_scalar)
{
    char       *outputstr;
    text       *jsontext;

    check_stack_depth();

    /* callers are expected to ensure that null keys are not passed in */
    Assert(!(key_scalar && is_null));

    if (is_null)
    {
        appendStringInfoString(result, "null");
        return;
    }

    if (key_scalar &&
        (tcategory == JSONTYPE_ARRAY ||
         tcategory == JSONTYPE_COMPOSITE ||
         tcategory == JSONTYPE_JSON ||
         tcategory == JSONTYPE_CAST))
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                 errmsg("key value must be scalar, not array, composite, or json")));

    switch (tcategory)
    {
        case JSONTYPE_ARRAY:
            array_to_json_internal(val, result, false);
            break;
        case JSONTYPE_COMPOSITE:
            composite_to_json(val, result, false);
            break;
        case JSONTYPE_BOOL:
            outputstr = DatumGetBool(val) ? "true" : "false";
            if (key_scalar)
                escape_json(result, outputstr);
            else
                appendStringInfoString(result, outputstr);
            break;
        case JSONTYPE_NUMERIC:
            outputstr = OidOutputFunctionCall(outfuncoid, val);

            /*
             * Don't call escape_json for a non-key if it's a valid JSON
             * number.
             */
            if (!key_scalar && IsValidJsonNumber(outputstr, strlen(outputstr)))
                appendStringInfoString(result, outputstr);
            else
                escape_json(result, outputstr);
            pfree(outputstr);
            break;
        case JSONTYPE_DATE:
            {
                DateADT     date;
                struct pg_tm tm;
                char        buf[MAXDATELEN + 1];

                date = DatumGetDateADT(val);
                /* Same as date_out(), but forcing DateStyle */
                if (DATE_NOT_FINITE(date))
                    EncodeSpecialDate(date, buf);
                else
                {
                    j2date(date + POSTGRES_EPOCH_JDATE,
                           &(tm.tm_year), &(tm.tm_mon), &(tm.tm_mday));
                    EncodeDateOnly(&tm, USE_XSD_DATES, buf);
                }
                appendStringInfo(result, "\"%s\"", buf);
            }
            break;
        case JSONTYPE_TIMESTAMP:
            {
                Timestamp   timestamp;
                struct pg_tm tm;
                fsec_t      fsec;
                char        buf[MAXDATELEN + 1];

                timestamp = DatumGetTimestamp(val);
                /* Same as timestamp_out(), but forcing DateStyle */
                if (TIMESTAMP_NOT_FINITE(timestamp))
                    EncodeSpecialTimestamp(timestamp, buf);
                else if (timestamp2tm(timestamp, NULL, &tm, &fsec, NULL, NULL) == 0)
                    EncodeDateTime(&tm, fsec, false, 0, NULL, USE_XSD_DATES, buf);
                else
                    ereport(ERROR,
                            (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
                             errmsg("timestamp out of range")));
                appendStringInfo(result, "\"%s\"", buf);
            }
            break;
        case JSONTYPE_TIMESTAMPTZ:
            {
                TimestampTz timestamp;
                struct pg_tm tm;
                int         tz;
                fsec_t      fsec;
                const char *tzn = NULL;
                char        buf[MAXDATELEN + 1];

                timestamp = DatumGetTimestampTz(val);
                /* Same as timestamptz_out(), but forcing DateStyle */
                if (TIMESTAMP_NOT_FINITE(timestamp))
                    EncodeSpecialTimestamp(timestamp, buf);
                else if (timestamp2tm(timestamp, &tz, &tm, &fsec, &tzn, NULL) == 0)
                    EncodeDateTime(&tm, fsec, true, tz, tzn, USE_XSD_DATES, buf);
                else
                    ereport(ERROR,
                            (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
                             errmsg("timestamp out of range")));
                appendStringInfo(result, "\"%s\"", buf);
            }
            break;
        case JSONTYPE_JSON:
            /* JSON and JSONB output will already be escaped */
            outputstr = OidOutputFunctionCall(outfuncoid, val);
            appendStringInfoString(result, outputstr);
            pfree(outputstr);
            break;
        case JSONTYPE_CAST:
            /* outfuncoid refers to a cast function, not an output function */
            jsontext = DatumGetTextPP(OidFunctionCall1(outfuncoid, val));
            outputstr = text_to_cstring(jsontext);
            appendStringInfoString(result, outputstr);
            pfree(outputstr);
            pfree(jsontext);
            break;
        default:
            outputstr = OidOutputFunctionCall(outfuncoid, val);
            escape_json(result, outputstr);
            pfree(outputstr);
            break;
    }
}
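
/*
 * Illustrative examples of the dispatch above, not from the original source:
 * boolean true is appended as the bare token true; numeric 1.50 passes
 * IsValidJsonNumber() and is appended verbatim, while numeric 'NaN' fails it
 * and comes out quoted as "NaN"; a date is rendered in XSD style, e.g.
 * "2017-06-21"; and values hitting the default branch go through
 * escape_json(), so the text value he said "hi" becomes "he said \"hi\"".
 */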

/*
 * Process a single dimension of an array.
 * If it's the innermost dimension, output the values, otherwise call
 * ourselves recursively to process the next dimension.
 */
static void
array_dim_to_json(StringInfo result, int dim, int ndims, int *dims, Datum *vals,
                  bool *nulls, int *valcount, JsonTypeCategory tcategory,
                  Oid outfuncoid, bool use_line_feeds)
{
    int         i;
    const char *sep;

    Assert(dim < ndims);

    sep = use_line_feeds ? ",\n " : ",";

    appendStringInfoChar(result, '[');

    for (i = 1; i <= dims[dim]; i++)
    {
        if (i > 1)
            appendStringInfoString(result, sep);

        if (dim + 1 == ndims)
        {
            datum_to_json(vals[*valcount], nulls[*valcount], result, tcategory,
                          outfuncoid, false);
            (*valcount)++;
        }
        else
        {
            /*
             * Do we want line feeds on inner dimensions of arrays? For now
             * we'll say no.
             */
            array_dim_to_json(result, dim + 1, ndims, dims, vals, nulls,
                              valcount, tcategory, outfuncoid, false);
        }
    }

    appendStringInfoChar(result, ']');
}
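
/*
 * Illustrative example, not from the original source: for the 2-D integer
 * array '{{1,2},{3,4}}' the outer call handles dimension 0 and recurses once
 * per row, yielding [[1,2],[3,4]].  Note that the recursive call passes
 * use_line_feeds = false, so only the outermost dimension is ever
 * pretty-printed.
 */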

/*
 * Turn an array into JSON.
 */
static void
array_to_json_internal(Datum array, StringInfo result, bool use_line_feeds)
{
    ArrayType  *v = DatumGetArrayTypeP(array);
    Oid         element_type = ARR_ELEMTYPE(v);
    int        *dim;
    int         ndim;
    int         nitems;
    int         count = 0;
    Datum      *elements;
    bool       *nulls;
    int16       typlen;
    bool        typbyval;
    char        typalign;
    JsonTypeCategory tcategory;
    Oid         outfuncoid;

    ndim = ARR_NDIM(v);
    dim = ARR_DIMS(v);
    nitems = ArrayGetNItems(ndim, dim);

    if (nitems <= 0)
    {
        appendStringInfoString(result, "[]");
        return;
    }

    get_typlenbyvalalign(element_type,
                         &typlen, &typbyval, &typalign);

    json_categorize_type(element_type,
                         &tcategory, &outfuncoid);

    deconstruct_array(v, element_type, typlen, typbyval,
                      typalign, &elements, &nulls,
                      &nitems);

    array_dim_to_json(result, 0, ndim, dim, elements, nulls, &count, tcategory,
                      outfuncoid, use_line_feeds);

    pfree(elements);
    pfree(nulls);
}
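
/*
 * Illustrative example, not from the original source: an empty array takes
 * the nitems <= 0 fast path and yields [], while '{1,NULL,3}'::int[] is
 * deconstructed into three elements and rendered as [1,null,3].
 */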

/*
 * Turn a composite / record into JSON.
 */
static void
composite_to_json(Datum composite, StringInfo result, bool use_line_feeds)
{
    HeapTupleHeader td;
    Oid         tupType;
    int32       tupTypmod;
    TupleDesc   tupdesc;
    HeapTupleData tmptup,
               *tuple;
    int         i;
    bool        needsep = false;
    const char *sep;

    sep = use_line_feeds ? ",\n " : ",";

    td = DatumGetHeapTupleHeader(composite);

    /* Extract rowtype info and find a tupdesc */
    tupType = HeapTupleHeaderGetTypeId(td);
    tupTypmod = HeapTupleHeaderGetTypMod(td);
    tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod);

    /* Build a temporary HeapTuple control structure */
    tmptup.t_len = HeapTupleHeaderGetDatumLength(td);
    tmptup.t_data = td;
    tuple = &tmptup;

    appendStringInfoChar(result, '{');

    for (i = 0; i < tupdesc->natts; i++)
    {
        Datum       val;
        bool        isnull;
        char       *attname;
        JsonTypeCategory tcategory;
        Oid         outfuncoid;
        Form_pg_attribute att = TupleDescAttr(tupdesc, i);

        if (att->attisdropped)
            continue;

        if (needsep)
            appendStringInfoString(result, sep);
        needsep = true;

        attname = NameStr(att->attname);
        escape_json(result, attname);
        appendStringInfoChar(result, ':');

        val = heap_getattr(tuple, i + 1, tupdesc, &isnull);

        if (isnull)
        {
            tcategory = JSONTYPE_NULL;
            outfuncoid = InvalidOid;
        }
        else
            json_categorize_type(att->atttypid, &tcategory, &outfuncoid);

        datum_to_json(val, isnull, result, tcategory, outfuncoid, false);
    }

    appendStringInfoChar(result, '}');
    ReleaseTupleDesc(tupdesc);
}
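
/*
 * Illustrative example, not from the original source: an anonymous record
 * such as ROW(1, 'x') has columns named f1 and f2, so it renders as
 * {"f1":1,"f2":"x"}.  Columns dropped from a table's rowtype are skipped
 * entirely rather than emitted as nulls.
 */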

/*
 * Append JSON text for "val" to "result".
 *
 * This is just a thin wrapper around datum_to_json. If the same type will be
 * printed many times, avoid using this; better to do the json_categorize_type
 * lookups only once.
 */
static void
add_json(Datum val, bool is_null, StringInfo result,
         Oid val_type, bool key_scalar)
{
    JsonTypeCategory tcategory;
    Oid         outfuncoid;

    if (val_type == InvalidOid)
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                 errmsg("could not determine input data type")));

    if (is_null)
    {
        tcategory = JSONTYPE_NULL;
        outfuncoid = InvalidOid;
    }
    else
        json_categorize_type(val_type,
                             &tcategory, &outfuncoid);

    datum_to_json(val, is_null, result, tcategory, outfuncoid, key_scalar);
}
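
/*
 * Usage sketch, illustrative only: a caller holding a text Datum "d" could
 * append it as a properly escaped JSON value with
 *     add_json(d, false, result, TEXTOID, false);
 * Passing key_scalar = true instead would also reject array, composite, json
 * and cast categories, which is how a caller would emit an object key.
 */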

/*
 * SQL function array_to_json(array)
 */
extern Datum
array_to_json(PG_FUNCTION_ARGS)
{
    Datum       array = PG_GETARG_DATUM(0);
    StringInfo  result;

    result = makeStringInfo();

    array_to_json_internal(array, result, false);

    PG_RETURN_TEXT_P(cstring_to_text_with_len(result->data, result->len));
}

/*
 * SQL function array_to_json(array, prettybool)
 */
extern Datum
array_to_json_pretty(PG_FUNCTION_ARGS)
{
    Datum       array = PG_GETARG_DATUM(0);
    bool        use_line_feeds = PG_GETARG_BOOL(1);
    StringInfo  result;

    result = makeStringInfo();

    array_to_json_internal(array, result, use_line_feeds);

    PG_RETURN_TEXT_P(cstring_to_text_with_len(result->data, result->len));
}
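
/*
 * Illustrative SQL-level example, not from the original source:
 *     SELECT array_to_json('{{1,5},{99,100}}'::int[]);
 * returns [[1,5],[99,100]]; with the pretty flag set to true, a newline and a
 * space are inserted between the outermost elements.
 */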

/*
 * SQL function row_to_json(row)
 */
extern Datum
row_to_json(PG_FUNCTION_ARGS)
{
    Datum       array = PG_GETARG_DATUM(0);
    StringInfo  result;

    result = makeStringInfo();

    composite_to_json(array, result, false);

    PG_RETURN_TEXT_P(cstring_to_text_with_len(result->data, result->len));
}

/*
 * SQL function row_to_json(row, prettybool)
 */
extern Datum
row_to_json_pretty(PG_FUNCTION_ARGS)
{
    Datum       array = PG_GETARG_DATUM(0);
    bool        use_line_feeds = PG_GETARG_BOOL(1);
    StringInfo  result;

    result = makeStringInfo();

    composite_to_json(array, result, use_line_feeds);

    PG_RETURN_TEXT_P(cstring_to_text_with_len(result->data, result->len));
}
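
/*
 * Illustrative SQL-level example, not from the original source:
 *     SELECT row_to_json(row(1, 'foo'));
 * returns {"f1":1,"f2":"foo"}; with the pretty flag set to true the fields
 * are separated by ",\n " instead of ",".
 */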

/*
 * SQL function to_json(anyvalue)
 */
Datum
to_json(PG_FUNCTION_ARGS)
{
    Datum       val = PG_GETARG_DATUM(0);
    Oid         val_type = get_fn_expr_argtype(fcinfo->flinfo, 0);
    StringInfo  result;
    JsonTypeCategory tcategory;
    Oid         outfuncoid;

    if (val_type == InvalidOid)
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                 errmsg("could not determine input data type")));

    json_categorize_type(val_type,
                         &tcategory, &outfuncoid);

    result = makeStringInfo();

    datum_to_json(val, false, result, tcategory, outfuncoid, false);

    PG_RETURN_TEXT_P(cstring_to_text_with_len(result->data, result->len));
}
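
/*
 * Illustrative SQL-level examples, not from the original source:
 *     SELECT to_json('Fred said "Hi."'::text);   -- "Fred said \"Hi.\""
 *     SELECT to_json(row(42, 'x'));              -- {"f1":42,"f2":"x"}
 * The argument's type is looked up from the call expression; if it cannot be
 * determined, the error above ("could not determine input data type") is
 * raised.
 */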

/*
 * json_agg transition function
 *
 * aggregate input column as a json array value.
 */
Datum
json_agg_transfn(PG_FUNCTION_ARGS)
{
    MemoryContext aggcontext,
                oldcontext;
    JsonAggState *state;
    Datum       val;

    if (!AggCheckCallContext(fcinfo, &aggcontext))
    {
        /* cannot be called directly because of internal-type argument */
        elog(ERROR, "json_agg_transfn called in non-aggregate context");
    }

    if (PG_ARGISNULL(0))
    {
        Oid         arg_type = get_fn_expr_argtype(fcinfo->flinfo, 1);

        if (arg_type == InvalidOid)
            ereport(ERROR,
                    (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                     errmsg("could not determine input data type")));

        /*
         * Make this state object in a context where it will persist for the
         * duration of the aggregate call. MemoryContextSwitchTo is only
         * needed the first time, as the StringInfo routines make sure they
         * use the right context to enlarge the object if necessary.
         */
        oldcontext = MemoryContextSwitchTo(aggcontext);
        state = (JsonAggState *) palloc(sizeof(JsonAggState));
        state->str = makeStringInfo();
        MemoryContextSwitchTo(oldcontext);

        appendStringInfoChar(state->str, '[');
        json_categorize_type(arg_type, &state->val_category,
                             &state->val_output_func);
    }
    else
    {
        state = (JsonAggState *) PG_GETARG_POINTER(0);
        appendStringInfoString(state->str, ", ");
    }

    /* fast path for NULLs */
    if (PG_ARGISNULL(1))
    {
        datum_to_json((Datum) 0, true, state->str, JSONTYPE_NULL,
                      InvalidOid, false);
        PG_RETURN_POINTER(state);
    }

    val = PG_GETARG_DATUM(1);

    /* add some whitespace if structured type and not first item */
    if (!PG_ARGISNULL(0) &&
        (state->val_category == JSONTYPE_ARRAY ||
         state->val_category == JSONTYPE_COMPOSITE))
    {
        appendStringInfoString(state->str, "\n ");
    }

    datum_to_json(val, false, state->str, state->val_category,
                  state->val_output_func, false);

    /*
     * The transition type for json_agg() is declared to be "internal", which
     * is a pass-by-value type the same size as a pointer. So we can safely
     * pass the JsonAggState pointer through nodeAgg.c's machinations.
     */
    PG_RETURN_POINTER(state);
}

/*
 * json_agg final function
 */
Datum
json_agg_finalfn(PG_FUNCTION_ARGS)
{
    JsonAggState *state;

    /* cannot be called directly because of internal-type argument */
    Assert(AggCheckCallContext(fcinfo, NULL));

    state = PG_ARGISNULL(0) ?
        NULL :
        (JsonAggState *) PG_GETARG_POINTER(0);

    /* NULL result for no rows in, as is standard with aggregates */
    if (state == NULL)
        PG_RETURN_NULL();

    /* Else return state with appropriate array terminator added */
    PG_RETURN_TEXT_P(catenate_stringinfo_string(state->str, "]"));
}
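
/*
 * Illustrative usage of json_agg() (a sketch of expected behavior given the
 * transition/final functions above; the VALUES list is made up):
 *
 *     SELECT json_agg(x) FROM (VALUES (1), (2), (3)) AS t(x);
 *     --> [1, 2, 3]
 *
 * A NULL result is returned when no rows are aggregated.
 */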

/*
 * json_object_agg transition function.
 *
 * aggregate two input columns as a single json object value.
 */
Datum
json_object_agg_transfn(PG_FUNCTION_ARGS)
{
    MemoryContext aggcontext,
                oldcontext;
    JsonAggState *state;
    Datum       arg;

    if (!AggCheckCallContext(fcinfo, &aggcontext))
    {
        /* cannot be called directly because of internal-type argument */
        elog(ERROR, "json_object_agg_transfn called in non-aggregate context");
    }

    if (PG_ARGISNULL(0))
    {
        Oid         arg_type;

        /*
         * Make the StringInfo in a context where it will persist for the
         * duration of the aggregate call.  Switching context is only needed
         * for this initial step, as the StringInfo routines make sure they
         * use the right context to enlarge the object if necessary.
         */
        oldcontext = MemoryContextSwitchTo(aggcontext);
        state = (JsonAggState *) palloc(sizeof(JsonAggState));
        state->str = makeStringInfo();
        MemoryContextSwitchTo(oldcontext);

        arg_type = get_fn_expr_argtype(fcinfo->flinfo, 1);

        if (arg_type == InvalidOid)
            ereport(ERROR,
                    (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                     errmsg("could not determine data type for argument %d", 1)));

        json_categorize_type(arg_type, &state->key_category,
                             &state->key_output_func);

        arg_type = get_fn_expr_argtype(fcinfo->flinfo, 2);

        if (arg_type == InvalidOid)
            ereport(ERROR,
                    (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                     errmsg("could not determine data type for argument %d", 2)));

        json_categorize_type(arg_type, &state->val_category,
                             &state->val_output_func);

        appendStringInfoString(state->str, "{ ");
    }
    else
    {
        state = (JsonAggState *) PG_GETARG_POINTER(0);
        appendStringInfoString(state->str, ", ");
    }

    /*
     * Note: since json_object_agg() is declared as taking type "any", the
     * parser will not do any type conversion on unknown-type literals (that
     * is, undecorated strings or NULLs).  Such values will arrive here as
     * type UNKNOWN, which fortunately does not matter to us, since
     * unknownout() works fine.
     */

    if (PG_ARGISNULL(1))
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                 errmsg("field name must not be null")));

    arg = PG_GETARG_DATUM(1);

    datum_to_json(arg, false, state->str, state->key_category,
                  state->key_output_func, true);

    appendStringInfoString(state->str, " : ");

    if (PG_ARGISNULL(2))
        arg = (Datum) 0;
    else
        arg = PG_GETARG_DATUM(2);

    datum_to_json(arg, PG_ARGISNULL(2), state->str, state->val_category,
                  state->val_output_func, false);

    PG_RETURN_POINTER(state);
}

/*
 * json_object_agg final function.
 */
Datum
json_object_agg_finalfn(PG_FUNCTION_ARGS)
{
    JsonAggState *state;

    /* cannot be called directly because of internal-type argument */
    Assert(AggCheckCallContext(fcinfo, NULL));

    state = PG_ARGISNULL(0) ? NULL : (JsonAggState *) PG_GETARG_POINTER(0);

    /* NULL result for no rows in, as is standard with aggregates */
    if (state == NULL)
        PG_RETURN_NULL();

    /* Else return state with appropriate object terminator added */
    PG_RETURN_TEXT_P(catenate_stringinfo_string(state->str, " }"));
}
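
/*
 * Illustrative usage of json_object_agg() (a sketch of expected behavior
 * given the functions above; the VALUES list is made up):
 *
 *     SELECT json_object_agg(k, v)
 *     FROM (VALUES ('a', 1), ('b', 2)) AS t(k, v);
 *     --> { "a" : 1, "b" : 2 }
 *
 * A null key raises an error; a null value is emitted as JSON null.
 */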

/*
 * Helper function for aggregates: return given StringInfo's contents plus
 * specified trailing string, as a text datum.  We need this because aggregate
 * final functions are not allowed to modify the aggregate state.
 */
static text *
catenate_stringinfo_string(StringInfo buffer, const char *addon)
{
    /* custom version of cstring_to_text_with_len */
    int         buflen = buffer->len;
    int         addlen = strlen(addon);
    text       *result = (text *) palloc(buflen + addlen + VARHDRSZ);

    SET_VARSIZE(result, buflen + addlen + VARHDRSZ);
    memcpy(VARDATA(result), buffer->data, buflen);
    memcpy(VARDATA(result) + buflen, addon, addlen);

    return result;
}
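
/*
 * For instance, if the aggregate state buffer holds '{ "a" : 1' and addon is
 * " }", the text datum returned is '{ "a" : 1 }', while the buffer itself is
 * left untouched.
 */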

/*
 * SQL function json_build_object(variadic "any")
 */
Datum
json_build_object(PG_FUNCTION_ARGS)
{
    int         nargs = PG_NARGS();
    int         i;
    Datum       arg;
    const char *sep = "";
    StringInfo  result;
    Oid         val_type;

    if (nargs % 2 != 0)
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                 errmsg("argument list must have even number of elements"),
                 errhint("The arguments of json_build_object() must consist of alternating keys and values.")));

    result = makeStringInfo();

    appendStringInfoChar(result, '{');

    for (i = 0; i < nargs; i += 2)
    {
        /*
         * Note: since json_build_object() is declared as taking type "any",
         * the parser will not do any type conversion on unknown-type literals
         * (that is, undecorated strings or NULLs).  Such values will arrive
         * here as type UNKNOWN, which fortunately does not matter to us,
         * since unknownout() works fine.
         */
        appendStringInfoString(result, sep);
        sep = ", ";

        /* process key */
        val_type = get_fn_expr_argtype(fcinfo->flinfo, i);

        if (val_type == InvalidOid)
            ereport(ERROR,
                    (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                     errmsg("could not determine data type for argument %d",
                            i + 1)));

        if (PG_ARGISNULL(i))
            ereport(ERROR,
                    (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                     errmsg("argument %d cannot be null", i + 1),
                     errhint("Object keys should be text.")));

        arg = PG_GETARG_DATUM(i);

        add_json(arg, false, result, val_type, true);

        appendStringInfoString(result, " : ");

        /* process value */
        val_type = get_fn_expr_argtype(fcinfo->flinfo, i + 1);

        if (val_type == InvalidOid)
            ereport(ERROR,
                    (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                     errmsg("could not determine data type for argument %d",
                            i + 2)));

        if (PG_ARGISNULL(i + 1))
            arg = (Datum) 0;
        else
            arg = PG_GETARG_DATUM(i + 1);

        add_json(arg, PG_ARGISNULL(i + 1), result, val_type, false);
    }

    appendStringInfoChar(result, '}');

    PG_RETURN_TEXT_P(cstring_to_text_with_len(result->data, result->len));
}
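
/*
 * Illustrative usage of json_build_object() (expected behavior per the code
 * above; the argument values are made up):
 *
 *     SELECT json_build_object('name', 'Bob', 'age', 42);
 *     --> {"name" : "Bob", "age" : 42}
 *
 * An odd number of arguments, or a NULL key, raises an error.
 */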

/*
 * degenerate case of json_build_object where it gets 0 arguments.
 */
Datum
json_build_object_noargs(PG_FUNCTION_ARGS)
{
    PG_RETURN_TEXT_P(cstring_to_text_with_len("{}", 2));
}

/*
 * SQL function json_build_array(variadic "any")
 */
Datum
json_build_array(PG_FUNCTION_ARGS)
{
    int         nargs = PG_NARGS();
    int         i;
    Datum       arg;
    const char *sep = "";
    StringInfo  result;
    Oid         val_type;

    result = makeStringInfo();

    appendStringInfoChar(result, '[');

    for (i = 0; i < nargs; i++)
    {
        /*
         * Note: since json_build_array() is declared as taking type "any",
         * the parser will not do any type conversion on unknown-type literals
         * (that is, undecorated strings or NULLs).  Such values will arrive
         * here as type UNKNOWN, which fortunately does not matter to us,
         * since unknownout() works fine.
         */
        appendStringInfoString(result, sep);
        sep = ", ";

        val_type = get_fn_expr_argtype(fcinfo->flinfo, i);

        if (val_type == InvalidOid)
            ereport(ERROR,
                    (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                     errmsg("could not determine data type for argument %d",
                            i + 1)));

        if (PG_ARGISNULL(i))
            arg = (Datum) 0;
        else
            arg = PG_GETARG_DATUM(i);

        add_json(arg, PG_ARGISNULL(i), result, val_type, false);
    }

    appendStringInfoChar(result, ']');

    PG_RETURN_TEXT_P(cstring_to_text_with_len(result->data, result->len));
}
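
/*
 * Illustrative usage of json_build_array() (expected behavior per the code
 * above; the argument values are made up):
 *
 *     SELECT json_build_array(1, 'two', NULL, true);
 *     --> [1, "two", null, true]
 */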

/*
 * degenerate case of json_build_array where it gets 0 arguments.
 */
Datum
json_build_array_noargs(PG_FUNCTION_ARGS)
{
    PG_RETURN_TEXT_P(cstring_to_text_with_len("[]", 2));
}

/*
 * SQL function json_object(text[])
 *
 * take a one or two dimensional array of text as key/value pairs
 * for a json object.
 */
Datum
json_object(PG_FUNCTION_ARGS)
{
    ArrayType  *in_array = PG_GETARG_ARRAYTYPE_P(0);
    int         ndims = ARR_NDIM(in_array);
    StringInfoData result;
    Datum      *in_datums;
    bool       *in_nulls;
    int         in_count,
                count,
                i;
    text       *rval;
    char       *v;

    switch (ndims)
    {
        case 0:
            PG_RETURN_DATUM(CStringGetTextDatum("{}"));
            break;

        case 1:
            if ((ARR_DIMS(in_array)[0]) % 2)
                ereport(ERROR,
                        (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
                         errmsg("array must have even number of elements")));
            break;

        case 2:
            if ((ARR_DIMS(in_array)[1]) != 2)
                ereport(ERROR,
                        (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
                         errmsg("array must have two columns")));
            break;

        default:
            ereport(ERROR,
                    (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
                     errmsg("wrong number of array subscripts")));
    }

    deconstruct_array(in_array,
                      TEXTOID, -1, false, 'i',
                      &in_datums, &in_nulls, &in_count);

    count = in_count / 2;

    initStringInfo(&result);

    appendStringInfoChar(&result, '{');

    for (i = 0; i < count; ++i)
    {
        if (in_nulls[i * 2])
            ereport(ERROR,
                    (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
                     errmsg("null value not allowed for object key")));

        v = TextDatumGetCString(in_datums[i * 2]);
        if (i > 0)
            appendStringInfoString(&result, ", ");
        escape_json(&result, v);
        appendStringInfoString(&result, " : ");
        pfree(v);
        if (in_nulls[i * 2 + 1])
            appendStringInfoString(&result, "null");
        else
        {
            v = TextDatumGetCString(in_datums[i * 2 + 1]);
            escape_json(&result, v);
            pfree(v);
        }
    }

    appendStringInfoChar(&result, '}');

    pfree(in_datums);
    pfree(in_nulls);

    rval = cstring_to_text_with_len(result.data, result.len);
    pfree(result.data);

    PG_RETURN_TEXT_P(rval);
}
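
/*
 * Illustrative usage of json_object(text[]) (expected behavior per the code
 * above; the array literals are made up):
 *
 *     SELECT json_object('{a, 1, b, 2}');       -- one-dimensional form
 *     SELECT json_object('{{a, 1}, {b, 2}}');   -- two-dimensional form
 *     --> {"a" : "1", "b" : "2"}
 *
 * All values are emitted as JSON strings, since the input is text.
 */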

/*
 * SQL function json_object(text[], text[])
 *
 * take separate key and value arrays of text to construct a json object
 * pairwise.
 */
Datum
json_object_two_arg(PG_FUNCTION_ARGS)
{
    ArrayType  *key_array = PG_GETARG_ARRAYTYPE_P(0);
    ArrayType  *val_array = PG_GETARG_ARRAYTYPE_P(1);
    int         nkdims = ARR_NDIM(key_array);
    int         nvdims = ARR_NDIM(val_array);
    StringInfoData result;
    Datum      *key_datums,
               *val_datums;
    bool       *key_nulls,
               *val_nulls;
    int         key_count,
                val_count,
                i;
    text       *rval;
    char       *v;

    if (nkdims > 1 || nkdims != nvdims)
        ereport(ERROR,
                (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
                 errmsg("wrong number of array subscripts")));

    if (nkdims == 0)
        PG_RETURN_DATUM(CStringGetTextDatum("{}"));

    deconstruct_array(key_array,
                      TEXTOID, -1, false, 'i',
                      &key_datums, &key_nulls, &key_count);

    deconstruct_array(val_array,
                      TEXTOID, -1, false, 'i',
                      &val_datums, &val_nulls, &val_count);

    if (key_count != val_count)
        ereport(ERROR,
                (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
                 errmsg("mismatched array dimensions")));

    initStringInfo(&result);

    appendStringInfoChar(&result, '{');

    for (i = 0; i < key_count; ++i)
    {
        if (key_nulls[i])
            ereport(ERROR,
                    (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
                     errmsg("null value not allowed for object key")));

        v = TextDatumGetCString(key_datums[i]);
        if (i > 0)
            appendStringInfoString(&result, ", ");
        escape_json(&result, v);
        appendStringInfoString(&result, " : ");
        pfree(v);
        if (val_nulls[i])
            appendStringInfoString(&result, "null");
        else
        {
            v = TextDatumGetCString(val_datums[i]);
            escape_json(&result, v);
            pfree(v);
        }
    }

    appendStringInfoChar(&result, '}');

    pfree(key_datums);
    pfree(key_nulls);
    pfree(val_datums);
    pfree(val_nulls);

    rval = cstring_to_text_with_len(result.data, result.len);
    pfree(result.data);

    PG_RETURN_TEXT_P(rval);
}
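
/*
 * Illustrative usage of the two-argument json_object() (expected behavior per
 * the code above; the array literals are made up):
 *
 *     SELECT json_object('{a, b}', '{1, 2}');
 *     --> {"a" : "1", "b" : "2"}
 *
 * The key and value arrays must have the same number of elements.
 */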

/*
 * Produce a JSON string literal, properly escaping characters in the text.
 */
void
escape_json(StringInfo buf, const char *str)
{
    const char *p;

    appendStringInfoCharMacro(buf, '"');
    for (p = str; *p; p++)
    {
        switch (*p)
        {
            case '\b':
                appendStringInfoString(buf, "\\b");
                break;
            case '\f':
                appendStringInfoString(buf, "\\f");
                break;
            case '\n':
                appendStringInfoString(buf, "\\n");
                break;
            case '\r':
                appendStringInfoString(buf, "\\r");
                break;
            case '\t':
                appendStringInfoString(buf, "\\t");
                break;
            case '"':
                appendStringInfoString(buf, "\\\"");
                break;
            case '\\':
                appendStringInfoString(buf, "\\\\");
                break;
            default:
                if ((unsigned char) *p < ' ')
                    appendStringInfo(buf, "\\u%04x", (int) *p);
                else
                    appendStringInfoCharMacro(buf, *p);
                break;
        }
    }
    appendStringInfoCharMacro(buf, '"');
}
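
/*
 * For example, escape_json() turns the C string
 *     He said "hi"
 * into the JSON literal
 *     "He said \"hi\""
 * while control characters below U+0020 that have no shorthand escape are
 * emitted as \uXXXX sequences.
 */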

/*
 * SQL function json_typeof(json) -> text
 *
 * Returns the type of the outermost JSON value as TEXT.  Possible types are
 * "object", "array", "string", "number", "boolean", and "null".
 *
 * Performs a single call to json_lex() to get the first token of the supplied
 * value.  This initial token uniquely determines the value's type.  As our
 * input must already have been validated by json_in() or json_recv(), the
 * initial token should never be JSON_TOKEN_OBJECT_END, JSON_TOKEN_ARRAY_END,
 * JSON_TOKEN_COLON, JSON_TOKEN_COMMA, or JSON_TOKEN_END.
 */
Datum
json_typeof(PG_FUNCTION_ARGS)
{
    text       *json;
    JsonLexContext *lex;
    JsonTokenType tok;
    char       *type;

    json = PG_GETARG_TEXT_PP(0);
    lex = makeJsonLexContext(json, false);

    /* Lex exactly one token from the input and check its type. */
    json_lex(lex);
    tok = lex_peek(lex);
    switch (tok)
    {
        case JSON_TOKEN_OBJECT_START:
            type = "object";
            break;
        case JSON_TOKEN_ARRAY_START:
            type = "array";
            break;
        case JSON_TOKEN_STRING:
            type = "string";
            break;
        case JSON_TOKEN_NUMBER:
            type = "number";
            break;
        case JSON_TOKEN_TRUE:
        case JSON_TOKEN_FALSE:
            type = "boolean";
            break;
        case JSON_TOKEN_NULL:
            type = "null";
            break;
        default:
            elog(ERROR, "unexpected json token: %d", tok);
    }

    PG_RETURN_TEXT_P(cstring_to_text(type));
}
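
/*
 * Illustrative usage of json_typeof() (expected behavior per the code above;
 * the literals are made up):
 *
 *     SELECT json_typeof('-123.4');      --> number
 *     SELECT json_typeof('[1, 2, 3]');   --> array
 *     SELECT json_typeof('"foo"');       --> string
 *     SELECT json_typeof('null');        --> null
 */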