1996-07-09 06:22:35 +00:00
|
|
|
%{
|
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
*
|
1999-02-13 23:22:53 +00:00
|
|
|
* scan.l
|
2001-02-21 18:53:47 +00:00
|
|
|
* lexical scanner for PostgreSQL
|
1996-07-09 06:22:35 +00:00
|
|
|
*
|
2002-06-20 20:29:54 +00:00
|
|
|
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
|
2000-01-26 05:58:53 +00:00
|
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
1996-07-09 06:22:35 +00:00
|
|
|
*
|
|
|
|
*
|
|
|
|
* IDENTIFICATION
|
2002-06-20 20:29:54 +00:00
|
|
|
* $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.96 2002/06/20 20:29:33 momjian Exp $
|
1996-07-09 06:22:35 +00:00
|
|
|
*
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
*/
|
2000-10-30 17:54:16 +00:00
|
|
|
#include "postgres.h"
|
|
|
|
|
1996-07-09 06:22:35 +00:00
|
|
|
#include <ctype.h>
|
|
|
|
#include <unistd.h>
|
1997-02-19 20:11:05 +00:00
|
|
|
#include <errno.h>
|
1996-07-09 06:22:35 +00:00
|
|
|
|
|
|
|
#include "miscadmin.h"
|
|
|
|
#include "nodes/parsenodes.h"
|
1999-07-16 05:00:38 +00:00
|
|
|
#include "nodes/pg_list.h"
|
1996-11-08 20:46:33 +00:00
|
|
|
#include "parser/gramparse.h"
|
1996-07-09 06:22:35 +00:00
|
|
|
#include "parser/keywords.h"
|
2000-05-29 05:45:56 +00:00
|
|
|
#include "parser/parse.h"
|
1997-02-19 20:11:05 +00:00
|
|
|
#include "utils/builtins.h"
|
1996-07-09 06:22:35 +00:00
|
|
|
|
2000-08-22 13:01:20 +00:00
|
|
|
#ifdef MULTIBYTE
|
|
|
|
#include "mb/pg_wchar.h"
|
|
|
|
#endif
|
|
|
|
|
2002-04-20 21:56:15 +00:00
|
|
|
/* No reason to constrain amount of data slurped */
|
2000-03-13 01:52:06 +00:00
|
|
|
#define YY_READ_BUF_SIZE 16777216
|
|
|
|
|
2001-02-03 20:13:05 +00:00
|
|
|
/* Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error) */
|
|
|
|
#define fprintf(file, fmt, msg) elog(FATAL, "%s", (msg))
|
|
|
|
|
1996-07-09 06:22:35 +00:00
|
|
|
extern YYSTYPE yylval;
|
1997-09-01 05:51:52 +00:00
|
|
|
|
2001-09-07 23:17:14 +00:00
|
|
|
static int xcdepth = 0; /* depth of nesting in slash-star comments */
|
|
|
|
|
1999-10-18 02:42:31 +00:00
|
|
|
/*
|
|
|
|
* literalbuf is used to accumulate literal values when multiple rules
|
|
|
|
* are needed to parse a single literal. Call startlit to reset buffer
|
|
|
|
* to empty, addlit to add text. Note that the buffer is palloc'd and
|
|
|
|
* starts life afresh on every parse cycle.
|
|
|
|
*/
|
|
|
|
static char *literalbuf; /* expandable buffer */
|
|
|
|
static int literallen; /* actual current length */
|
|
|
|
static int literalalloc; /* current allocated buffer size */
|
|
|
|
|
|
|
|
#define startlit() (literalbuf[0] = '\0', literallen = 0)
|
|
|
|
static void addlit(char *ytext, int yleng);
|
2002-04-20 21:56:15 +00:00
|
|
|
static void addlitchar(unsigned char ychar);
|
|
|
|
static char *litbufdup(void);
|
|
|
|
|
2002-05-01 17:12:08 +00:00
|
|
|
/*
|
|
|
|
* When we parse a token that requires multiple lexer rules to process,
|
|
|
|
* we set token_start to point at the true start of the token, for use
|
|
|
|
* by yyerror(). yytext will point at just the text consumed by the last
|
|
|
|
* rule, so it's not very helpful (eg, it might contain just the last
|
|
|
|
* quote mark of a quoted identifier). But to avoid cluttering every rule
|
|
|
|
* with setting token_start, we allow token_start = NULL to denote that
|
|
|
|
* it's okay to use yytext.
|
|
|
|
*/
|
|
|
|
static char *token_start;
|
|
|
|
|
2002-04-20 21:56:15 +00:00
|
|
|
/* Handles to the buffer that the lexer uses internally */
|
|
|
|
static YY_BUFFER_STATE scanbufhandle;
|
|
|
|
static char *scanbuf;
|
|
|
|
|
|
|
|
unsigned char unescape_single_char(unsigned char c);
|
1997-09-01 05:51:52 +00:00
|
|
|
|
1996-07-09 06:22:35 +00:00
|
|
|
%}
|
2002-04-20 21:56:15 +00:00
|
|
|
|
|
|
|
%option 8bit
|
|
|
|
%option never-interactive
|
|
|
|
%option nounput
|
|
|
|
%option noyywrap
|
|
|
|
%option prefix="base_yy"
|
|
|
|
|
2000-02-19 04:17:25 +00:00
|
|
|
/*
|
|
|
|
* OK, here is a short description of lex/flex rules behavior.
|
1997-11-30 23:05:39 +00:00
|
|
|
* The longest pattern which matches an input string is always chosen.
|
|
|
|
* For equal-length patterns, the first occurring in the rules list is chosen.
|
2000-02-19 04:17:25 +00:00
|
|
|
* INITIAL is the starting state, to which all non-conditional rules apply.
|
|
|
|
* Exclusive states change parsing rules while the state is active. When in
|
|
|
|
* an exclusive state, only those rules defined for that state apply.
|
1997-11-30 23:05:39 +00:00
|
|
|
*
|
2000-02-19 04:17:25 +00:00
|
|
|
* We use exclusive states for quoted strings, extended comments,
|
|
|
|
* and to eliminate parsing troubles for numeric strings.
|
1997-11-30 23:05:39 +00:00
|
|
|
* Exclusive states:
|
2002-06-11 15:44:38 +00:00
|
|
|
* <xb> bit string literal
|
2000-03-18 18:03:12 +00:00
|
|
|
* <xc> extended C-style comments - thomas 1997-07-12
|
|
|
|
* <xd> delimited identifiers (double-quoted identifiers) - thomas 1997-10-27
|
1997-11-30 23:05:39 +00:00
|
|
|
* <xh> hexadecimal numeric string - thomas 1997-11-16
|
2000-03-18 18:03:12 +00:00
|
|
|
* <xq> quoted strings - thomas 1997-07-30
|
1997-11-30 23:05:39 +00:00
|
|
|
*/
|
1997-09-08 03:20:18 +00:00
|
|
|
|
2002-06-11 15:44:38 +00:00
|
|
|
%x xb
|
1997-09-01 05:51:52 +00:00
|
|
|
%x xc
|
1997-10-30 16:36:39 +00:00
|
|
|
%x xd
|
1997-11-17 16:31:39 +00:00
|
|
|
%x xh
|
1997-11-30 23:05:39 +00:00
|
|
|
%x xq
|
1997-09-01 05:51:52 +00:00
|
|
|
|
2000-10-31 10:22:13 +00:00
|
|
|
/* Bit string
|
1997-11-30 23:05:39 +00:00
|
|
|
*/
|
2002-06-11 15:44:38 +00:00
|
|
|
xbstart [bB]{quote}
|
|
|
|
xbstop {quote}
|
|
|
|
xbinside [^']*
|
|
|
|
xbcat {quote}{whitespace_with_newline}{quote}
|
1997-11-17 16:31:39 +00:00
|
|
|
|
1997-11-30 23:05:39 +00:00
|
|
|
/* Hexadecimal number
|
|
|
|
*/
|
1997-11-17 16:31:39 +00:00
|
|
|
xhstart [xX]{quote}
|
|
|
|
xhstop {quote}
|
2000-02-19 04:17:25 +00:00
|
|
|
xhinside [^']+
|
|
|
|
xhcat {quote}{whitespace_with_newline}{quote}
|
1997-11-17 16:31:39 +00:00
|
|
|
|
1997-11-30 23:05:39 +00:00
|
|
|
/* Extended quote
|
|
|
|
* xqdouble implements SQL92 embedded quote
|
|
|
|
* xqcat allows strings to cross input lines
|
|
|
|
*/
|
|
|
|
quote '
|
|
|
|
xqstart {quote}
|
|
|
|
xqstop {quote}
|
|
|
|
xqdouble {quote}{quote}
|
2000-02-19 04:17:25 +00:00
|
|
|
xqinside [^\\']+
|
2002-04-20 21:56:15 +00:00
|
|
|
xqescape [\\][^0-7]
|
|
|
|
xqoctesc [\\][0-7]{1,3}
|
2000-02-19 04:17:25 +00:00
|
|
|
xqcat {quote}{whitespace_with_newline}{quote}
|
1997-11-30 23:05:39 +00:00
|
|
|
|
2002-04-20 21:56:15 +00:00
|
|
|
/* Double quote
|
1997-11-30 23:05:39 +00:00
|
|
|
* Allows embedded spaces and other special characters into identifiers.
|
|
|
|
*/
|
1997-10-30 16:36:39 +00:00
|
|
|
dquote \"
|
|
|
|
xdstart {dquote}
|
|
|
|
xdstop {dquote}
|
2000-08-06 17:50:48 +00:00
|
|
|
xddouble {dquote}{dquote}
|
2000-02-19 04:17:25 +00:00
|
|
|
xdinside [^"]+
|
1997-10-30 16:36:39 +00:00
|
|
|
|
2000-02-19 04:17:25 +00:00
|
|
|
/* C-style comments
|
|
|
|
*
|
|
|
|
* The "extended comment" syntax closely resembles allowable operator syntax.
|
|
|
|
* The tricky part here is to get lex to recognize a string starting with
|
|
|
|
* slash-star as a comment, when interpreting it as an operator would produce
|
2000-03-11 05:14:06 +00:00
|
|
|
* a longer match --- remember lex will prefer a longer match! Also, if we
|
|
|
|
* have something like plus-slash-star, lex will think this is a 3-character
|
|
|
|
* operator whereas we want to see it as a + operator and a comment start.
|
|
|
|
* The solution is two-fold:
|
2000-03-18 18:03:12 +00:00
|
|
|
* 1. append {op_chars}* to xcstart so that it matches as much text as
|
2000-03-11 05:14:06 +00:00
|
|
|
* {operator} would. Then the tie-breaker (first matching rule of same
|
|
|
|
* length) ensures xcstart wins. We put back the extra stuff with yyless()
|
|
|
|
* in case it contains a star-slash that should terminate the comment.
|
|
|
|
* 2. In the operator rule, check for slash-star within the operator, and
|
|
|
|
* if found throw it back with yyless(). This handles the plus-slash-star
|
|
|
|
* problem.
|
|
|
|
* SQL92-style comments, which start with dash-dash, have similar interactions
|
|
|
|
* with the operator rule.
|
1997-11-30 23:05:39 +00:00
|
|
|
*/
|
2000-03-18 18:03:12 +00:00
|
|
|
xcstart \/\*{op_chars}*
|
2000-02-19 04:17:25 +00:00
|
|
|
xcstop \*+\/
|
2000-07-14 15:43:57 +00:00
|
|
|
xcinside [^*/]+
|
1996-07-09 06:22:35 +00:00
|
|
|
|
1997-09-08 03:20:18 +00:00
|
|
|
digit [0-9]
|
1997-10-30 15:28:25 +00:00
|
|
|
letter [\200-\377_A-Za-z]
|
|
|
|
letter_or_digit [\200-\377_A-Za-z0-9]
|
1996-07-09 06:22:35 +00:00
|
|
|
|
1997-09-08 03:20:18 +00:00
|
|
|
identifier {letter}{letter_or_digit}*
|
1996-07-09 06:22:35 +00:00
|
|
|
|
1997-09-08 03:20:18 +00:00
|
|
|
typecast "::"
|
1997-09-01 05:51:52 +00:00
|
|
|
|
2000-03-18 18:03:12 +00:00
|
|
|
/*
|
|
|
|
* "self" is the set of chars that should be returned as single-character
|
|
|
|
* tokens. "op_chars" is the set of chars that can make up "Op" tokens,
|
|
|
|
* which can be one or more characters long (but if a single-char token
|
|
|
|
* appears in the "self" set, it is not to be returned as an Op). Note
|
|
|
|
* that the sets overlap, but each has some chars that are not in the other.
|
|
|
|
*
|
|
|
|
* If you change either set, adjust the character lists appearing in the
|
|
|
|
* rule for "operator"!
|
|
|
|
*/
|
2000-10-29 16:11:33 +00:00
|
|
|
self [,()\[\].;$\:\+\-\*\/\%\^\<\>\=]
|
2000-08-12 05:15:24 +00:00
|
|
|
op_chars [\~\!\@\#\^\&\|\`\?\$\+\-\*\/\%\<\>\=]
|
2000-03-18 18:03:12 +00:00
|
|
|
operator {op_chars}+
|
1997-09-13 03:12:55 +00:00
|
|
|
|
1999-10-18 02:42:31 +00:00
|
|
|
/* we no longer allow unary minus in numbers.
|
|
|
|
* instead we pass it separately to parser. there it gets
|
1999-09-27 19:40:40 +00:00
|
|
|
* coerced via doNegate() -- Leon aug 20 1999
|
|
|
|
*/
|
1996-07-09 06:22:35 +00:00
|
|
|
|
1999-09-27 19:40:40 +00:00
|
|
|
integer {digit}+
|
|
|
|
decimal (({digit}*\.{digit}+)|({digit}+\.{digit}*))
|
2000-02-19 04:17:25 +00:00
|
|
|
real ((({digit}*\.{digit}+)|({digit}+\.{digit}*)|({digit}+))([Ee][-+]?{digit}+))
|
1996-07-09 06:22:35 +00:00
|
|
|
|
1997-09-08 03:20:18 +00:00
|
|
|
param \${integer}
|
1996-07-09 06:22:35 +00:00
|
|
|
|
2000-02-19 04:17:25 +00:00
|
|
|
/*
|
|
|
|
* In order to make the world safe for Windows and Mac clients as well as
|
|
|
|
* Unix ones, we accept either \n or \r as a newline. A DOS-style \r\n
|
|
|
|
* sequence will be seen as two successive newlines, but that doesn't cause
|
|
|
|
* any problems. SQL92-style comments, which start with -- and extend to the
|
|
|
|
* next newline, are treated as equivalent to a single whitespace character.
|
|
|
|
*
|
|
|
|
* NOTE a fine point: if there is no newline following --, we will absorb
|
|
|
|
* everything to the end of the input as a comment. This is correct. Older
|
|
|
|
* versions of Postgres failed to recognize -- as a comment if the input
|
|
|
|
* did not end with a newline.
|
|
|
|
*
|
|
|
|
* XXX perhaps \f (formfeed) should be treated as a newline as well?
|
|
|
|
*/
|
1996-07-09 06:22:35 +00:00
|
|
|
|
1999-10-09 01:32:38 +00:00
|
|
|
space [ \t\n\r\f]
|
2000-02-19 04:17:25 +00:00
|
|
|
horiz_space [ \t\f]
|
|
|
|
newline [\n\r]
|
|
|
|
non_newline [^\n\r]
|
|
|
|
|
2000-03-18 18:03:12 +00:00
|
|
|
comment ("--"{non_newline}*)
|
2000-02-19 04:17:25 +00:00
|
|
|
|
2002-05-01 17:12:08 +00:00
|
|
|
whitespace ({space}+|{comment})
|
2000-02-19 04:17:25 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* SQL92 requires at least one newline in the whitespace separating
|
|
|
|
* string literals that are to be concatenated. Silly, but who are we
|
|
|
|
* to argue? Note that {whitespace_with_newline} should not have * after
|
|
|
|
* it, whereas {whitespace} should generally have a * after it...
|
|
|
|
*/
|
|
|
|
|
|
|
|
horiz_whitespace ({horiz_space}|{comment})
|
|
|
|
whitespace_with_newline ({horiz_whitespace}*{newline}{whitespace}*)
|
|
|
|
|
1997-09-08 03:20:18 +00:00
|
|
|
other .
|
1996-07-09 06:22:35 +00:00
|
|
|
|
2002-04-20 21:56:15 +00:00
|
|
|
/*
|
1997-09-24 17:48:25 +00:00
|
|
|
* Quoted strings must allow some special characters such as single-quote
|
|
|
|
* and newline.
|
2000-03-11 05:14:06 +00:00
|
|
|
* Embedded single-quotes are implemented both in the SQL92-standard
|
1997-09-24 17:48:25 +00:00
|
|
|
* style of two adjacent single quotes "''" and in the Postgres/Java style
|
|
|
|
* of escaped-quote "\'".
|
|
|
|
* Other embedded escaped characters are matched explicitly and the leading
|
|
|
|
* backslash is dropped from the string. - thomas 1997-09-24
|
2000-03-11 05:14:06 +00:00
|
|
|
* Note that xcstart must appear before operator, as explained above!
|
|
|
|
* Also whitespace (comment) must appear before operator.
|
1997-09-13 03:12:55 +00:00
|
|
|
*/
|
1996-07-23 02:23:54 +00:00
|
|
|
|
1997-09-13 03:12:55 +00:00
|
|
|
%%
|
2002-05-01 17:12:08 +00:00
|
|
|
|
|
|
|
%{
|
|
|
|
/* code to execute during start of each call of yylex() */
|
|
|
|
token_start = NULL;
|
|
|
|
%}
|
|
|
|
|
2000-02-19 04:17:25 +00:00
|
|
|
{whitespace} { /* ignore */ }
|
1996-07-09 06:22:35 +00:00
|
|
|
|
2000-03-11 05:14:06 +00:00
|
|
|
{xcstart} {
|
2002-05-01 17:12:08 +00:00
|
|
|
token_start = yytext;
|
2000-07-14 15:43:57 +00:00
|
|
|
xcdepth = 0;
|
2000-03-11 05:14:06 +00:00
|
|
|
BEGIN(xc);
|
|
|
|
/* Put back any characters past slash-star; see above */
|
|
|
|
yyless(2);
|
|
|
|
}
|
1996-07-09 06:22:35 +00:00
|
|
|
|
2000-07-14 15:43:57 +00:00
|
|
|
<xc>{xcstart} {
|
|
|
|
xcdepth++;
|
|
|
|
/* Put back any characters past slash-star; see above */
|
|
|
|
yyless(2);
|
|
|
|
}
|
|
|
|
|
|
|
|
<xc>{xcstop}	{
					/*
					 * A star-slash closes one level of slash-star comment.
					 * While nested (xcdepth > 0) just pop one level; at the
					 * outermost level leave the comment state entirely.
					 */
					if (xcdepth > 0)
						xcdepth--;
					else
					{
						BEGIN(INITIAL);
						/* the next token starts afresh */
						token_start = NULL;
					}
				}
|
1996-07-09 06:22:35 +00:00
|
|
|
|
1997-09-01 05:51:52 +00:00
|
|
|
<xc>{xcinside} { /* ignore */ }
|
|
|
|
|
2000-07-14 15:43:57 +00:00
|
|
|
<xc>{op_chars} { /* ignore */ }
|
|
|
|
|
2002-05-01 17:12:08 +00:00
|
|
|
<xc><<EOF>> { yyerror("unterminated /* comment"); }
|
2000-03-11 05:14:06 +00:00
|
|
|
|
2002-06-11 15:44:38 +00:00
|
|
|
{xbstart} {
|
2002-05-01 17:12:08 +00:00
|
|
|
token_start = yytext;
|
2002-06-11 15:44:38 +00:00
|
|
|
BEGIN(xb);
|
1999-10-18 02:42:31 +00:00
|
|
|
startlit();
|
2002-04-20 21:56:15 +00:00
|
|
|
addlitchar('b');
|
1997-11-17 16:31:39 +00:00
|
|
|
}
|
2002-06-11 15:44:38 +00:00
|
|
|
<xb>{xbstop}	{
					/*
					 * End of a bit string literal.  literalbuf holds the
					 * leading 'b' marker added by the xbstart rule, followed
					 * by the accumulated body; the body may contain only the
					 * characters '0' and '1'.
					 */
					char	   *body = literalbuf + 1;
					size_t		nbits = strspn(body, "01");

					BEGIN(INITIAL);
					if (body[nbits] != '\0')
						yyerror("invalid bit string input");
					yylval.str = litbufdup();
					return BITCONST;
				}
|
|
|
|
<xh>{xhinside} |
|
2002-06-11 15:44:38 +00:00
|
|
|
<xb>{xbinside} {
|
1999-10-18 02:42:31 +00:00
|
|
|
addlit(yytext, yyleng);
|
1997-11-17 16:31:39 +00:00
|
|
|
}
|
|
|
|
<xh>{xhcat} |
|
2002-06-11 15:44:38 +00:00
|
|
|
<xb>{xbcat} {
|
2000-02-19 04:17:25 +00:00
|
|
|
/* ignore */
|
1997-11-17 16:31:39 +00:00
|
|
|
}
|
2002-06-11 15:44:38 +00:00
|
|
|
<xb><<EOF>> { yyerror("unterminated bit string literal"); }
|
1997-11-17 16:31:39 +00:00
|
|
|
|
|
|
|
{xhstart} {
|
2002-05-01 17:12:08 +00:00
|
|
|
token_start = yytext;
|
1997-11-17 16:31:39 +00:00
|
|
|
BEGIN(xh);
|
1999-10-18 02:42:31 +00:00
|
|
|
startlit();
|
1997-11-17 16:31:39 +00:00
|
|
|
}
|
|
|
|
<xh>{xhstop}	{
					/*
					 * End of a hexadecimal literal: convert the accumulated
					 * text in literalbuf to an integer constant.
					 */
					char	   *stop;
					long		hexval;
					int			bad;

					BEGIN(INITIAL);
					errno = 0;
					hexval = strtol(literalbuf, &stop, 16);
					/* reject trailing garbage and out-of-range values */
					bad = (*stop != '\0' || errno == ERANGE);
#ifdef HAVE_LONG_INT_64
					/* long is wider than 32 bits: also catch int4 overflow */
					if (!bad && hexval != (long) ((int32) hexval))
						bad = 1;
#endif
					if (bad)
						yyerror("bad hexadecimal integer input");
					yylval.ival = hexval;
					return ICONST;
				}
|
2002-05-01 17:12:08 +00:00
|
|
|
<xh><<EOF>> { yyerror("unterminated hexadecimal integer"); }
|
1997-11-17 16:31:39 +00:00
|
|
|
|
1997-09-01 05:51:52 +00:00
|
|
|
{xqstart} {
|
2002-05-01 17:12:08 +00:00
|
|
|
token_start = yytext;
|
1997-09-08 03:20:18 +00:00
|
|
|
BEGIN(xq);
|
1999-10-18 02:42:31 +00:00
|
|
|
startlit();
|
1997-09-08 03:20:18 +00:00
|
|
|
}
|
1997-09-01 05:51:52 +00:00
|
|
|
<xq>{xqstop} {
|
1997-09-08 03:20:18 +00:00
|
|
|
BEGIN(INITIAL);
|
2002-04-20 21:56:15 +00:00
|
|
|
yylval.str = litbufdup();
|
1998-09-01 03:29:17 +00:00
|
|
|
return SCONST;
|
1997-09-08 03:20:18 +00:00
|
|
|
}
|
2002-04-20 21:56:15 +00:00
|
|
|
<xq>{xqdouble} {
|
|
|
|
addlitchar('\'');
|
|
|
|
}
|
|
|
|
<xq>{xqinside} {
|
1999-10-18 02:42:31 +00:00
|
|
|
addlit(yytext, yyleng);
|
1997-09-08 03:20:18 +00:00
|
|
|
}
|
2002-04-20 21:56:15 +00:00
|
|
|
<xq>{xqescape} {
|
|
|
|
addlitchar(unescape_single_char(yytext[1]));
|
|
|
|
}
|
|
|
|
<xq>{xqoctesc} {
|
|
|
|
unsigned char c = strtoul(yytext+1, NULL, 8);
|
|
|
|
addlitchar(c);
|
|
|
|
}
|
1997-10-15 01:12:21 +00:00
|
|
|
<xq>{xqcat} {
|
2000-02-19 04:17:25 +00:00
|
|
|
/* ignore */
|
1997-10-15 01:12:21 +00:00
|
|
|
}
|
2002-05-01 17:12:08 +00:00
|
|
|
<xq><<EOF>> { yyerror("unterminated quoted string"); }
|
1997-09-08 03:20:18 +00:00
|
|
|
|
1997-10-30 16:36:39 +00:00
|
|
|
|
|
|
|
{xdstart} {
|
2002-05-01 17:12:08 +00:00
|
|
|
token_start = yytext;
|
1997-10-30 16:36:39 +00:00
|
|
|
BEGIN(xd);
|
1999-10-18 02:42:31 +00:00
|
|
|
startlit();
|
1997-10-30 16:36:39 +00:00
|
|
|
}
|
|
|
|
<xd>{xdstop}	{
					/*
					 * End of a double-quoted identifier.  Empty identifiers
					 * are rejected; ones that do not fit in NAMEDATALEN-1
					 * bytes are truncated with a NOTICE.
					 */
					BEGIN(INITIAL);
					if (literallen == 0)
						yyerror("zero-length delimited identifier");
					if (literallen > NAMEDATALEN - 1)
					{
						int			keep = NAMEDATALEN - 1;

#ifdef MULTIBYTE
						/* let pg_mbcliplen pick the clip length for multibyte text */
						keep = pg_mbcliplen(literalbuf, literallen, keep);
#endif
						elog(NOTICE, "identifier \"%s\" will be truncated to \"%.*s\"",
							 literalbuf, keep, literalbuf);
						literalbuf[keep] = '\0';
						literallen = keep;
					}
					yylval.str = litbufdup();
					return IDENT;
				}
|
2000-08-06 17:50:48 +00:00
|
|
|
<xd>{xddouble} {
|
2002-04-20 21:56:15 +00:00
|
|
|
addlitchar('"');
|
2000-08-06 17:50:48 +00:00
|
|
|
}
|
1997-10-30 16:36:39 +00:00
|
|
|
<xd>{xdinside} {
|
1999-10-18 02:42:31 +00:00
|
|
|
addlit(yytext, yyleng);
|
1997-10-30 16:36:39 +00:00
|
|
|
}
|
2002-05-01 17:12:08 +00:00
|
|
|
<xd><<EOF>> { yyerror("unterminated quoted identifier"); }
|
1997-10-30 16:36:39 +00:00
|
|
|
|
1997-09-08 03:20:18 +00:00
|
|
|
{typecast} { return TYPECAST; }
|
|
|
|
|
1999-10-18 02:42:31 +00:00
|
|
|
{self} { return yytext[0]; }
|
1999-09-27 19:40:40 +00:00
|
|
|
|
1997-09-08 03:20:18 +00:00
|
|
|
{operator}		{
					int			keep = yyleng;
					char	   *cstart = strstr(yytext, "/*");
					char	   *sqlstart = strstr(yytext, "--");
					char	   *cut;

					/*
					 * An embedded slash-star or dash-dash starts a comment,
					 * so the operator must end just before whichever of
					 * them comes first.  (A comment start at the very first
					 * character is matched by an earlier rule, never here.)
					 */
					if (cstart == NULL)
						cut = sqlstart;
					else if (sqlstart == NULL || cstart < sqlstart)
						cut = cstart;
					else
						cut = sqlstart;
					if (cut != NULL)
						keep = cut - yytext;

					/*
					 * SQL92 compatibility: a multi-character operator may
					 * end in '+' or '-' only if it also contains a
					 * character that cannot appear in an SQL92 operator.
					 * So '=-' is lexed as two operators, while a name such
					 * as '?-' is kept whole.  Strip trailing +/- one at a
					 * time until the rule is satisfied.
					 */
					while (keep > 1 &&
						   (yytext[keep - 1] == '+' || yytext[keep - 1] == '-'))
					{
						int			found = 0;
						int			pos;

						/* look at everything before the trailing +/- */
						for (pos = 0; pos < keep - 1; pos++)
						{
							if (strchr("~!@#^&|`?$%", yytext[pos]) != NULL)
							{
								found = 1;
								break;
							}
						}
						if (found)
							break;		/* a non-SQL92 char makes it OK */
						keep--;			/* drop the +/- and look again */
					}

					if (keep < yyleng)
					{
						/* Give the unwanted tail back to the input */
						yyless(keep);

						/*
						 * A lone remaining character that belongs to the
						 * "self" set is returned as a single-character
						 * token, exactly as the "self" rule would do.
						 */
						if (keep == 1 &&
							strchr(",()[].;$:+-*/%^<>=", yytext[0]) != NULL)
							return yytext[0];
					}

					/* "!=" is converted to "<>" for compatibility */
					yylval.str = pstrdup(strcmp(yytext, "!=") == 0 ? "<>" : yytext);
					return Op;
				}
|
2000-02-19 04:17:25 +00:00
|
|
|
|
1997-09-08 03:20:18 +00:00
|
|
|
{param}			{
					/*
					 * Parameter reference: a dollar sign followed by digits.
					 * Convert the digits with strtol() rather than atol()
					 * so that a number too large for an int4 is reported
					 * instead of being silently mangled.  The range check
					 * mirrors the one used for integer constants.
					 */
					long		val;
					char	   *endptr;

					errno = 0;
					/* yytext[0] is the dollar sign; the digits follow it */
					val = strtol(yytext + 1, &endptr, 10);
					if (*endptr != '\0' || errno == ERANGE
#ifdef HAVE_LONG_INT_64
						/* if long > 32 bits, check for overflow of int4 */
						|| val != (long) ((int32) val)
#endif
						)
						yyerror("parameter number too large");
					yylval.ival = val;
					return PARAM;
				}
|
1997-09-13 03:12:55 +00:00
|
|
|
|
1997-09-08 03:20:18 +00:00
|
|
|
{integer}		{
					/*
					 * Unsigned digit string.  Return it as ICONST when it
					 * fits in an int4; otherwise hand the text back as a
					 * float constant.
					 */
					char	   *stop;
					long		num;
					int			fits;

					errno = 0;
					num = strtol(yytext, &stop, 10);
					fits = (*stop == '\0' && errno != ERANGE);
#ifdef HAVE_LONG_INT_64
					/* long is wider than 32 bits: also catch int4 overflow */
					if (fits && num != (long) ((int32) num))
						fits = 0;
#endif
					if (fits)
					{
						yylval.ival = num;
						return ICONST;
					}

					/* integer too large, treat it as a float */
					yylval.str = pstrdup(yytext);
					return FCONST;
				}
|
1999-05-12 07:12:51 +00:00
|
|
|
{decimal} {
|
2002-05-01 17:12:08 +00:00
|
|
|
yylval.str = pstrdup(yytext);
|
2000-02-21 18:47:12 +00:00
|
|
|
return FCONST;
|
1999-05-12 07:12:51 +00:00
|
|
|
}
|
1997-09-08 03:20:18 +00:00
|
|
|
{real} {
|
2002-05-01 17:12:08 +00:00
|
|
|
yylval.str = pstrdup(yytext);
|
1998-09-01 03:29:17 +00:00
|
|
|
return FCONST;
|
1997-09-08 03:20:18 +00:00
|
|
|
}
|
1998-05-09 23:15:20 +00:00
|
|
|
|
|
|
|
|
1996-07-09 06:22:35 +00:00
|
|
|
{identifier}	{
					const ScanKeyword *kw = ScanKeywordLookup(yytext);
					char	   *name;
					char	   *p;
					int			namelen;

					/* Keywords are returned as their own token type */
					if (kw != NULL)
					{
						yylval.keyword = kw->name;
						return kw->value;
					}

					/*
					 * Ordinary identifier: fold a private copy to lower
					 * case and truncate it if it is too long.
					 *
					 * The folding here goes through isupper/tolower and is
					 * therefore locale-dependent, which seems appropriate
					 * under SQL99 rules; the keyword comparison above was
					 * NOT locale-dependent.
					 */
					name = pstrdup(yytext);
					for (p = name; *p != '\0'; p++)
					{
						unsigned char ch = (unsigned char) *p;

						if (isupper(ch))
							*p = tolower(ch);
					}
					namelen = p - name;

					if (namelen > NAMEDATALEN - 1)
					{
						int			keep = NAMEDATALEN - 1;

#ifdef MULTIBYTE
						/* let pg_mbcliplen pick the clip length for multibyte text */
						keep = pg_mbcliplen(name, namelen, keep);
#endif
						elog(NOTICE, "identifier \"%s\" will be truncated to \"%.*s\"",
							 name, keep, name);
						name[keep] = '\0';
					}
					yylval.str = name;
					return IDENT;
				}
|
|
|
|
|
1998-09-01 03:29:17 +00:00
|
|
|
{other} { return yytext[0]; }
|
1996-07-09 06:22:35 +00:00
|
|
|
|
|
|
|
%%
|
|
|
|
|
2000-09-12 21:07:18 +00:00
|
|
|
void
|
|
|
|
yyerror(const char *message)
|
1996-07-09 06:22:35 +00:00
|
|
|
{
|
2002-05-01 17:12:08 +00:00
|
|
|
elog(ERROR, "parser: %s at or near \"%s\"", message,
|
|
|
|
token_start ? token_start : yytext);
|
1996-07-09 06:22:35 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
2002-04-20 21:56:15 +00:00
|
|
|
* Called before any actual parsing is done
|
|
|
|
*/
|
1996-07-09 06:22:35 +00:00
|
|
|
void
|
2002-04-20 21:56:15 +00:00
|
|
|
scanner_init(StringInfo str)
|
1996-07-09 06:22:35 +00:00
|
|
|
{
|
2002-04-20 21:56:15 +00:00
|
|
|
/*
|
|
|
|
* Might be left over after elog()
|
|
|
|
*/
|
|
|
|
if (YY_CURRENT_BUFFER)
|
|
|
|
yy_delete_buffer(YY_CURRENT_BUFFER);
|
|
|
|
|
|
|
|
scanbuf = palloc(str->len + 2);
|
|
|
|
memcpy(scanbuf, str->data, str->len);
|
|
|
|
scanbuf[str->len] = scanbuf[str->len + 1] = YY_END_OF_BUFFER_CHAR;
|
|
|
|
scanbufhandle = yy_scan_buffer(scanbuf, str->len + 2);
|
1999-10-18 02:42:31 +00:00
|
|
|
|
|
|
|
/* initialize literal buffer to a reasonable but expansible size */
|
|
|
|
literalalloc = 128;
|
|
|
|
literalbuf = (char *) palloc(literalalloc);
|
|
|
|
startlit();
|
|
|
|
|
2002-04-20 21:56:15 +00:00
|
|
|
BEGIN(INITIAL);
|
1996-07-09 06:22:35 +00:00
|
|
|
}
|
|
|
|
|
2002-04-20 21:56:15 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Called after parsing is done to clean up after scanner_init()
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
scanner_finish(void)
|
|
|
|
{
|
|
|
|
yy_delete_buffer(scanbufhandle);
|
|
|
|
pfree(scanbuf);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
1999-10-18 02:42:31 +00:00
|
|
|
static void
|
|
|
|
addlit(char *ytext, int yleng)
|
|
|
|
{
|
|
|
|
/* enlarge buffer if needed */
|
|
|
|
if ((literallen+yleng) >= literalalloc)
|
|
|
|
{
|
|
|
|
do {
|
|
|
|
literalalloc *= 2;
|
|
|
|
} while ((literallen+yleng) >= literalalloc);
|
|
|
|
literalbuf = (char *) repalloc(literalbuf, literalalloc);
|
|
|
|
}
|
2001-09-07 23:17:14 +00:00
|
|
|
/* append new data, add trailing null */
|
|
|
|
memcpy(literalbuf+literallen, ytext, yleng);
|
1999-10-18 02:42:31 +00:00
|
|
|
literallen += yleng;
|
2001-09-07 23:17:14 +00:00
|
|
|
literalbuf[literallen] = '\0';
|
1999-10-18 02:42:31 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2002-04-20 21:56:15 +00:00
|
|
|
static void
|
|
|
|
addlitchar(unsigned char ychar)
|
1996-07-09 06:22:35 +00:00
|
|
|
{
|
2002-04-20 21:56:15 +00:00
|
|
|
/* enlarge buffer if needed */
|
|
|
|
if ((literallen+1) >= literalalloc)
|
|
|
|
{
|
|
|
|
literalalloc *= 2;
|
|
|
|
literalbuf = (char *) repalloc(literalbuf, literalalloc);
|
|
|
|
}
|
|
|
|
/* append new data, add trailing null */
|
|
|
|
literalbuf[literallen] = ychar;
|
|
|
|
literallen += 1;
|
|
|
|
literalbuf[literallen] = '\0';
|
1996-07-09 06:22:35 +00:00
|
|
|
}
|
|
|
|
|
2002-04-20 21:56:15 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* One might be tempted to write pstrdup(literalbuf) instead of this,
|
|
|
|
* but for long literals this is much faster because the length is
|
|
|
|
* already known.
|
|
|
|
*/
|
|
|
|
static char *
|
|
|
|
litbufdup(void)
|
1996-07-09 06:22:35 +00:00
|
|
|
{
|
2002-04-20 21:56:15 +00:00
|
|
|
char *new;
|
1999-10-18 02:42:31 +00:00
|
|
|
|
2002-04-20 21:56:15 +00:00
|
|
|
new = palloc(literallen + 1);
|
|
|
|
memcpy(new, literalbuf, literallen+1);
|
|
|
|
return new;
|
|
|
|
}
|
1996-07-09 06:22:35 +00:00
|
|
|
|
1999-10-18 02:42:31 +00:00
|
|
|
|
2002-04-20 21:56:15 +00:00
|
|
|
/*
 * unescape_single_char
 *		Map the character that follows a backslash to the byte it stands for.
 *
 * The letters b, f, n, r and t yield backspace, form feed, newline,
 * carriage return and tab respectively.  Any other character is
 * returned unchanged.
 */
unsigned char
unescape_single_char(unsigned char c)
{
	if (c == 'b')
		return '\b';
	if (c == 'f')
		return '\f';
	if (c == 'n')
		return '\n';
	if (c == 'r')
		return '\r';
	if (c == 't')
		return '\t';

	/* not a recognized escape letter: the character stands for itself */
	return c;
}
|