Ignore encoding declarations inside strings. Fixes #603509.
This commit is contained in:
parent
65b7282ef7
commit
f62a89b1e0
@ -101,7 +101,7 @@ The encoding is used for all lexical analysis, in particular to find
|
|||||||
the end of a string, and to interpret the contents of Unicode literals.
|
the end of a string, and to interpret the contents of Unicode literals.
|
||||||
String literals are converted to Unicode for syntactical analysis,
|
String literals are converted to Unicode for syntactical analysis,
|
||||||
then converted back to their original encoding before interpretation
|
then converted back to their original encoding before interpretation
|
||||||
starts.
|
starts. The encoding declaration must appear on a line of its own.
|
||||||
|
|
||||||
\subsection{Explicit line joining\label{explicit-joining}}
|
\subsection{Explicit line joining\label{explicit-joining}}
|
||||||
|
|
||||||
|
@ -128,6 +128,7 @@ tok_new(void)
|
|||||||
tok->read_coding_spec = 0;
|
tok->read_coding_spec = 0;
|
||||||
tok->issued_encoding_warning = 0;
|
tok->issued_encoding_warning = 0;
|
||||||
tok->encoding = NULL;
|
tok->encoding = NULL;
|
||||||
|
tok->cont_line = 0;
|
||||||
#ifndef PGEN
|
#ifndef PGEN
|
||||||
tok->decoding_readline = NULL;
|
tok->decoding_readline = NULL;
|
||||||
tok->decoding_buffer = NULL;
|
tok->decoding_buffer = NULL;
|
||||||
@ -207,7 +208,15 @@ static char *
|
|||||||
get_coding_spec(const char *s, int size)
|
get_coding_spec(const char *s, int size)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
for (i = 0; i < size - 6; i++) { /* XXX inefficient search */
|
/* Coding spec must be in a comment, and that comment must be
|
||||||
|
* the only statement on the source code line. */
|
||||||
|
for (i = 0; i < size - 6; i++) {
|
||||||
|
if (s[i] == '#')
|
||||||
|
break;
|
||||||
|
if (s[i] != ' ' && s[i] != '\t' && s[i] != '\014')
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
for (; i < size - 6; i++) { /* XXX inefficient search */
|
||||||
const char* t = s + i;
|
const char* t = s + i;
|
||||||
if (strncmp(t, "coding", 6) == 0) {
|
if (strncmp(t, "coding", 6) == 0) {
|
||||||
const char* begin = NULL;
|
const char* begin = NULL;
|
||||||
@ -247,6 +256,9 @@ check_coding_spec(const char* line, int size, struct tok_state *tok,
|
|||||||
int set_readline(struct tok_state *, const char *))
|
int set_readline(struct tok_state *, const char *))
|
||||||
{
|
{
|
||||||
int r = 1;
|
int r = 1;
|
||||||
|
if (tok->cont_line)
|
||||||
|
/* It's a continuation line, so it can't be a coding spec. */
|
||||||
|
return 1;
|
||||||
char* cs = get_coding_spec(line, size);
|
char* cs = get_coding_spec(line, size);
|
||||||
if (cs != NULL) {
|
if (cs != NULL) {
|
||||||
tok->read_coding_spec = 1;
|
tok->read_coding_spec = 1;
|
||||||
@ -1158,6 +1170,7 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
|
|||||||
goto nextline;
|
goto nextline;
|
||||||
*p_start = tok->start;
|
*p_start = tok->start;
|
||||||
*p_end = tok->cur - 1; /* Leave '\n' out of the string */
|
*p_end = tok->cur - 1; /* Leave '\n' out of the string */
|
||||||
|
tok->cont_line = 0;
|
||||||
return NEWLINE;
|
return NEWLINE;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1292,6 +1305,7 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
|
|||||||
return ERRORTOKEN;
|
return ERRORTOKEN;
|
||||||
}
|
}
|
||||||
tripcount = 0;
|
tripcount = 0;
|
||||||
|
tok->cont_line = 1; /* multiline string. */
|
||||||
}
|
}
|
||||||
else if (c == EOF) {
|
else if (c == EOF) {
|
||||||
if (triple)
|
if (triple)
|
||||||
@ -1340,6 +1354,7 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
|
|||||||
tok->cur = tok->inp;
|
tok->cur = tok->inp;
|
||||||
return ERRORTOKEN;
|
return ERRORTOKEN;
|
||||||
}
|
}
|
||||||
|
tok->cont_line = 1;
|
||||||
goto again; /* Read next line */
|
goto again; /* Read next line */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -45,6 +45,7 @@ struct tok_state {
|
|||||||
int read_coding_spec; /* whether 'coding:...' has been read */
|
int read_coding_spec; /* whether 'coding:...' has been read */
|
||||||
int issued_encoding_warning; /* whether non-ASCII warning was issued */
|
int issued_encoding_warning; /* whether non-ASCII warning was issued */
|
||||||
char *encoding;
|
char *encoding;
|
||||||
|
int cont_line; /* whether we are in a continuation line. */
|
||||||
#ifndef PGEN
|
#ifndef PGEN
|
||||||
PyObject *decoding_readline; /* codecs.open(...).readline */
|
PyObject *decoding_readline; /* codecs.open(...).readline */
|
||||||
PyObject *decoding_buffer;
|
PyObject *decoding_buffer;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user