bpo-42864: Improve error messages regarding unclosed parentheses (GH-24161)
parent 66f77caca3
commit d6d6371447
@@ -160,7 +160,6 @@ class CodeopTests(unittest.TestCase):
         ai("","eval")
         ai("\n","eval")
         ai("(","eval")
-        ai("(\n\n\n","eval")
         ai("(9+","eval")
         ai("9+ \\","eval")
         ai("lambda z: \\","eval")
@@ -260,7 +260,7 @@ the \'lazy\' dog.\n\
         for s in samples:
             with self.assertRaises(SyntaxError) as cm:
                 compile(s, "<test>", "exec")
-            self.assertIn("unexpected EOF", str(cm.exception))
+            self.assertIn("was never closed", str(cm.exception))

 var_annot_global: int # a global annotated is necessary for test_var_annot

@@ -1649,10 +1649,10 @@ def bœr():

         self.assertEqual(stdout.splitlines()[1:], [
             '-> pass',
-            '(Pdb) *** SyntaxError: unexpected EOF while parsing',
+            '(Pdb) *** SyntaxError: \'(\' was never closed',

             '(Pdb) ENTERING RECURSIVE DEBUGGER',
-            '*** SyntaxError: unexpected EOF while parsing',
+            '*** SyntaxError: \'(\' was never closed',
             'LEAVING RECURSIVE DEBUGGER',

             '(Pdb) ENTERING RECURSIVE DEBUGGER',
@@ -987,6 +987,14 @@ def func2():
         self._check_error("A.\u03bc\\\n",
                           "unexpected EOF while parsing")

+    def test_error_parenthesis(self):
+        for paren in "([{":
+            self._check_error(paren + "1 + 2", f"\\{paren}' was never closed")
+
+        for paren in ")]}":
+            self._check_error(paren + "1 + 2", f"unmatched '\\{paren}'")
+
+
 def test_main():
     support.run_unittest(SyntaxTestCase)
     from test import test_syntax
@@ -0,0 +1,2 @@
+Improve error messages in the parser when parentheses are not closed. Patch
+by Pablo Galindo.
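A minimal sketch of the user-visible change (not part of the diff): the expected messages below are taken from the test assertions above; everything else in the snippet, including the file name, is illustrative and assumes an interpreter built with this patch.

    # Unclosed opener: previously reported as "unexpected EOF while parsing",
    # now expected to name the bracket that was left open.
    try:
        compile("(1 + 2", "<example>", "exec")
    except SyntaxError as exc:
        print(exc.msg)   # expected: '(' was never closed

    # Stray closer: the test added above also covers the "unmatched" message.
    try:
        compile(")1 + 2", "<example>", "exec")
    except SyntaxError as exc:
        print(exc.msg)   # expected: unmatched ')'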
@@ -265,6 +265,16 @@ raise_decode_error(Parser *p)
     return -1;
 }

+static inline void
+raise_unclosed_parentheses_error(Parser *p) {
+    int error_lineno = p->tok->parenlinenostack[p->tok->level-1];
+    int error_col = p->tok->parencolstack[p->tok->level-1];
+    RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError,
+                               error_lineno, error_col,
+                               "'%c' was never closed",
+                               p->tok->parenstack[p->tok->level-1]);
+}
+
 static void
 raise_tokenizer_init_error(PyObject *filename)
 {
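Because the helper reads the line and column from parenlinenostack and parencolstack, the error should point at the opening bracket rather than at the end of the input. A hedged Python-level sketch of that consequence (the lineno value is inferred from the C code above, not verified output):

    # Hypothetical check: the '[' is opened on line 1, so the SyntaxError is
    # expected to be located there even though the input ends on line 3.
    try:
        compile("x = [\n    1,\n    2,", "<example>", "exec")
    except SyntaxError as exc:
        print(exc.msg)      # expected: '[' was never closed
        print(exc.lineno)   # expected: 1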
@@ -324,7 +334,11 @@ tokenizer_error(Parser *p)
             RAISE_SYNTAX_ERROR("EOL while scanning string literal");
             return -1;
         case E_EOF:
-            RAISE_SYNTAX_ERROR("unexpected EOF while parsing");
+            if (p->tok->level) {
+                raise_unclosed_parentheses_error(p);
+            } else {
+                RAISE_SYNTAX_ERROR("unexpected EOF while parsing");
+            }
             return -1;
         case E_DEDENT:
             RAISE_INDENTATION_ERROR("unindent does not match any outer indentation level");
@@ -1151,6 +1165,52 @@ reset_parser_state(Parser *p)
     p->call_invalid_rules = 1;
 }

+static int
+_PyPegen_check_tokenizer_errors(Parser *p) {
+    // Tokenize the whole input to see if there are any tokenization
+    // errors such as mismatching parentheses. These will get priority
+    // over generic syntax errors only if the line number of the error is
+    // before the one that we had for the generic error.
+
+    // We don't want to tokenize to the end for interactive input
+    if (p->tok->prompt != NULL) {
+        return 0;
+    }
+
+
+    Token *current_token = p->known_err_token != NULL ? p->known_err_token : p->tokens[p->fill - 1];
+    Py_ssize_t current_err_line = current_token->lineno;
+
+    // Save the tokenizer state to restore it later in case we found nothing
+    struct tok_state saved_tok;
+    memcpy(&saved_tok, p->tok, sizeof(struct tok_state));
+
+    for (;;) {
+        const char *start;
+        const char *end;
+        switch (PyTokenizer_Get(p->tok, &start, &end)) {
+            case ERRORTOKEN:
+                if (p->tok->level != 0) {
+                    int error_lineno = p->tok->parenlinenostack[p->tok->level-1];
+                    if (current_err_line > error_lineno) {
+                        raise_unclosed_parentheses_error(p);
+                        return -1;
+                    }
+                }
+                break;
+            case ENDMARKER:
+                break;
+            default:
+                continue;
+        }
+        break;
+    }
+
+    // Restore the tokenizer state
+    memcpy(p->tok, &saved_tok, sizeof(struct tok_state));
+    return 0;
+}
+
 void *
 _PyPegen_run_parser(Parser *p)
 {
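The comments above describe a priority rule: after a generic parse error, the remaining input is tokenized, and an unclosed bracket only takes over if it was opened on a line before the generic error. A hedged sketch of that rule at the Python level (behavior inferred from the C code, not verified output):

    # The generic "invalid syntax" would point at line 3 ("foo bar"), but the
    # '(' opened on line 1 is never closed and 1 < 3, so the unclosed-bracket
    # message is expected to take priority.
    source = "(\n1 + 2\nfoo bar\n"
    try:
        compile(source, "<example>", "exec")
    except SyntaxError as exc:
        print(exc.msg)   # expected: '(' was never closed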
@@ -1164,8 +1224,12 @@ _PyPegen_run_parser(Parser *p)
         if (p->fill == 0) {
             RAISE_SYNTAX_ERROR("error at start before reading any input");
         }
         else if (p->tok->done == E_EOF) {
-            RAISE_SYNTAX_ERROR("unexpected EOF while parsing");
+            if (p->tok->level) {
+                raise_unclosed_parentheses_error(p);
+            } else {
+                RAISE_SYNTAX_ERROR("unexpected EOF while parsing");
+            }
         }
         else {
             if (p->tokens[p->fill-1]->type == INDENT) {
@@ -1175,6 +1239,9 @@ _PyPegen_run_parser(Parser *p)
                 RAISE_INDENTATION_ERROR("unexpected unindent");
             }
             else {
+                if (_PyPegen_check_tokenizer_errors(p)) {
+                    return NULL;
+                }
                 RAISE_SYNTAX_ERROR("invalid syntax");
             }
         }
@@ -64,7 +64,6 @@ tok_new(void)
     tok->tabsize = TABSIZE;
     tok->indent = 0;
     tok->indstack[0] = 0;
-
     tok->atbol = 1;
     tok->pendin = 0;
     tok->prompt = tok->nextprompt = NULL;
@@ -1396,6 +1395,9 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)

     /* Check for EOF and errors now */
     if (c == EOF) {
+        if (tok->level) {
+            return ERRORTOKEN;
+        }
         return tok->done == E_EOF ? ENDMARKER : ERRORTOKEN;
     }

@@ -1818,6 +1820,7 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
         }
         tok->parenstack[tok->level] = c;
         tok->parenlinenostack[tok->level] = tok->lineno;
+        tok->parencolstack[tok->level] = tok->start - tok->line_start;
         tok->level++;
         break;
     case ')':
@@ -45,6 +45,7 @@ struct tok_state {
                                 /* Used to allow free continuations inside them */
     char parenstack[MAXLEVEL];
     int parenlinenostack[MAXLEVEL];
+    int parencolstack[MAXLEVEL];
     PyObject *filename;
     /* Stuff for checking on different tab sizes */
     int altindstack[MAXINDENT];          /* Stack of alternate indents */
|
Loading…
x
Reference in New Issue
Block a user