bpo-42864: Improve error messages regarding unclosed parentheses (GH-24161)
parent 66f77caca3
commit d6d6371447
@@ -160,7 +160,6 @@ class CodeopTests(unittest.TestCase):
         ai("","eval")
         ai("\n","eval")
         ai("(","eval")
-        ai("(\n\n\n","eval")
         ai("(9+","eval")
         ai("9+ \\","eval")
         ai("lambda z: \\","eval")
@@ -260,7 +260,7 @@ the \'lazy\' dog.\n\
         for s in samples:
             with self.assertRaises(SyntaxError) as cm:
                 compile(s, "<test>", "exec")
-            self.assertIn("unexpected EOF", str(cm.exception))
+            self.assertIn("was never closed", str(cm.exception))

 var_annot_global: int # a global annotated is necessary for test_var_annot

@@ -1649,10 +1649,10 @@ def bœr():

         self.assertEqual(stdout.splitlines()[1:], [
             '-> pass',
-            '(Pdb) *** SyntaxError: unexpected EOF while parsing',
+            '(Pdb) *** SyntaxError: \'(\' was never closed',

             '(Pdb) ENTERING RECURSIVE DEBUGGER',
-            '*** SyntaxError: unexpected EOF while parsing',
+            '*** SyntaxError: \'(\' was never closed',
             'LEAVING RECURSIVE DEBUGGER',

             '(Pdb) ENTERING RECURSIVE DEBUGGER',
@@ -987,6 +987,14 @@ def func2():
         self._check_error("A.\u03bc\\\n",
                           "unexpected EOF while parsing")

+    def test_error_parenthesis(self):
+        for paren in "([{":
+            self._check_error(paren + "1 + 2", f"\\{paren}' was never closed")
+
+        for paren in ")]}":
+            self._check_error(paren + "1 + 2", f"unmatched '\\{paren}'")
+
+
 def test_main():
     support.run_unittest(SyntaxTestCase)
     from test import test_syntax
@@ -0,0 +1,2 @@
+Improve error messages in the parser when parentheses are not closed. Patch
+by Pablo Galindo.
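A minimal sketch of the user-visible change (not part of the diff): the expected messages below are taken from the test assertions above; everything else in the snippet, including the file name, is illustrative and assumes an interpreter built with this patch.

    # Unclosed opener: previously reported as "unexpected EOF while parsing",
    # now expected to name the bracket that was left open.
    try:
        compile("(1 + 2", "<example>", "exec")
    except SyntaxError as exc:
        print(exc.msg)   # expected: '(' was never closed

    # Stray closer: the test added above also covers the "unmatched" message.
    try:
        compile(")1 + 2", "<example>", "exec")
    except SyntaxError as exc:
        print(exc.msg)   # expected: unmatched ')'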
@@ -265,6 +265,16 @@ raise_decode_error(Parser *p)
     return -1;
 }

+static inline void
+raise_unclosed_parentheses_error(Parser *p) {
+    int error_lineno = p->tok->parenlinenostack[p->tok->level-1];
+    int error_col = p->tok->parencolstack[p->tok->level-1];
+    RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError,
+                               error_lineno, error_col,
+                               "'%c' was never closed",
+                               p->tok->parenstack[p->tok->level-1]);
+}
+
 static void
 raise_tokenizer_init_error(PyObject *filename)
 {
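Because the helper reads the line and column from parenlinenostack and parencolstack, the error should point at the opening bracket rather than at the end of the input. A hedged Python-level sketch of that consequence (the lineno value is inferred from the C code above, not verified output):

    # Hypothetical check: the '[' is opened on line 1, so the SyntaxError is
    # expected to be located there even though the input ends on line 3.
    try:
        compile("x = [\n    1,\n    2,", "<example>", "exec")
    except SyntaxError as exc:
        print(exc.msg)      # expected: '[' was never closed
        print(exc.lineno)   # expected: 1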
@@ -324,7 +334,11 @@ tokenizer_error(Parser *p)
             RAISE_SYNTAX_ERROR("EOL while scanning string literal");
             return -1;
         case E_EOF:
-            RAISE_SYNTAX_ERROR("unexpected EOF while parsing");
+            if (p->tok->level) {
+                raise_unclosed_parentheses_error(p);
+            } else {
+                RAISE_SYNTAX_ERROR("unexpected EOF while parsing");
+            }
             return -1;
         case E_DEDENT:
             RAISE_INDENTATION_ERROR("unindent does not match any outer indentation level");
@@ -1151,6 +1165,52 @@ reset_parser_state(Parser *p)
     p->call_invalid_rules = 1;
 }

+static int
+_PyPegen_check_tokenizer_errors(Parser *p) {
+    // Tokenize the whole input to see if there are any tokenization
+    // errors such as mismatching parentheses. These will get priority
+    // over generic syntax errors only if the line number of the error is
+    // before the one that we had for the generic error.
+
+    // We don't want to tokenize to the end for interactive input
+    if (p->tok->prompt != NULL) {
+        return 0;
+    }
+
+
+    Token *current_token = p->known_err_token != NULL ? p->known_err_token : p->tokens[p->fill - 1];
+    Py_ssize_t current_err_line = current_token->lineno;
+
+    // Save the tokenizer state to restore it later in case we found nothing
+    struct tok_state saved_tok;
+    memcpy(&saved_tok, p->tok, sizeof(struct tok_state));
+
+    for (;;) {
+        const char *start;
+        const char *end;
+        switch (PyTokenizer_Get(p->tok, &start, &end)) {
+            case ERRORTOKEN:
+                if (p->tok->level != 0) {
+                    int error_lineno = p->tok->parenlinenostack[p->tok->level-1];
+                    if (current_err_line > error_lineno) {
+                        raise_unclosed_parentheses_error(p);
+                        return -1;
+                    }
+                }
+                break;
+            case ENDMARKER:
+                break;
+            default:
+                continue;
+        }
+        break;
+    }
+
+    // Restore the tokenizer state
+    memcpy(p->tok, &saved_tok, sizeof(struct tok_state));
+    return 0;
+}
+
 void *
 _PyPegen_run_parser(Parser *p)
 {
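The comments above describe a priority rule: after a generic parse error, the remaining input is tokenized, and an unclosed bracket only takes over if it was opened on a line before the generic error. A hedged sketch of that rule at the Python level (behavior inferred from the C code, not verified output):

    # The generic "invalid syntax" would point at line 3 ("foo bar"), but the
    # '(' opened on line 1 is never closed and 1 < 3, so the unclosed-bracket
    # message is expected to take priority.
    source = "(\n1 + 2\nfoo bar\n"
    try:
        compile(source, "<example>", "exec")
    except SyntaxError as exc:
        print(exc.msg)   # expected: '(' was never closed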
@@ -1164,8 +1224,12 @@ _PyPegen_run_parser(Parser *p)
         if (p->fill == 0) {
             RAISE_SYNTAX_ERROR("error at start before reading any input");
         }
         else if (p->tok->done == E_EOF) {
-            RAISE_SYNTAX_ERROR("unexpected EOF while parsing");
+            if (p->tok->level) {
+                raise_unclosed_parentheses_error(p);
+            } else {
+                RAISE_SYNTAX_ERROR("unexpected EOF while parsing");
+            }
         }
         else {
             if (p->tokens[p->fill-1]->type == INDENT) {
@@ -1175,6 +1239,9 @@ _PyPegen_run_parser(Parser *p)
                 RAISE_INDENTATION_ERROR("unexpected unindent");
             }
             else {
+                if (_PyPegen_check_tokenizer_errors(p)) {
+                    return NULL;
+                }
                 RAISE_SYNTAX_ERROR("invalid syntax");
             }
         }
@@ -64,7 +64,6 @@ tok_new(void)
     tok->tabsize = TABSIZE;
     tok->indent = 0;
     tok->indstack[0] = 0;
-
     tok->atbol = 1;
     tok->pendin = 0;
     tok->prompt = tok->nextprompt = NULL;
@@ -1396,6 +1395,9 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)

     /* Check for EOF and errors now */
     if (c == EOF) {
+        if (tok->level) {
+            return ERRORTOKEN;
+        }
         return tok->done == E_EOF ? ENDMARKER : ERRORTOKEN;
     }

@@ -1818,6 +1820,7 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
         }
         tok->parenstack[tok->level] = c;
         tok->parenlinenostack[tok->level] = tok->lineno;
+        tok->parencolstack[tok->level] = tok->start - tok->line_start;
         tok->level++;
         break;
     case ')':
@@ -45,6 +45,7 @@ struct tok_state {
                                 /* Used to allow free continuations inside them */
     char parenstack[MAXLEVEL];
     int parenlinenostack[MAXLEVEL];
+    int parencolstack[MAXLEVEL];
     PyObject *filename;
     /* Stuff for checking on different tab sizes */
     int altindstack[MAXINDENT];          /* Stack of alternate indents */
|
Loading…
x
Reference in New Issue
Block a user