bpo-45434: Mark the PyTokenizer C API as private (GH-28924)

Rename the PyTokenizer functions to mark them as private:

* PyTokenizer_FindEncodingFilename() => _PyTokenizer_FindEncodingFilename()
* PyTokenizer_FromString() => _PyTokenizer_FromString()
* PyTokenizer_FromFile() => _PyTokenizer_FromFile()
* PyTokenizer_FromUTF8() => _PyTokenizer_FromUTF8()
* PyTokenizer_Free() => _PyTokenizer_Free()
* PyTokenizer_Get() => _PyTokenizer_Get()

Remove the unused PyTokenizer_FindEncoding() function.

import.c: remove unused #include "errcode.h".
This commit is contained in:
Author: Victor Stinner — 2021-10-13 17:22:14 +02:00, committed by GitHub
Parent commit: 3901c08114
Commit: 713bb19356
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 40 additions and 48 deletions

View File

@ -729,7 +729,7 @@ _PyPegen_fill_token(Parser *p)
{ {
const char *start; const char *start;
const char *end; const char *end;
int type = PyTokenizer_Get(p->tok, &start, &end); int type = _PyTokenizer_Get(p->tok, &start, &end);
// Record and skip '# type: ignore' comments // Record and skip '# type: ignore' comments
while (type == TYPE_IGNORE) { while (type == TYPE_IGNORE) {
@ -746,7 +746,7 @@ _PyPegen_fill_token(Parser *p)
PyErr_NoMemory(); PyErr_NoMemory();
return -1; return -1;
} }
type = PyTokenizer_Get(p->tok, &start, &end); type = _PyTokenizer_Get(p->tok, &start, &end);
} }
// If we have reached the end and we are in single input mode we need to insert a newline and reset the parsing // If we have reached the end and we are in single input mode we need to insert a newline and reset the parsing
@ -1306,7 +1306,7 @@ _PyPegen_check_tokenizer_errors(Parser *p) {
for (;;) { for (;;) {
const char *start; const char *start;
const char *end; const char *end;
switch (PyTokenizer_Get(p->tok, &start, &end)) { switch (_PyTokenizer_Get(p->tok, &start, &end)) {
case ERRORTOKEN: case ERRORTOKEN:
if (p->tok->level != 0) { if (p->tok->level != 0) {
int error_lineno = p->tok->parenlinenostack[p->tok->level-1]; int error_lineno = p->tok->parenlinenostack[p->tok->level-1];
@ -1411,7 +1411,7 @@ _PyPegen_run_parser_from_file_pointer(FILE *fp, int start_rule, PyObject *filena
const char *enc, const char *ps1, const char *ps2, const char *enc, const char *ps1, const char *ps2,
PyCompilerFlags *flags, int *errcode, PyArena *arena) PyCompilerFlags *flags, int *errcode, PyArena *arena)
{ {
struct tok_state *tok = PyTokenizer_FromFile(fp, enc, ps1, ps2); struct tok_state *tok = _PyTokenizer_FromFile(fp, enc, ps1, ps2);
if (tok == NULL) { if (tok == NULL) {
if (PyErr_Occurred()) { if (PyErr_Occurred()) {
raise_tokenizer_init_error(filename_ob); raise_tokenizer_init_error(filename_ob);
@ -1441,7 +1441,7 @@ _PyPegen_run_parser_from_file_pointer(FILE *fp, int start_rule, PyObject *filena
_PyPegen_Parser_Free(p); _PyPegen_Parser_Free(p);
error: error:
PyTokenizer_Free(tok); _PyTokenizer_Free(tok);
return result; return result;
} }
@ -1453,9 +1453,9 @@ _PyPegen_run_parser_from_string(const char *str, int start_rule, PyObject *filen
struct tok_state *tok; struct tok_state *tok;
if (flags == NULL || flags->cf_flags & PyCF_IGNORE_COOKIE) { if (flags == NULL || flags->cf_flags & PyCF_IGNORE_COOKIE) {
tok = PyTokenizer_FromUTF8(str, exec_input); tok = _PyTokenizer_FromUTF8(str, exec_input);
} else { } else {
tok = PyTokenizer_FromString(str, exec_input); tok = _PyTokenizer_FromString(str, exec_input);
} }
if (tok == NULL) { if (tok == NULL) {
if (PyErr_Occurred()) { if (PyErr_Occurred()) {
@ -1483,7 +1483,7 @@ _PyPegen_run_parser_from_string(const char *str, int start_rule, PyObject *filen
_PyPegen_Parser_Free(p); _PyPegen_Parser_Free(p);
error: error:
PyTokenizer_Free(tok); _PyTokenizer_Free(tok);
return result; return result;
} }

View File

@ -386,7 +386,7 @@ fstring_compile_expr(Parser *p, const char *expr_start, const char *expr_end,
str[0] = '('; str[0] = '(';
str[len+1] = ')'; str[len+1] = ')';
struct tok_state* tok = PyTokenizer_FromString(str, 1); struct tok_state* tok = _PyTokenizer_FromString(str, 1);
if (tok == NULL) { if (tok == NULL) {
PyMem_Free(str); PyMem_Free(str);
return NULL; return NULL;
@ -409,7 +409,7 @@ fstring_compile_expr(Parser *p, const char *expr_start, const char *expr_end,
exit: exit:
PyMem_Free(str); PyMem_Free(str);
_PyPegen_Parser_Free(p2); _PyPegen_Parser_Free(p2);
PyTokenizer_Free(tok); _PyTokenizer_Free(tok);
return result; return result;
} }

View File

@ -108,7 +108,7 @@ static char *
error_ret(struct tok_state *tok) /* XXX */ error_ret(struct tok_state *tok) /* XXX */
{ {
tok->decoding_erred = 1; tok->decoding_erred = 1;
if (tok->fp != NULL && tok->buf != NULL) /* see PyTokenizer_Free */ if (tok->fp != NULL && tok->buf != NULL) /* see _PyTokenizer_Free */
PyMem_Free(tok->buf); PyMem_Free(tok->buf);
tok->buf = tok->cur = tok->inp = NULL; tok->buf = tok->cur = tok->inp = NULL;
tok->start = NULL; tok->start = NULL;
@ -702,7 +702,7 @@ decode_str(const char *input, int single, struct tok_state *tok)
/* Set up tokenizer for string */ /* Set up tokenizer for string */
struct tok_state * struct tok_state *
PyTokenizer_FromString(const char *str, int exec_input) _PyTokenizer_FromString(const char *str, int exec_input)
{ {
struct tok_state *tok = tok_new(); struct tok_state *tok = tok_new();
char *decoded; char *decoded;
@ -711,7 +711,7 @@ PyTokenizer_FromString(const char *str, int exec_input)
return NULL; return NULL;
decoded = decode_str(str, exec_input, tok); decoded = decode_str(str, exec_input, tok);
if (decoded == NULL) { if (decoded == NULL) {
PyTokenizer_Free(tok); _PyTokenizer_Free(tok);
return NULL; return NULL;
} }
@ -723,7 +723,7 @@ PyTokenizer_FromString(const char *str, int exec_input)
/* Set up tokenizer for UTF-8 string */ /* Set up tokenizer for UTF-8 string */
struct tok_state * struct tok_state *
PyTokenizer_FromUTF8(const char *str, int exec_input) _PyTokenizer_FromUTF8(const char *str, int exec_input)
{ {
struct tok_state *tok = tok_new(); struct tok_state *tok = tok_new();
char *translated; char *translated;
@ -731,7 +731,7 @@ PyTokenizer_FromUTF8(const char *str, int exec_input)
return NULL; return NULL;
tok->input = translated = translate_newlines(str, exec_input, tok); tok->input = translated = translate_newlines(str, exec_input, tok);
if (translated == NULL) { if (translated == NULL) {
PyTokenizer_Free(tok); _PyTokenizer_Free(tok);
return NULL; return NULL;
} }
tok->decoding_state = STATE_NORMAL; tok->decoding_state = STATE_NORMAL;
@ -739,7 +739,7 @@ PyTokenizer_FromUTF8(const char *str, int exec_input)
tok->str = translated; tok->str = translated;
tok->encoding = new_string("utf-8", 5, tok); tok->encoding = new_string("utf-8", 5, tok);
if (!tok->encoding) { if (!tok->encoding) {
PyTokenizer_Free(tok); _PyTokenizer_Free(tok);
return NULL; return NULL;
} }
@ -751,14 +751,14 @@ PyTokenizer_FromUTF8(const char *str, int exec_input)
/* Set up tokenizer for file */ /* Set up tokenizer for file */
struct tok_state * struct tok_state *
PyTokenizer_FromFile(FILE *fp, const char* enc, _PyTokenizer_FromFile(FILE *fp, const char* enc,
const char *ps1, const char *ps2) const char *ps1, const char *ps2)
{ {
struct tok_state *tok = tok_new(); struct tok_state *tok = tok_new();
if (tok == NULL) if (tok == NULL)
return NULL; return NULL;
if ((tok->buf = (char *)PyMem_Malloc(BUFSIZ)) == NULL) { if ((tok->buf = (char *)PyMem_Malloc(BUFSIZ)) == NULL) {
PyTokenizer_Free(tok); _PyTokenizer_Free(tok);
return NULL; return NULL;
} }
tok->cur = tok->inp = tok->buf; tok->cur = tok->inp = tok->buf;
@ -771,7 +771,7 @@ PyTokenizer_FromFile(FILE *fp, const char* enc,
gets copied into the parse tree. */ gets copied into the parse tree. */
tok->encoding = new_string(enc, strlen(enc), tok); tok->encoding = new_string(enc, strlen(enc), tok);
if (!tok->encoding) { if (!tok->encoding) {
PyTokenizer_Free(tok); _PyTokenizer_Free(tok);
return NULL; return NULL;
} }
tok->decoding_state = STATE_NORMAL; tok->decoding_state = STATE_NORMAL;
@ -782,7 +782,7 @@ PyTokenizer_FromFile(FILE *fp, const char* enc,
/* Free a tok_state structure */ /* Free a tok_state structure */
void void
PyTokenizer_Free(struct tok_state *tok) _PyTokenizer_Free(struct tok_state *tok)
{ {
if (tok->encoding != NULL) { if (tok->encoding != NULL) {
PyMem_Free(tok->encoding); PyMem_Free(tok->encoding);
@ -2049,7 +2049,8 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
} }
int int
PyTokenizer_Get(struct tok_state *tok, const char **p_start, const char **p_end) _PyTokenizer_Get(struct tok_state *tok,
const char **p_start, const char **p_end)
{ {
int result = tok_get(tok, p_start, p_end); int result = tok_get(tok, p_start, p_end);
if (tok->decoding_erred) { if (tok->decoding_erred) {
@ -2062,7 +2063,7 @@ PyTokenizer_Get(struct tok_state *tok, const char **p_start, const char **p_end)
/* Get the encoding of a Python file. Check for the coding cookie and check if /* Get the encoding of a Python file. Check for the coding cookie and check if
the file starts with a BOM. the file starts with a BOM.
PyTokenizer_FindEncodingFilename() returns NULL when it can't find the _PyTokenizer_FindEncodingFilename() returns NULL when it can't find the
encoding in the first or second line of the file (in which case the encoding encoding in the first or second line of the file (in which case the encoding
should be assumed to be UTF-8). should be assumed to be UTF-8).
@ -2070,7 +2071,7 @@ PyTokenizer_Get(struct tok_state *tok, const char **p_start, const char **p_end)
by the caller. */ by the caller. */
char * char *
PyTokenizer_FindEncodingFilename(int fd, PyObject *filename) _PyTokenizer_FindEncodingFilename(int fd, PyObject *filename)
{ {
struct tok_state *tok; struct tok_state *tok;
FILE *fp; FILE *fp;
@ -2087,7 +2088,7 @@ PyTokenizer_FindEncodingFilename(int fd, PyObject *filename)
if (fp == NULL) { if (fp == NULL) {
return NULL; return NULL;
} }
tok = PyTokenizer_FromFile(fp, NULL, NULL, NULL); tok = _PyTokenizer_FromFile(fp, NULL, NULL, NULL);
if (tok == NULL) { if (tok == NULL) {
fclose(fp); fclose(fp);
return NULL; return NULL;
@ -2100,12 +2101,12 @@ PyTokenizer_FindEncodingFilename(int fd, PyObject *filename)
tok->filename = PyUnicode_FromString("<string>"); tok->filename = PyUnicode_FromString("<string>");
if (tok->filename == NULL) { if (tok->filename == NULL) {
fclose(fp); fclose(fp);
PyTokenizer_Free(tok); _PyTokenizer_Free(tok);
return encoding; return encoding;
} }
} }
while (tok->lineno < 2 && tok->done == E_OK) { while (tok->lineno < 2 && tok->done == E_OK) {
PyTokenizer_Get(tok, &p_start, &p_end); _PyTokenizer_Get(tok, &p_start, &p_end);
} }
fclose(fp); fclose(fp);
if (tok->encoding) { if (tok->encoding) {
@ -2114,18 +2115,11 @@ PyTokenizer_FindEncodingFilename(int fd, PyObject *filename)
strcpy(encoding, tok->encoding); strcpy(encoding, tok->encoding);
} }
} }
PyTokenizer_Free(tok); _PyTokenizer_Free(tok);
return encoding; return encoding;
} }
char *
PyTokenizer_FindEncoding(int fd)
{
return PyTokenizer_FindEncodingFilename(fd, NULL);
}
#ifdef Py_DEBUG #ifdef Py_DEBUG
void void
tok_dump(int type, char *start, char *end) tok_dump(int type, char *start, char *end)
{ {
@ -2133,5 +2127,4 @@ tok_dump(int type, char *start, char *end)
if (type == NAME || type == NUMBER || type == STRING || type == OP) if (type == NAME || type == NUMBER || type == STRING || type == OP)
printf("(%.*s)", (int)(end - start), start); printf("(%.*s)", (int)(end - start), start);
} }
#endif // Py_DEBUG
#endif

View File

@ -86,12 +86,12 @@ struct tok_state {
enum interactive_underflow_t interactive_underflow; enum interactive_underflow_t interactive_underflow;
}; };
extern struct tok_state *PyTokenizer_FromString(const char *, int); extern struct tok_state *_PyTokenizer_FromString(const char *, int);
extern struct tok_state *PyTokenizer_FromUTF8(const char *, int); extern struct tok_state *_PyTokenizer_FromUTF8(const char *, int);
extern struct tok_state *PyTokenizer_FromFile(FILE *, const char*, extern struct tok_state *_PyTokenizer_FromFile(FILE *, const char*,
const char *, const char *); const char *, const char *);
extern void PyTokenizer_Free(struct tok_state *); extern void _PyTokenizer_Free(struct tok_state *);
extern int PyTokenizer_Get(struct tok_state *, const char **, const char **); extern int _PyTokenizer_Get(struct tok_state *, const char **, const char **);
#define tok_dump _Py_tok_dump #define tok_dump _Py_tok_dump

View File

@ -47,7 +47,7 @@ tokenizeriter_new_impl(PyTypeObject *type, const char *source)
if (filename == NULL) { if (filename == NULL) {
return NULL; return NULL;
} }
self->tok = PyTokenizer_FromUTF8(source, 1); self->tok = _PyTokenizer_FromUTF8(source, 1);
if (self->tok == NULL) { if (self->tok == NULL) {
Py_DECREF(filename); Py_DECREF(filename);
return NULL; return NULL;
@ -61,7 +61,7 @@ tokenizeriter_next(tokenizeriterobject *it)
{ {
const char *start; const char *start;
const char *end; const char *end;
int type = PyTokenizer_Get(it->tok, &start, &end); int type = _PyTokenizer_Get(it->tok, &start, &end);
if (type == ERRORTOKEN && PyErr_Occurred()) { if (type == ERRORTOKEN && PyErr_Occurred()) {
return NULL; return NULL;
} }
@ -105,7 +105,7 @@ static void
tokenizeriter_dealloc(tokenizeriterobject *it) tokenizeriter_dealloc(tokenizeriterobject *it)
{ {
PyTypeObject *tp = Py_TYPE(it); PyTypeObject *tp = Py_TYPE(it);
PyTokenizer_Free(it->tok); _PyTokenizer_Free(it->tok);
tp->tp_free(it); tp->tp_free(it);
Py_DECREF(tp); Py_DECREF(tp);
} }

View File

@ -11,7 +11,6 @@
#include "pycore_interp.h" // _PyInterpreterState_ClearModules() #include "pycore_interp.h" // _PyInterpreterState_ClearModules()
#include "pycore_pystate.h" // _PyInterpreterState_GET() #include "pycore_pystate.h" // _PyInterpreterState_GET()
#include "pycore_sysmodule.h" #include "pycore_sysmodule.h"
#include "errcode.h"
#include "marshal.h" #include "marshal.h"
#include "code.h" #include "code.h"
#include "importdl.h" #include "importdl.h"

View File

@ -29,7 +29,7 @@
#define MAX_NTHREADS 100 #define MAX_NTHREADS 100
/* Function from Parser/tokenizer.c */ /* Function from Parser/tokenizer.c */
extern char * PyTokenizer_FindEncodingFilename(int, PyObject *); extern char* _PyTokenizer_FindEncodingFilename(int, PyObject *);
_Py_IDENTIFIER(TextIOWrapper); _Py_IDENTIFIER(TextIOWrapper);
_Py_IDENTIFIER(close); _Py_IDENTIFIER(close);
@ -431,7 +431,7 @@ _Py_DisplaySourceLine(PyObject *f, PyObject *filename, int lineno, int indent, i
Py_DECREF(binary); Py_DECREF(binary);
return 0; return 0;
} }
found_encoding = PyTokenizer_FindEncodingFilename(fd, filename); found_encoding = _PyTokenizer_FindEncodingFilename(fd, filename);
if (found_encoding == NULL) if (found_encoding == NULL)
PyErr_Clear(); PyErr_Clear();
encoding = (found_encoding != NULL) ? found_encoding : "utf-8"; encoding = (found_encoding != NULL) ? found_encoding : "utf-8";