gh-132983: Reduce the size of `_zstdmodule.h` (#133793)

This commit is contained in:
Adam Turner 2025-05-10 22:25:22 +01:00 committed by GitHub
parent 4f2f780d53
commit 1a548c0a50
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 166 additions and 165 deletions

View File

@ -3346,7 +3346,7 @@ MODULE__TESTCAPI_DEPS=$(srcdir)/Modules/_testcapi/parts.h $(srcdir)/Modules/_tes
MODULE__TESTLIMITEDCAPI_DEPS=$(srcdir)/Modules/_testlimitedcapi/testcapi_long.h $(srcdir)/Modules/_testlimitedcapi/parts.h $(srcdir)/Modules/_testlimitedcapi/util.h
MODULE__TESTINTERNALCAPI_DEPS=$(srcdir)/Modules/_testinternalcapi/parts.h
MODULE__SQLITE3_DEPS=$(srcdir)/Modules/_sqlite/connection.h $(srcdir)/Modules/_sqlite/cursor.h $(srcdir)/Modules/_sqlite/microprotocols.h $(srcdir)/Modules/_sqlite/module.h $(srcdir)/Modules/_sqlite/prepare_protocol.h $(srcdir)/Modules/_sqlite/row.h $(srcdir)/Modules/_sqlite/util.h
MODULE__ZSTD_DEPS=$(srcdir)/Modules/_zstd/_zstdmodule.h $(srcdir)/Modules/_zstd/buffer.h
MODULE__ZSTD_DEPS=$(srcdir)/Modules/_zstd/_zstdmodule.h $(srcdir)/Modules/_zstd/buffer.h $(srcdir)/Modules/_zstd/zstddict.h
CODECS_COMMON_HEADERS=$(srcdir)/Modules/cjkcodecs/multibytecodec.h $(srcdir)/Modules/cjkcodecs/cjkcodecs.h
MODULE__CODECS_CN_DEPS=$(srcdir)/Modules/cjkcodecs/mappings_cn.h $(CODECS_COMMON_HEADERS)

View File

@ -7,7 +7,13 @@ Python module.
# define Py_BUILD_CORE_MODULE 1
#endif
#include "Python.h"
#include "_zstdmodule.h"
#include "zstddict.h"
#include <zstd.h> // ZSTD_*()
#include <zdict.h> // ZDICT_*()
/*[clinic input]
module _zstd
@ -727,7 +733,7 @@ static struct PyModuleDef_Slot _zstd_slots[] = {
{0, NULL},
};
struct PyModuleDef _zstdmodule = {
static struct PyModuleDef _zstdmodule = {
.m_base = PyModuleDef_HEAD_INIT,
.m_name = "_zstd",
.m_doc = "Implementation module for Zstandard compression.",

View File

@ -1,4 +1,3 @@
#pragma once
/*
Low level interface to Meta's zstd library for use in the compression.zstd
Python module.
@ -6,127 +5,25 @@ Python module.
/* Declarations shared between different parts of the _zstd module */
#include "Python.h"
#include "zstd.h"
#include "zdict.h"
/* Forward declaration of module state */
typedef struct _zstd_state _zstd_state;
/* Forward reference of module def */
extern PyModuleDef _zstdmodule;
/* For clinic type calculations */
/* Look up the _zstd module that defines `type` (via &_zstdmodule) and
   return its per-module state.
   Returns NULL when PyType_GetModuleByDef() returns NULL. */
static inline _zstd_state *
get_zstd_state_from_type(PyTypeObject *type)
{
    PyObject *mod = PyType_GetModuleByDef(type, &_zstdmodule);
    if (mod != NULL) {
        void *mod_state = PyModule_GetState(mod);
        /* A successfully resolved module is expected to carry state. */
        assert(mod_state != NULL);
        return (_zstd_state *)mod_state;
    }
    return NULL;
}
#ifndef ZSTD_MODULE_H
#define ZSTD_MODULE_H
/* Type specs */
extern PyType_Spec zstd_dict_type_spec;
extern PyType_Spec zstd_compressor_type_spec;
extern PyType_Spec zstd_decompressor_type_spec;
struct _zstd_state {
typedef struct {
/* Module heap types. */
PyTypeObject *ZstdDict_type;
PyTypeObject *ZstdCompressor_type;
PyTypeObject *ZstdDecompressor_type;
PyObject *ZstdError;
/* enum types set by set_parameter_types. */
PyTypeObject *CParameter_type;
PyTypeObject *DParameter_type;
};
/* Instance layout of the ZstdDict object. */
typedef struct {
PyObject_HEAD
/* Reusable compression/decompression dictionaries. They are created once
   and can be shared by multiple threads concurrently, since their usage
   is read-only.
   c_dicts is a dict mapping int(compressionLevel) to
   PyCapsule(ZSTD_CDict*). */
ZSTD_DDict *d_dict;
PyObject *c_dicts;
/* Content of the dictionary, a bytes object. */
PyObject *dict_content;
/* Dictionary ID. */
uint32_t dict_id;
/* Whether __init__ has been called: 0 or 1. */
int inited;
} ZstdDict;
/* Instance layout of the ZstdCompressor object. */
typedef struct {
PyObject_HEAD
/* Compression context. */
ZSTD_CCtx *cctx;
/* ZstdDict object in use. */
PyObject *dict;
/* Last mode, initialized to ZSTD_e_end. */
int last_mode;
/* 1 if nbWorker >= 1, otherwise 0. */
int use_multithread;
/* Compression level. */
int compression_level;
/* Whether __init__ has been called: 0 or 1. */
int inited;
} ZstdCompressor;
/* Instance layout of the ZstdDecompressor object. */
typedef struct {
PyObject_HEAD
/* Decompression context. */
ZSTD_DCtx *dctx;
/* ZstdDict object in use. */
PyObject *dict;
/* Unconsumed input data. */
char *input_buffer;
size_t input_buffer_size;
size_t in_begin, in_end;
/* Unused data. */
PyObject *unused_data;
/* 0 if the decompressor has (or may have) unconsumed input data.
   Value is 0 or 1. */
char needs_input;
/* For decompress(), 0 or 1.
   1 when both the input and output streams are at a frame edge, meaning
   a frame has been completely decoded and fully flushed, or the
   decompressor has just been initialized. */
char at_frame_edge;
/* For ZstdDecompressor, 0 or 1.
   1 means the end of the first frame has been reached. */
char eof;
/* Used for fast reset of the three variables above. */
char _unused_char_for_align;
/* Whether __init__ has been called: 0 or 1. */
int inited;
} ZstdDecompressor;
/* The two types of decompressors; <D> and <E> are the shorthand tags used
   for them in comments. */
typedef enum {
TYPE_DECOMPRESSOR, // <D>, ZstdDecompressor class
TYPE_ENDLESS_DECOMPRESSOR, // <E>, decompress() function
} decompress_type;
} _zstd_state;
typedef enum {
ERR_DECOMPRESS,
@ -149,12 +46,6 @@ typedef enum {
DICT_TYPE_PREFIX = 2
} dictionary_type;
/* Return 1 when every byte of `in` has been consumed (pos == size) and
   `out` is not exactly full (pos != size); otherwise return 0. */
static inline int
mt_continue_should_break(ZSTD_inBuffer *in, ZSTD_outBuffer *out)
{
    if (in->pos != in->size) {
        /* Input remains to be consumed. */
        return 0;
    }
    return out->pos != out->size;
}
/* Format error message and set ZstdError. */
extern void
set_zstd_error(const _zstd_state* const state,
@ -164,4 +55,4 @@ extern void
set_parameter_error(const _zstd_state* const state, int is_compress,
int key_v, int value_v);
static const char init_twice_msg[] = "__init__ method is called twice.";
#endif // !ZSTD_MODULE_H

View File

@ -3,9 +3,13 @@ Low level interface to Meta's zstd library for use in the compression.zstd
Python module.
*/
#include "_zstdmodule.h"
#ifndef ZSTD_BUFFER_H
#define ZSTD_BUFFER_H
#include "pycore_blocks_output_buffer.h"
#include <zstd.h> // ZSTD_outBuffer
/* Blocks output buffer wrapper code */
/* Initialize the buffer, and grow the buffer.
@ -102,3 +106,5 @@ _OutputBuffer_ReachedMaxLength(_BlocksOutputBuffer *buffer, ZSTD_outBuffer *ob)
return buffer->allocated == buffer->max_length;
}
#endif // !ZSTD_BUFFER_H

View File

@ -7,24 +7,50 @@ Python module.
/*[clinic input]
module _zstd
class _zstd.ZstdCompressor "ZstdCompressor *" "clinic_state()->ZstdCompressor_type"
class _zstd.ZstdCompressor "ZstdCompressor *" "&zstd_compressor_type_spec"
[clinic start generated code]*/
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=875bf614798f80cb]*/
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7166021db1ef7df8]*/
#ifndef Py_BUILD_CORE_BUILTIN
# define Py_BUILD_CORE_MODULE 1
#endif
#include "Python.h"
#include "_zstdmodule.h"
#include "buffer.h"
#include "zstddict.h"
#include <stdbool.h> // bool
#include <stddef.h> // offsetof()
#include <zstd.h> // ZSTD_*()
/* Instance layout of the ZstdCompressor object. */
typedef struct {
PyObject_HEAD
/* Compression context. */
ZSTD_CCtx *cctx;
/* ZstdDict object in use. */
PyObject *dict;
/* Last mode, initialized to ZSTD_e_end. */
int last_mode;
/* 1 if nbWorker >= 1, otherwise 0. */
int use_multithread;
/* Compression level. */
int compression_level;
/* Whether __init__ has been called. */
bool initialized;
} ZstdCompressor;
/* Cast an object pointer to ZstdCompressor *.  The argument is
   parenthesized so that an expression argument (pointer arithmetic, a
   conditional, ...) is cast as a whole rather than only its first operand. */
#define ZstdCompressor_CAST(op) ((ZstdCompressor *)(op))
#include "clinic/compressor.c.h"
static int
_zstd_set_c_parameters(ZstdCompressor *self, PyObject *level_or_options,
const char *arg_name, const char* arg_type)
@ -292,10 +318,6 @@ load:
return 0;
}
#define clinic_state() (get_zstd_state_from_type(type))
#include "clinic/compressor.c.h"
#undef clinic_state
static PyObject *
_zstd_ZstdCompressor_new(PyTypeObject *type, PyObject *Py_UNUSED(args), PyObject *Py_UNUSED(kwargs))
{
@ -305,7 +327,7 @@ _zstd_ZstdCompressor_new(PyTypeObject *type, PyObject *Py_UNUSED(args), PyObject
goto error;
}
self->inited = 0;
self->initialized = 0;
self->dict = NULL;
self->use_multithread = 0;
@ -372,12 +394,11 @@ _zstd_ZstdCompressor___init___impl(ZstdCompressor *self, PyObject *level,
PyObject *options, PyObject *zstd_dict)
/*[clinic end generated code: output=215e6c4342732f96 input=9f79b0d8d34c8ef0]*/
{
/* Only called once */
if (self->inited) {
PyErr_SetString(PyExc_RuntimeError, init_twice_msg);
if (self->initialized) {
PyErr_SetString(PyExc_RuntimeError, "reinitialization not supported");
return -1;
}
self->inited = 1;
self->initialized = 1;
if (level != Py_None && options != Py_None) {
PyErr_SetString(PyExc_RuntimeError, "Only one of level or options should be used.");
@ -488,6 +509,12 @@ error:
return NULL;
}
/* Return 1 when every byte of `in` has been consumed (pos == size) and
   `out` is not exactly full (pos != size); otherwise return 0. */
static inline int
mt_continue_should_break(ZSTD_inBuffer *in, ZSTD_outBuffer *out)
{
    const int input_drained = (in->pos == in->size);
    const int output_not_full = (out->pos != out->size);
    return input_drained && output_not_full;
}
static PyObject *
compress_mt_continue_impl(ZstdCompressor *self, Py_buffer *data)
{

View File

@ -7,22 +7,65 @@ Python module.
/*[clinic input]
module _zstd
class _zstd.ZstdDecompressor "ZstdDecompressor *" "clinic_state()->ZstdDecompressor_type"
class _zstd.ZstdDecompressor "ZstdDecompressor *" "&zstd_decompressor_type_spec"
[clinic start generated code]*/
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=4e6eae327c0c0c76]*/
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=e2969ddf48a203e0]*/
#ifndef Py_BUILD_CORE_BUILTIN
# define Py_BUILD_CORE_MODULE 1
#endif
#include "Python.h"
#include "_zstdmodule.h"
#include "buffer.h"
#include "zstddict.h"
#include <stdbool.h> // bool
#include <stddef.h> // offsetof()
#include <zstd.h> // ZSTD_*()
/* Instance layout of the ZstdDecompressor object. */
typedef struct {
PyObject_HEAD
/* Decompression context. */
ZSTD_DCtx *dctx;
/* ZstdDict object in use. */
PyObject *dict;
/* Unconsumed input data. */
char *input_buffer;
size_t input_buffer_size;
size_t in_begin, in_end;
/* Unused data. */
PyObject *unused_data;
/* 0 if the decompressor has (or may have) unconsumed input data.
   Value is 0 or 1. */
char needs_input;
/* For decompress(), 0 or 1.
   1 when both the input and output streams are at a frame edge, meaning
   a frame has been completely decoded and fully flushed, or the
   decompressor has just been initialized. */
char at_frame_edge;
/* For ZstdDecompressor, 0 or 1.
   1 means the end of the first frame has been reached. */
char eof;
/* Used for fast reset of the three variables above. */
char _unused_char_for_align;
/* Whether __init__ has been called. */
bool initialized;
} ZstdDecompressor;
/* Cast an object pointer to ZstdDecompressor *.  The argument is
   parenthesized so that an expression argument (pointer arithmetic, a
   conditional, ...) is cast as a whole rather than only its first operand. */
#define ZstdDecompressor_CAST(op) ((ZstdDecompressor *)(op))
#include "clinic/decompressor.c.h"
static inline ZSTD_DDict *
_get_DDict(ZstdDict *self)
{
@ -215,17 +258,14 @@ load:
return 0;
}
/* The two types of decompressors; <D> and <E> are the shorthand tags used
   for them in comments. */
typedef enum {
TYPE_DECOMPRESSOR, // <D>, ZstdDecompressor class
TYPE_ENDLESS_DECOMPRESSOR, // <E>, decompress() function
} decompress_type;
/*
Given the two types of decompressors (defined in _zstdmodule.h):
typedef enum {
TYPE_DECOMPRESSOR, // <D>, ZstdDecompressor class
TYPE_ENDLESS_DECOMPRESSOR, // <E>, decompress() function
} decompress_type;
Decompress implementation for <D>, <E>, pseudo code:
Given the two types of decompressors (defined above),
decompress implementation for <D>, <E>, pseudo code:
initialize_output_buffer
while True:
@ -616,7 +656,7 @@ _zstd_ZstdDecompressor_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
goto error;
}
self->inited = 0;
self->initialized = 0;
self->dict = NULL;
self->input_buffer = NULL;
self->input_buffer_size = 0;
@ -695,11 +735,11 @@ _zstd_ZstdDecompressor___init___impl(ZstdDecompressor *self,
/*[clinic end generated code: output=703af2f1ec226642 input=8fd72999acc1a146]*/
{
/* Only called once */
if (self->inited) {
PyErr_SetString(PyExc_RuntimeError, init_twice_msg);
if (self->initialized) {
PyErr_SetString(PyExc_RuntimeError, "reinitialization not supported");
return -1;
}
self->inited = 1;
self->initialized = 1;
/* Load dictionary to decompression context */
if (zstd_dict != Py_None) {
@ -802,10 +842,6 @@ _zstd_ZstdDecompressor_decompress_impl(ZstdDecompressor *self,
return ret;
}
#define clinic_state() (get_zstd_state_from_type(type))
#include "clinic/decompressor.c.h"
#undef clinic_state
static PyMethodDef ZstdDecompressor_methods[] = {
_ZSTD_ZSTDDECOMPRESSOR_DECOMPRESS_METHODDEF
{NULL, NULL}

View File

@ -7,17 +7,21 @@ Python module.
/*[clinic input]
module _zstd
class _zstd.ZstdDict "ZstdDict *" "clinic_state()->ZstdDict_type"
class _zstd.ZstdDict "ZstdDict *" "&zstd_dict_type_spec"
[clinic start generated code]*/
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=a5d1254c497e52ba]*/
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=3dcc175ec974f81c]*/
#ifndef Py_BUILD_CORE_BUILTIN
# define Py_BUILD_CORE_MODULE 1
#endif
#include "_zstdmodule.h"
#include "Python.h"
#include <stddef.h> // offsetof()
#include "_zstdmodule.h"
#include "zstddict.h"
#include "clinic/zstddict.c.h"
#include <zstd.h> // ZSTD_freeDDict(), ZSTD_getDictID_fromDict()
/* Cast an object pointer to ZstdDict *.  The argument is parenthesized so
   that an expression argument (pointer arithmetic, a conditional, ...) is
   cast as a whole rather than only its first operand. */
#define ZstdDict_CAST(op) ((ZstdDict *)(op))
@ -31,7 +35,7 @@ _zstd_ZstdDict_new(PyTypeObject *type, PyObject *Py_UNUSED(args), PyObject *Py_U
}
self->dict_content = NULL;
self->inited = 0;
self->initialized = 0;
self->d_dict = NULL;
/* ZSTD_CDict dict */
@ -92,11 +96,11 @@ _zstd_ZstdDict___init___impl(ZstdDict *self, PyObject *dict_content,
/*[clinic end generated code: output=c5f5a0d8377d037c input=e6750f62a513b3ee]*/
{
/* Only called once */
if (self->inited) {
PyErr_SetString(PyExc_RuntimeError, init_twice_msg);
if (self->initialized) {
PyErr_SetString(PyExc_RuntimeError, "reinitialization not supported");
return -1;
}
self->inited = 1;
self->initialized = 1;
/* Check dict_content's type */
self->dict_content = PyBytes_FromObject(dict_content);
@ -135,10 +139,6 @@ _zstd_ZstdDict___init___impl(ZstdDict *self, PyObject *dict_content,
return 0;
}
#define clinic_state() (get_zstd_state(type))
#include "clinic/zstddict.c.h"
#undef clinic_state
PyDoc_STRVAR(ZstdDict_dictid_doc,
"ID of zstd dictionary, a 32-bit unsigned int value.\n\n"
"Non-zero means ordinary dictionary, was created by zstd functions, follow\n"

31
Modules/_zstd/zstddict.h Normal file
View File

@ -0,0 +1,31 @@
/*
Low level interface to Meta's zstd library for use in the compression.zstd
Python module.
*/
#ifndef ZSTD_DICT_H
#define ZSTD_DICT_H
#include <stdbool.h> // bool
#include <zstd.h> // ZSTD_DDict
/* Instance layout of the ZstdDict object. */
typedef struct {
PyObject_HEAD
/* Reusable compression/decompression dictionaries. They are created once
   and can be shared by multiple threads concurrently, since their usage
   is read-only.
   c_dicts is a dict mapping int(compressionLevel) to
   PyCapsule(ZSTD_CDict*). */
ZSTD_DDict *d_dict;
PyObject *c_dicts;
/* Content of the dictionary, a bytes object. */
PyObject *dict_content;
/* Dictionary ID. */
uint32_t dict_id;
/* Whether __init__ has been called. */
bool initialized;
} ZstdDict;
#endif // !ZSTD_DICT_H

View File

@ -137,6 +137,7 @@
<ItemGroup>
<ClInclude Include="..\Modules\_zstd\_zstdmodule.h" />
<ClInclude Include="..\Modules\_zstd\buffer.h" />
<ClInclude Include="..\Modules\_zstd\zstddict.h" />
<ClInclude Include="$(zstdDir)lib\common\bitstream.h" />
<ClInclude Include="$(zstdDir)lib\common\error_private.h" />
<ClInclude Include="$(zstdDir)lib\common\fse.h" />

View File

@ -128,6 +128,9 @@
<ClInclude Include="..\Modules\_zstd\buffer.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\Modules\_zstd\zstddict.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="$(zstdDir)lib\zstd.h">
<Filter>Header Files\zstd</Filter>
</ClInclude>