Skip to content

gh-132983: Reduce the size of _zstdmodule.h #133793

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
2 changes: 1 addition & 1 deletion Makefile.pre.in
Original file line number Diff line number Diff line change
Expand Up @@ -3341,7 +3341,7 @@ MODULE__TESTCAPI_DEPS=$(srcdir)/Modules/_testcapi/parts.h $(srcdir)/Modules/_tes
MODULE__TESTLIMITEDCAPI_DEPS=$(srcdir)/Modules/_testlimitedcapi/testcapi_long.h $(srcdir)/Modules/_testlimitedcapi/parts.h $(srcdir)/Modules/_testlimitedcapi/util.h
MODULE__TESTINTERNALCAPI_DEPS=$(srcdir)/Modules/_testinternalcapi/parts.h
MODULE__SQLITE3_DEPS=$(srcdir)/Modules/_sqlite/connection.h $(srcdir)/Modules/_sqlite/cursor.h $(srcdir)/Modules/_sqlite/microprotocols.h $(srcdir)/Modules/_sqlite/module.h $(srcdir)/Modules/_sqlite/prepare_protocol.h $(srcdir)/Modules/_sqlite/row.h $(srcdir)/Modules/_sqlite/util.h
MODULE__ZSTD_DEPS=$(srcdir)/Modules/_zstd/_zstdmodule.h $(srcdir)/Modules/_zstd/buffer.h
MODULE__ZSTD_DEPS=$(srcdir)/Modules/_zstd/_zstdmodule.h $(srcdir)/Modules/_zstd/buffer.h $(srcdir)/Modules/_zstd/zstddict.h

CODECS_COMMON_HEADERS=$(srcdir)/Modules/cjkcodecs/multibytecodec.h $(srcdir)/Modules/cjkcodecs/cjkcodecs.h
MODULE__CODECS_CN_DEPS=$(srcdir)/Modules/cjkcodecs/mappings_cn.h $(CODECS_COMMON_HEADERS)
Expand Down
8 changes: 7 additions & 1 deletion Modules/_zstd/_zstdmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,13 @@ Python module.
# define Py_BUILD_CORE_MODULE 1
#endif

#include "Python.h"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I never remember whether we explicitly include Python.h or not, considering _zstdmodule.h is already including it.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Me neither, I'll check to see what's most common


#include "_zstdmodule.h"
#include "zstddict.h"

#include <zstd.h> // ZSTD_*()
#include <zdict.h> // ZDICT_*()

/*[clinic input]
module _zstd
Expand Down Expand Up @@ -727,7 +733,7 @@ static struct PyModuleDef_Slot _zstd_slots[] = {
{0, NULL},
};

struct PyModuleDef _zstdmodule = {
static struct PyModuleDef _zstdmodule = {
.m_base = PyModuleDef_HEAD_INIT,
.m_name = "_zstd",
.m_doc = "Implementation module for Zstandard compression.",
Expand Down
124 changes: 8 additions & 116 deletions Modules/_zstd/_zstdmodule.h
Original file line number Diff line number Diff line change
@@ -1,132 +1,30 @@
#pragma once
/*
Low level interface to Meta's zstd library for use in the compression.zstd
Python module.
*/

/* Declarations shared between different parts of the _zstd module*/

#ifndef ZSTD_MODULE_H
#define ZSTD_MODULE_H
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd suggest having a ZSTD_ZSTDMODULE_H so to mimick the file path. This allows for a possible future Modules/_zstdmodule.h file

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could potentially also rename to _module.{ch} similar to sqlite? I think the guard define isn't public API here so we can change it if we want, but I'm not fully sure.

Copy link
Member

@picnixz picnixz May 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have _io/_iomodule.[ch] and _ctypes/ctypes.h I think it's better to keep the full name because it helps when looking for the file. And yes, AFAIK the guard is not public API, so up to you

#include "Python.h"

#include "zstd.h"
#include "zdict.h"


/* Forward declaration of module state */
typedef struct _zstd_state _zstd_state;

/* Forward reference of module def */
extern PyModuleDef _zstdmodule;

/* For clinic type calculations */
static inline _zstd_state *
get_zstd_state_from_type(PyTypeObject *type)
{
PyObject *module = PyType_GetModuleByDef(type, &_zstdmodule);
if (module == NULL) {
return NULL;
}
void *state = PyModule_GetState(module);
assert(state != NULL);
return (_zstd_state *)state;
}

/* Type specs */
extern PyType_Spec zstd_dict_type_spec;
extern PyType_Spec zstd_compressor_type_spec;
extern PyType_Spec zstd_decompressor_type_spec;

struct _zstd_state {
typedef struct {
/* Module heap types. */
PyTypeObject *ZstdDict_type;
PyTypeObject *ZstdCompressor_type;
PyTypeObject *ZstdDecompressor_type;
PyObject *ZstdError;

/* enum types set by set_parameter_types. */
PyTypeObject *CParameter_type;
PyTypeObject *DParameter_type;
};

typedef struct {
PyObject_HEAD

/* Reusable compress/decompress dictionary, they are created once and
can be shared by multiple threads concurrently, since its usage is
read-only.
c_dicts is a dict, int(compressionLevel):PyCapsule(ZSTD_CDict*) */
ZSTD_DDict *d_dict;
PyObject *c_dicts;

/* Content of the dictionary, bytes object. */
PyObject *dict_content;
/* Dictionary id */
uint32_t dict_id;

/* __init__ has been called, 0 or 1. */
int inited;
} ZstdDict;

typedef struct {
PyObject_HEAD

/* Compression context */
ZSTD_CCtx *cctx;

/* ZstdDict object in use */
PyObject *dict;

/* Last mode, initialized to ZSTD_e_end */
int last_mode;

/* (nbWorker >= 1) ? 1 : 0 */
int use_multithread;

/* Compression level */
int compression_level;

/* __init__ has been called, 0 or 1. */
int inited;
} ZstdCompressor;

typedef struct {
PyObject_HEAD

/* Decompression context */
ZSTD_DCtx *dctx;

/* ZstdDict object in use */
PyObject *dict;

/* Unconsumed input data */
char *input_buffer;
size_t input_buffer_size;
size_t in_begin, in_end;

/* Unused data */
PyObject *unused_data;

/* 0 if decompressor has (or may has) unconsumed input data, 0 or 1. */
char needs_input;

/* For decompress(), 0 or 1.
1 when both input and output streams are at a frame edge, means a
frame is completely decoded and fully flushed, or the decompressor
just be initialized. */
char at_frame_edge;

/* For ZstdDecompressor, 0 or 1.
1 means the end of the first frame has been reached. */
char eof;

/* Used for fast reset above three variables */
char _unused_char_for_align;

/* __init__ has been called, 0 or 1. */
int inited;
} ZstdDecompressor;

typedef enum {
TYPE_DECOMPRESSOR, // <D>, ZstdDecompressor class
TYPE_ENDLESS_DECOMPRESSOR, // <E>, decompress() function
} decompress_type;
} _zstd_state;

typedef enum {
ERR_DECOMPRESS,
Expand All @@ -149,12 +47,6 @@ typedef enum {
DICT_TYPE_PREFIX = 2
} dictionary_type;

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps this should move to zstddict.h?

Copy link
Member Author

@AA-Turner AA-Turner May 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd support moving it, but DICT_TYPE_DIGESTED is used by all three C files, so there's no easy way if I understand correctly. My main goal here was to remove the #include <zstd.h> from the file, though, which is done.

static inline int
mt_continue_should_break(ZSTD_inBuffer *in, ZSTD_outBuffer *out)
{
return in->size == in->pos && out->size != out->pos;
}

/* Format error message and set ZstdError. */
extern void
set_zstd_error(const _zstd_state* const state,
Expand All @@ -164,4 +56,4 @@ extern void
set_parameter_error(const _zstd_state* const state, int is_compress,
int key_v, int value_v);

static const char init_twice_msg[] = "__init__ method is called twice.";
#endif
8 changes: 7 additions & 1 deletion Modules/_zstd/buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,13 @@ Low level interface to Meta's zstd library for use in the compression.zstd
Python module.
*/

#include "_zstdmodule.h"
#ifndef ZSTD_BUFFER_H
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ditto for the naming + endif comments

#define ZSTD_BUFFER_H
#include "Python.h"
#include "pycore_blocks_output_buffer.h"

#include <zstd.h> // ZSTD_outBuffer

/* Blocks output buffer wrapper code */

/* Initialize the buffer, and grow the buffer.
Expand Down Expand Up @@ -102,3 +106,5 @@ _OutputBuffer_ReachedMaxLength(_BlocksOutputBuffer *buffer, ZSTD_outBuffer *ob)

return buffer->allocated == buffer->max_length;
}

#endif
53 changes: 40 additions & 13 deletions Modules/_zstd/compressor.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,24 +7,50 @@ Python module.

/*[clinic input]
module _zstd
class _zstd.ZstdCompressor "ZstdCompressor *" "clinic_state()->ZstdCompressor_type"
class _zstd.ZstdCompressor "ZstdCompressor *" "zstd_compressor_type_spec"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I never used a type spec here as I've only seen clinic_state()->TYPE constructions so I don't know if it's correct. However, if you want to keep this, use &zstd_compressor_type_spec not zstd_compressor_type_spec as you did for the other types.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oops re &! There are other examples of _spec being used, so I think it's ok.

[clinic start generated code]*/
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=875bf614798f80cb]*/

/*[clinic end generated code: output=da39a3ee5e6b4b0d input=842f2c4f4520ab26]*/

#ifndef Py_BUILD_CORE_BUILTIN
# define Py_BUILD_CORE_MODULE 1
#endif

#include "_zstdmodule.h"
#include "Python.h"

#include "_zstdmodule.h"
#include "buffer.h"
#include "zstddict.h"

#include <stdbool.h> // bool
#include <stddef.h> // offsetof()
#include <zstd.h> // ZSTD_*()

typedef struct {
PyObject_HEAD

/* Compression context */
ZSTD_CCtx *cctx;

/* ZstdDict object in use */
PyObject *dict;

/* Last mode, initialized to ZSTD_e_end */
int last_mode;

/* (nbWorker >= 1) ? 1 : 0 */
int use_multithread;

/* Compression level */
int compression_level;

/* __init__ has been called, 0 or 1. */
bool initialized;
} ZstdCompressor;

#define ZstdCompressor_CAST(op) ((ZstdCompressor *)op)

#include "clinic/compressor.c.h"

static int
_zstd_set_c_parameters(ZstdCompressor *self, PyObject *level_or_options,
const char *arg_name, const char* arg_type)
Expand Down Expand Up @@ -292,10 +318,6 @@ _zstd_load_c_dict(ZstdCompressor *self, PyObject *dict)
return 0;
}

#define clinic_state() (get_zstd_state_from_type(type))
#include "clinic/compressor.c.h"
#undef clinic_state

static PyObject *
_zstd_ZstdCompressor_new(PyTypeObject *type, PyObject *Py_UNUSED(args), PyObject *Py_UNUSED(kwargs))
{
Expand All @@ -305,7 +327,7 @@ _zstd_ZstdCompressor_new(PyTypeObject *type, PyObject *Py_UNUSED(args), PyObject
goto error;
}

self->inited = 0;
self->initialized = 0;
self->dict = NULL;
self->use_multithread = 0;

Expand Down Expand Up @@ -372,12 +394,11 @@ _zstd_ZstdCompressor___init___impl(ZstdCompressor *self, PyObject *level,
PyObject *options, PyObject *zstd_dict)
/*[clinic end generated code: output=215e6c4342732f96 input=9f79b0d8d34c8ef0]*/
{
/* Only called once */
if (self->inited) {
PyErr_SetString(PyExc_RuntimeError, init_twice_msg);
if (self->initialized) {
PyErr_SetString(PyExc_RuntimeError, "reinitialization not supported");
return -1;
}
self->inited = 1;
self->initialized = 1;

if (level != Py_None && options != Py_None) {
PyErr_SetString(PyExc_RuntimeError, "Only one of level or options should be used.");
Expand Down Expand Up @@ -488,6 +509,12 @@ compress_impl(ZstdCompressor *self, Py_buffer *data,
return NULL;
}

static inline int
mt_continue_should_break(ZSTD_inBuffer *in, ZSTD_outBuffer *out)
{
return in->size == in->pos && out->size != out->pos;
}

static PyObject *
compress_mt_continue_impl(ZstdCompressor *self, Py_buffer *data)
{
Expand Down
Loading
Loading