Skip to content

Commit 8445d50

Browse files
committed
Thread-local arenas
Currently, all threads share a single arena for imaging. This can cause a lot of contention when there are enough workers, because the arena mutex is constantly being acquired. This commit instead introduces lockless thread-local arenas for environments that support them.
1 parent 128f3f4 commit 8445d50

File tree

4 files changed

+125
-37
lines changed

4 files changed

+125
-37
lines changed

setup.py

Lines changed: 68 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,14 @@
88
# ------------------------------
99
from __future__ import annotations
1010

11+
import distutils.ccompiler
1112
import os
1213
import re
1314
import shutil
1415
import struct
1516
import subprocess
1617
import sys
18+
import tempfile
1719
import warnings
1820
from collections.abc import Iterator
1921
from typing import Any
@@ -292,6 +294,47 @@ def _pkg_config(name: str) -> tuple[list[str], list[str]] | None:
292294
return None
293295

294296

297+
def _try_compile(compiler: distutils.ccompiler.CCompiler, code: str) -> bool:
298+
try:
299+
with tempfile.TemporaryDirectory() as d:
300+
fn = os.path.join(d, "test.c")
301+
with open(fn, "w") as f:
302+
f.write(code)
303+
compiler.compile([fn], output_dir=d, extra_preargs=["-Werror"])
304+
return True
305+
except distutils.ccompiler.CompileError:
306+
return False
307+
308+
309+
def _try_compile_attr(compiler: distutils.ccompiler.CCompiler, attr: str) -> bool:
    """Probe whether ``attr`` is accepted as a qualifier on a global ``int``.

    Builds a tiny C program that declares ``int <attr> foo;`` next to an empty
    ``main`` and hands it to ``_try_compile``. The two pragmas ask GCC and
    clang to treat ``-Wattributes`` diagnostics as errors.

    :param compiler: Compiler object forwarded unchanged to ``_try_compile``.
    :param attr: Text spliced verbatim between ``int`` and ``foo``.
    :returns: Whatever ``_try_compile`` reports for the generated program.
    """
    # Doubled braces below are literal ``{``/``}`` in the generated C source.
    return _try_compile(
        compiler,
        f"""
#pragma GCC diagnostic error "-Wattributes"
#pragma clang diagnostic error "-Wattributes"

int {attr} foo;
int main() {{
return 0;
}}
""",
    )
321+
322+
323+
def _try_compile_tls_define_macro(
    compiler: distutils.ccompiler.CCompiler,
) -> tuple[str, str | None] | None:
    """Pick the define macro for the first thread-local spelling that compiles.

    Each candidate spelling is probed with ``_try_compile_attr`` in the order
    listed below. Probing stops at the first success, so later candidates are
    never compiled once one is accepted.

    :param compiler: Compiler object forwarded to ``_try_compile_attr``.
    :returns: A ``(macro_name, None)`` pair for the first accepted spelling,
        or ``None`` when none of them compiles.
    """
    candidates = (
        ("thread_local", "HAVE_THREAD_LOCAL"),  # C23
        ("_Thread_local", "HAVE__THREAD_LOCAL"),  # C11/C17
        ("__thread", "HAVE___THREAD"),  # GCC/clang
        ("__declspec(thread)", "HAVE___DECLSPEC_THREAD_"),  # MSVC
    )
    for specifier, macro_name in candidates:
        if _try_compile_attr(compiler, specifier):
            return (macro_name, None)
    return None
336+
337+
295338
class pil_build_ext(build_ext):
296339
class ext_feature:
297340
features = [
@@ -888,6 +931,10 @@ def build_extensions(self) -> None:
888931
if struct.unpack("h", b"\0\1")[0] == 1:
889932
defs.append(("WORDS_BIGENDIAN", None))
890933

934+
tls_define_macro = _try_compile_tls_define_macro(self.compiler)
935+
if tls_define_macro is not None:
936+
defs.append(tls_define_macro)
937+
891938
defs.append(("PILLOW_VERSION", f'"{PILLOW_VERSION}"'))
892939

893940
self._update_extension("PIL._imaging", libs, defs)
@@ -1008,18 +1055,32 @@ def debug_build() -> bool:
10081055

10091056

10101057
files: list[str | os.PathLike[str]] = ["src/_imaging.c"]
1058+
define_macros: list[tuple[str, str | None]] = []
1059+
1060+
tls_define_macro = _try_compile_tls_define_macro(distutils.ccompiler.new_compiler())
1061+
if tls_define_macro is not None:
1062+
define_macros.append(tls_define_macro)
1063+
10111064
for src_file in _IMAGING:
10121065
files.append("src/" + src_file + ".c")
10131066
for src_file in _LIB_IMAGING:
10141067
files.append(os.path.join("src/libImaging", src_file + ".c"))
10151068
ext_modules = [
1016-
Extension("PIL._imaging", files),
1017-
Extension("PIL._imagingft", ["src/_imagingft.c"]),
1018-
Extension("PIL._imagingcms", ["src/_imagingcms.c"]),
1019-
Extension("PIL._webp", ["src/_webp.c"]),
1020-
Extension("PIL._imagingtk", ["src/_imagingtk.c", "src/Tk/tkImaging.c"]),
1021-
Extension("PIL._imagingmath", ["src/_imagingmath.c"]),
1022-
Extension("PIL._imagingmorph", ["src/_imagingmorph.c"]),
1069+
Extension("PIL._imaging", files, define_macros=define_macros),
1070+
Extension("PIL._imagingft", ["src/_imagingft.c"], define_macros=define_macros[:]),
1071+
Extension("PIL._imagingcms", ["src/_imagingcms.c"], define_macros=define_macros[:]),
1072+
Extension("PIL._webp", ["src/_webp.c"], define_macros=define_macros[:]),
1073+
Extension(
1074+
"PIL._imagingtk",
1075+
["src/_imagingtk.c", "src/Tk/tkImaging.c"],
1076+
define_macros=define_macros[:],
1077+
),
1078+
Extension(
1079+
"PIL._imagingmath", ["src/_imagingmath.c"], define_macros=define_macros[:]
1080+
),
1081+
Extension(
1082+
"PIL._imagingmorph", ["src/_imagingmorph.c"], define_macros=define_macros[:]
1083+
),
10231084
]
10241085

10251086

src/_imaging.c

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3931,7 +3931,7 @@ _get_stats(PyObject *self, PyObject *args) {
39313931
return NULL;
39323932
}
39333933

3934-
MUTEX_LOCK(&ImagingDefaultArena.mutex);
3934+
IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
39353935
ImagingMemoryArena arena = &ImagingDefaultArena;
39363936

39373937
v = PyLong_FromLong(arena->stats_new_count);
@@ -3958,7 +3958,7 @@ _get_stats(PyObject *self, PyObject *args) {
39583958
PyDict_SetItemString(d, "blocks_cached", v ? v : Py_None);
39593959
Py_XDECREF(v);
39603960

3961-
MUTEX_UNLOCK(&ImagingDefaultArena.mutex);
3961+
IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
39623962
return d;
39633963
}
39643964

@@ -3968,14 +3968,14 @@ _reset_stats(PyObject *self, PyObject *args) {
39683968
return NULL;
39693969
}
39703970

3971-
MUTEX_LOCK(&ImagingDefaultArena.mutex);
3971+
IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
39723972
ImagingMemoryArena arena = &ImagingDefaultArena;
39733973
arena->stats_new_count = 0;
39743974
arena->stats_allocated_blocks = 0;
39753975
arena->stats_reused_blocks = 0;
39763976
arena->stats_reallocated_blocks = 0;
39773977
arena->stats_freed_blocks = 0;
3978-
MUTEX_UNLOCK(&ImagingDefaultArena.mutex);
3978+
IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
39793979

39803980
Py_INCREF(Py_None);
39813981
return Py_None;
@@ -3987,9 +3987,9 @@ _get_alignment(PyObject *self, PyObject *args) {
39873987
return NULL;
39883988
}
39893989

3990-
MUTEX_LOCK(&ImagingDefaultArena.mutex);
3990+
IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
39913991
int alignment = ImagingDefaultArena.alignment;
3992-
MUTEX_UNLOCK(&ImagingDefaultArena.mutex);
3992+
IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
39933993
return PyLong_FromLong(alignment);
39943994
}
39953995

@@ -3999,9 +3999,9 @@ _get_block_size(PyObject *self, PyObject *args) {
39993999
return NULL;
40004000
}
40014001

4002-
MUTEX_LOCK(&ImagingDefaultArena.mutex);
4002+
IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
40034003
int block_size = ImagingDefaultArena.block_size;
4004-
MUTEX_UNLOCK(&ImagingDefaultArena.mutex);
4004+
IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
40054005
return PyLong_FromLong(block_size);
40064006
}
40074007

@@ -4011,9 +4011,9 @@ _get_blocks_max(PyObject *self, PyObject *args) {
40114011
return NULL;
40124012
}
40134013

4014-
MUTEX_LOCK(&ImagingDefaultArena.mutex);
4014+
IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
40154015
int blocks_max = ImagingDefaultArena.blocks_max;
4016-
MUTEX_UNLOCK(&ImagingDefaultArena.mutex);
4016+
IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
40174017
return PyLong_FromLong(blocks_max);
40184018
}
40194019

@@ -4034,9 +4034,9 @@ _set_alignment(PyObject *self, PyObject *args) {
40344034
return NULL;
40354035
}
40364036

4037-
MUTEX_LOCK(&ImagingDefaultArena.mutex);
4037+
IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
40384038
ImagingDefaultArena.alignment = alignment;
4039-
MUTEX_UNLOCK(&ImagingDefaultArena.mutex);
4039+
IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
40404040

40414041
Py_INCREF(Py_None);
40424042
return Py_None;
@@ -4059,9 +4059,9 @@ _set_block_size(PyObject *self, PyObject *args) {
40594059
return NULL;
40604060
}
40614061

4062-
MUTEX_LOCK(&ImagingDefaultArena.mutex);
4062+
IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
40634063
ImagingDefaultArena.block_size = block_size;
4064-
MUTEX_UNLOCK(&ImagingDefaultArena.mutex);
4064+
IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
40654065

40664066
Py_INCREF(Py_None);
40674067
return Py_None;
@@ -4085,9 +4085,9 @@ _set_blocks_max(PyObject *self, PyObject *args) {
40854085
return NULL;
40864086
}
40874087

4088-
MUTEX_LOCK(&ImagingDefaultArena.mutex);
4088+
IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
40894089
int status = ImagingMemorySetBlocksMax(&ImagingDefaultArena, blocks_max);
4090-
MUTEX_UNLOCK(&ImagingDefaultArena.mutex);
4090+
IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
40914091
if (!status) {
40924092
return ImagingError_MemoryError();
40934093
}
@@ -4104,9 +4104,9 @@ _clear_cache(PyObject *self, PyObject *args) {
41044104
return NULL;
41054105
}
41064106

4107-
MUTEX_LOCK(&ImagingDefaultArena.mutex);
4107+
IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
41084108
ImagingMemoryClearCache(&ImagingDefaultArena, i);
4109-
MUTEX_UNLOCK(&ImagingDefaultArena.mutex);
4109+
IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
41104110

41114111
Py_INCREF(Py_None);
41124112
return Py_None;

src/libImaging/Imaging.h

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,32 @@ struct ImagingPaletteInstance {
149149
int keep_cache; /* This palette will be reused; keep cache */
150150
};
151151

152+
#define IMAGING_ARENA_LOCK(m)
153+
#define IMAGING_ARENA_UNLOCK(m)
154+
155+
#if defined(__cplusplus)
156+
#define IMAGING_ARENA_TLS thread_local
157+
#elif defined(HAVE_THREAD_LOCAL)
158+
#define IMAGING_ARENA_TLS thread_local
159+
#elif defined(HAVE__THREAD_LOCAL)
160+
#define IMAGING_ARENA_TLS _Thread_local
161+
#elif defined(HAVE___THREAD)
162+
#define IMAGING_ARENA_TLS __thread
163+
#elif defined(HAVE___DECLSPEC_THREAD_)
164+
#define IMAGING_ARENA_TLS __declspec(thread)
165+
#elif defined(Py_GIL_DISABLED)
166+
#define IMAGING_ARENA_TLS
167+
#define IMAGING_ARENA_LOCKING
168+
169+
#undef IMAGING_ARENA_LOCK
170+
#undef IMAGING_ARENA_UNLOCK
171+
172+
#define IMAGING_ARENA_LOCK(m) PyMutex_Lock(m)
173+
#define IMAGING_ARENA_UNLOCK(m) PyMutex_Unlock(m)
174+
#else
175+
#define IMAGING_ARENA_TLS
176+
#endif
177+
152178
typedef struct ImagingMemoryArena {
153179
int alignment; /* Alignment in memory of each line of an image */
154180
int block_size; /* Preferred block size, bytes */
@@ -161,15 +187,16 @@ typedef struct ImagingMemoryArena {
161187
int stats_reallocated_blocks; /* Number of blocks which were actually reallocated
162188
after retrieving */
163189
int stats_freed_blocks; /* Number of freed blocks */
164-
#ifdef Py_GIL_DISABLED
190+
191+
#ifdef IMAGING_ARENA_LOCKING
165192
PyMutex mutex;
166193
#endif
167194
} *ImagingMemoryArena;
168195

169196
/* Objects */
170197
/* ------- */
171198

172-
extern struct ImagingMemoryArena ImagingDefaultArena;
199+
extern IMAGING_ARENA_TLS struct ImagingMemoryArena ImagingDefaultArena;
173200
extern int
174201
ImagingMemorySetBlocksMax(ImagingMemoryArena arena, int blocks_max);
175202
extern void

src/libImaging/Storage.c

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -218,9 +218,9 @@ ImagingNewPrologueSubtype(const char *mode, int xsize, int ysize, int size) {
218218
break;
219219
}
220220

221-
MUTEX_LOCK(&ImagingDefaultArena.mutex);
221+
IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
222222
ImagingDefaultArena.stats_new_count += 1;
223-
MUTEX_UNLOCK(&ImagingDefaultArena.mutex);
223+
IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
224224

225225
return im;
226226
}
@@ -259,7 +259,7 @@ ImagingDelete(Imaging im) {
259259

260260
#define IMAGING_PAGE_SIZE (4096)
261261

262-
struct ImagingMemoryArena ImagingDefaultArena = {
262+
IMAGING_ARENA_TLS struct ImagingMemoryArena ImagingDefaultArena = {
263263
1, // alignment
264264
16 * 1024 * 1024, // block_size
265265
0, // blocks_max
@@ -270,7 +270,7 @@ struct ImagingMemoryArena ImagingDefaultArena = {
270270
0,
271271
0,
272272
0, // Stats
273-
#ifdef Py_GIL_DISABLED
273+
#ifdef IMAGING_ARENA_LOCKING
274274
{0},
275275
#endif
276276
};
@@ -369,12 +369,12 @@ ImagingDestroyArray(Imaging im) {
369369
int y = 0;
370370

371371
if (im->blocks) {
372-
MUTEX_LOCK(&ImagingDefaultArena.mutex);
372+
IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
373373
while (im->blocks[y].ptr) {
374374
memory_return_block(&ImagingDefaultArena, im->blocks[y]);
375375
y += 1;
376376
}
377-
MUTEX_UNLOCK(&ImagingDefaultArena.mutex);
377+
IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
378378
free(im->blocks);
379379
}
380380
}
@@ -504,21 +504,21 @@ ImagingNewInternal(const char *mode, int xsize, int ysize, int dirty) {
504504
return NULL;
505505
}
506506

507-
MUTEX_LOCK(&ImagingDefaultArena.mutex);
507+
IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
508508
Imaging tmp = ImagingAllocateArray(
509509
im, &ImagingDefaultArena, dirty, ImagingDefaultArena.block_size
510510
);
511-
MUTEX_UNLOCK(&ImagingDefaultArena.mutex);
511+
IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
512512
if (tmp) {
513513
return im;
514514
}
515515

516516
ImagingError_Clear();
517517

518518
// Try to allocate the image once more with smallest possible block size
519-
MUTEX_LOCK(&ImagingDefaultArena.mutex);
519+
IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
520520
tmp = ImagingAllocateArray(im, &ImagingDefaultArena, dirty, IMAGING_PAGE_SIZE);
521-
MUTEX_UNLOCK(&ImagingDefaultArena.mutex);
521+
IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
522522
if (tmp) {
523523
return im;
524524
}

0 commit comments

Comments
 (0)