Skip to content

Commit e76b4d4

Browse files
committed
Thread-local arenas
Currently, all threads share the same arena for imaging. This can cause a lot of contention when there are many workers, because the arena's mutex is constantly being acquired and released. This commit instead introduces lockless thread-local arenas for environments that support thread-local storage.
1 parent 128f3f4 commit e76b4d4

File tree

4 files changed

+113
-37
lines changed

4 files changed

+113
-37
lines changed

setup.py

Lines changed: 56 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,14 @@
88
# ------------------------------
99
from __future__ import annotations
1010

11+
import distutils.ccompiler
1112
import os
1213
import re
1314
import shutil
1415
import struct
1516
import subprocess
1617
import sys
18+
import tempfile
1719
import warnings
1820
from collections.abc import Iterator
1921
from typing import Any
@@ -292,6 +294,47 @@ def _pkg_config(name: str) -> tuple[list[str], list[str]] | None:
292294
return None
293295

294296

297+
def _try_compile(compiler: distutils.ccompiler.CCompiler, code: str) -> bool:
    """Report whether ``code`` compiles as a C source file.

    The text is written to ``test.c`` inside a temporary directory and passed
    to ``compiler.compile`` with ``-Werror`` supplied through
    ``extra_preargs``. The temporary directory is also used as the output
    directory, so nothing is left behind.

    :param compiler: Compiler object whose ``compile`` method is invoked.
    :param code: Full text of the C source file to try.
    :returns: ``True`` when ``compile`` returns normally, ``False`` when it
        raises ``distutils.ccompiler.CompileError``. Any other exception
        propagates to the caller.
    """
    try:
        with tempfile.TemporaryDirectory() as build_dir:
            source_path = os.path.join(build_dir, "test.c")
            with open(source_path, "w") as source_file:
                source_file.write(code)
            compiler.compile(
                [source_path], output_dir=build_dir, extra_preargs=["-Werror"]
            )
    except distutils.ccompiler.CompileError:
        return False
    else:
        return True
307+
308+
309+
def _try_compile_attr(compiler: distutils.ccompiler.CCompiler, attr: str) -> bool:
    """Report whether ``attr`` is accepted as a specifier on an ``int`` global.

    A small C program declaring ``int <attr> foo;`` is generated and handed to
    ``_try_compile``. The two pragmas ask GCC and clang to treat
    ``-Wattributes`` diagnostics as errors.

    :param compiler: Compiler object forwarded to ``_try_compile``.
    :param attr: Text inserted between ``int`` and the variable name.
    :returns: The result of ``_try_compile`` for the generated program.
    """
    probe_source = f"""
#pragma GCC diagnostic error "-Wattributes"
#pragma clang diagnostic error "-Wattributes"

int {attr} foo;
int main() {{
return 0;
}}
"""
    return _try_compile(compiler, probe_source)
321+
322+
323+
def _try_compile_tls_define_macros(
    compiler: distutils.ccompiler.CCompiler,
) -> list[tuple[str, str | None]]:
    """Pick the ``define_macros`` entry for the first usable TLS specifier.

    Each candidate spelling is probed in order with ``_try_compile_attr``;
    probing stops at the first one that compiles.

    :param compiler: Compiler object forwarded to ``_try_compile_attr``.
    :returns: A one-element list ``[(macro_name, None)]`` naming the accepted
        spelling, or an empty list when none of the candidates compiles.
    """
    # Order matters: earlier entries win when several spellings compile.
    candidates = (
        ("thread_local", "HAVE_THREAD_LOCAL"),  # C23
        ("_Thread_local", "HAVE__THREAD_LOCAL"),  # C11/C17
        ("__thread", "HAVE___THREAD"),  # GCC/clang
        ("__declspec(thread)", "HAVE___DECLSPEC_THREAD_"),  # MSVC
    )
    for specifier, macro_name in candidates:
        if _try_compile_attr(compiler, specifier):
            return [(macro_name, None)]
    return []
336+
337+
295338
class pil_build_ext(build_ext):
296339
class ext_feature:
297340
features = [
@@ -426,13 +469,14 @@ def finalize_options(self) -> None:
426469
def _update_extension(
427470
self,
428471
name: str,
429-
libraries: list[str] | list[str | bool | None],
472+
libraries: list[str] | list[str | bool | None] | None = None,
430473
define_macros: list[tuple[str, str | None]] | None = None,
431474
sources: list[str] | None = None,
432475
) -> None:
433476
for extension in self.extensions:
434477
if extension.name == name:
435-
extension.libraries += libraries
478+
if libraries is not None:
479+
extension.libraries += libraries
436480
if define_macros is not None:
437481
extension.define_macros += define_macros
438482
if sources is not None:
@@ -890,7 +934,10 @@ def build_extensions(self) -> None:
890934

891935
defs.append(("PILLOW_VERSION", f'"{PILLOW_VERSION}"'))
892936

893-
self._update_extension("PIL._imaging", libs, defs)
937+
tls_define_macros = _try_compile_tls_define_macros(self.compiler)
938+
self._update_extension("PIL._imaging", libs, defs + tls_define_macros)
939+
self._update_extension("PIL._imagingmath", define_macros=tls_define_macros[:])
940+
self._update_extension("PIL._imagingmorph", define_macros=tls_define_macros[:])
894941

895942
#
896943
# additional libraries
@@ -913,7 +960,9 @@ def build_extensions(self) -> None:
913960
libs.append(feature.get("fribidi"))
914961
else: # building FriBiDi shim from src/thirdparty
915962
srcs.append("src/thirdparty/fribidi-shim/fribidi.c")
916-
self._update_extension("PIL._imagingft", libs, defs, srcs)
963+
self._update_extension(
964+
"PIL._imagingft", libs, defs + tls_define_macros[:], srcs
965+
)
917966

918967
else:
919968
self._remove_extension("PIL._imagingft")
@@ -922,19 +971,19 @@ def build_extensions(self) -> None:
922971
libs = [feature.get("lcms")]
923972
if sys.platform == "win32":
924973
libs.extend(["user32", "gdi32"])
925-
self._update_extension("PIL._imagingcms", libs)
974+
self._update_extension("PIL._imagingcms", libs, tls_define_macros[:])
926975
else:
927976
self._remove_extension("PIL._imagingcms")
928977

929978
webp = feature.get("webp")
930979
if isinstance(webp, str):
931980
libs = [webp, webp + "mux", webp + "demux"]
932-
self._update_extension("PIL._webp", libs)
981+
self._update_extension("PIL._webp", libs, tls_define_macros[:])
933982
else:
934983
self._remove_extension("PIL._webp")
935984

936985
tk_libs = ["psapi"] if sys.platform in ("win32", "cygwin") else []
937-
self._update_extension("PIL._imagingtk", tk_libs)
986+
self._update_extension("PIL._imagingtk", tk_libs, tls_define_macros[:])
938987

939988
build_ext.build_extensions(self)
940989

src/_imaging.c

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3931,7 +3931,7 @@ _get_stats(PyObject *self, PyObject *args) {
39313931
return NULL;
39323932
}
39333933

3934-
MUTEX_LOCK(&ImagingDefaultArena.mutex);
3934+
IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
39353935
ImagingMemoryArena arena = &ImagingDefaultArena;
39363936

39373937
v = PyLong_FromLong(arena->stats_new_count);
@@ -3958,7 +3958,7 @@ _get_stats(PyObject *self, PyObject *args) {
39583958
PyDict_SetItemString(d, "blocks_cached", v ? v : Py_None);
39593959
Py_XDECREF(v);
39603960

3961-
MUTEX_UNLOCK(&ImagingDefaultArena.mutex);
3961+
IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
39623962
return d;
39633963
}
39643964

@@ -3968,14 +3968,14 @@ _reset_stats(PyObject *self, PyObject *args) {
39683968
return NULL;
39693969
}
39703970

3971-
MUTEX_LOCK(&ImagingDefaultArena.mutex);
3971+
IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
39723972
ImagingMemoryArena arena = &ImagingDefaultArena;
39733973
arena->stats_new_count = 0;
39743974
arena->stats_allocated_blocks = 0;
39753975
arena->stats_reused_blocks = 0;
39763976
arena->stats_reallocated_blocks = 0;
39773977
arena->stats_freed_blocks = 0;
3978-
MUTEX_UNLOCK(&ImagingDefaultArena.mutex);
3978+
IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
39793979

39803980
Py_INCREF(Py_None);
39813981
return Py_None;
@@ -3987,9 +3987,9 @@ _get_alignment(PyObject *self, PyObject *args) {
39873987
return NULL;
39883988
}
39893989

3990-
MUTEX_LOCK(&ImagingDefaultArena.mutex);
3990+
IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
39913991
int alignment = ImagingDefaultArena.alignment;
3992-
MUTEX_UNLOCK(&ImagingDefaultArena.mutex);
3992+
IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
39933993
return PyLong_FromLong(alignment);
39943994
}
39953995

@@ -3999,9 +3999,9 @@ _get_block_size(PyObject *self, PyObject *args) {
39993999
return NULL;
40004000
}
40014001

4002-
MUTEX_LOCK(&ImagingDefaultArena.mutex);
4002+
IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
40034003
int block_size = ImagingDefaultArena.block_size;
4004-
MUTEX_UNLOCK(&ImagingDefaultArena.mutex);
4004+
IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
40054005
return PyLong_FromLong(block_size);
40064006
}
40074007

@@ -4011,9 +4011,9 @@ _get_blocks_max(PyObject *self, PyObject *args) {
40114011
return NULL;
40124012
}
40134013

4014-
MUTEX_LOCK(&ImagingDefaultArena.mutex);
4014+
IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
40154015
int blocks_max = ImagingDefaultArena.blocks_max;
4016-
MUTEX_UNLOCK(&ImagingDefaultArena.mutex);
4016+
IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
40174017
return PyLong_FromLong(blocks_max);
40184018
}
40194019

@@ -4034,9 +4034,9 @@ _set_alignment(PyObject *self, PyObject *args) {
40344034
return NULL;
40354035
}
40364036

4037-
MUTEX_LOCK(&ImagingDefaultArena.mutex);
4037+
IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
40384038
ImagingDefaultArena.alignment = alignment;
4039-
MUTEX_UNLOCK(&ImagingDefaultArena.mutex);
4039+
IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
40404040

40414041
Py_INCREF(Py_None);
40424042
return Py_None;
@@ -4059,9 +4059,9 @@ _set_block_size(PyObject *self, PyObject *args) {
40594059
return NULL;
40604060
}
40614061

4062-
MUTEX_LOCK(&ImagingDefaultArena.mutex);
4062+
IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
40634063
ImagingDefaultArena.block_size = block_size;
4064-
MUTEX_UNLOCK(&ImagingDefaultArena.mutex);
4064+
IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
40654065

40664066
Py_INCREF(Py_None);
40674067
return Py_None;
@@ -4085,9 +4085,9 @@ _set_blocks_max(PyObject *self, PyObject *args) {
40854085
return NULL;
40864086
}
40874087

4088-
MUTEX_LOCK(&ImagingDefaultArena.mutex);
4088+
IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
40894089
int status = ImagingMemorySetBlocksMax(&ImagingDefaultArena, blocks_max);
4090-
MUTEX_UNLOCK(&ImagingDefaultArena.mutex);
4090+
IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
40914091
if (!status) {
40924092
return ImagingError_MemoryError();
40934093
}
@@ -4104,9 +4104,9 @@ _clear_cache(PyObject *self, PyObject *args) {
41044104
return NULL;
41054105
}
41064106

4107-
MUTEX_LOCK(&ImagingDefaultArena.mutex);
4107+
IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
41084108
ImagingMemoryClearCache(&ImagingDefaultArena, i);
4109-
MUTEX_UNLOCK(&ImagingDefaultArena.mutex);
4109+
IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
41104110

41114111
Py_INCREF(Py_None);
41124112
return Py_None;

src/libImaging/Imaging.h

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,32 @@ struct ImagingPaletteInstance {
149149
int keep_cache; /* This palette will be reused; keep cache */
150150
};
151151

152+
/*
 * Storage class and locking for the default memory arena.
 *
 * IMAGING_ARENA_TLS expands to the first thread-local specifier that is
 * available: `thread_local` (C++, or C23 when HAVE_THREAD_LOCAL is defined),
 * `_Thread_local` (C11/C17), `__thread` (GCC/clang) or `__declspec(thread)`
 * (MSVC). The HAVE_* macros are supplied by the build.
 *
 * When no specifier is available, IMAGING_ARENA_TLS expands to nothing. In
 * that case, if Py_GIL_DISABLED is defined, IMAGING_ARENA_LOCKING is defined
 * and the lock macros map onto PyMutex_Lock / PyMutex_Unlock. In every other
 * configuration the lock macros expand to nothing.
 */
#if defined(__cplusplus) || defined(HAVE_THREAD_LOCAL)
#define IMAGING_ARENA_TLS thread_local
#elif defined(HAVE__THREAD_LOCAL)
#define IMAGING_ARENA_TLS _Thread_local
#elif defined(HAVE___THREAD)
#define IMAGING_ARENA_TLS __thread
#elif defined(HAVE___DECLSPEC_THREAD_)
#define IMAGING_ARENA_TLS __declspec(thread)
#else
/* No thread-local storage: the arena is an ordinary global. */
#define IMAGING_ARENA_TLS
#ifdef Py_GIL_DISABLED
/* A shared arena needs a mutex when there is no GIL. */
#define IMAGING_ARENA_LOCKING
#define IMAGING_ARENA_LOCK(m) PyMutex_Lock(m)
#define IMAGING_ARENA_UNLOCK(m) PyMutex_Unlock(m)
#endif
#endif

/* Without IMAGING_ARENA_LOCKING the lock macros are no-ops. */
#ifndef IMAGING_ARENA_LOCK
#define IMAGING_ARENA_LOCK(m)
#define IMAGING_ARENA_UNLOCK(m)
#endif
177+
152178
typedef struct ImagingMemoryArena {
153179
int alignment; /* Alignment in memory of each line of an image */
154180
int block_size; /* Preferred block size, bytes */
@@ -161,15 +187,16 @@ typedef struct ImagingMemoryArena {
161187
int stats_reallocated_blocks; /* Number of blocks which were actually reallocated
162188
after retrieving */
163189
int stats_freed_blocks; /* Number of freed blocks */
164-
#ifdef Py_GIL_DISABLED
190+
191+
#ifdef IMAGING_ARENA_LOCKING
165192
PyMutex mutex;
166193
#endif
167194
} *ImagingMemoryArena;
168195

169196
/* Objects */
170197
/* ------- */
171198

172-
extern struct ImagingMemoryArena ImagingDefaultArena;
199+
extern IMAGING_ARENA_TLS struct ImagingMemoryArena ImagingDefaultArena;
173200
extern int
174201
ImagingMemorySetBlocksMax(ImagingMemoryArena arena, int blocks_max);
175202
extern void

src/libImaging/Storage.c

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -218,9 +218,9 @@ ImagingNewPrologueSubtype(const char *mode, int xsize, int ysize, int size) {
218218
break;
219219
}
220220

221-
MUTEX_LOCK(&ImagingDefaultArena.mutex);
221+
IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
222222
ImagingDefaultArena.stats_new_count += 1;
223-
MUTEX_UNLOCK(&ImagingDefaultArena.mutex);
223+
IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
224224

225225
return im;
226226
}
@@ -259,7 +259,7 @@ ImagingDelete(Imaging im) {
259259

260260
#define IMAGING_PAGE_SIZE (4096)
261261

262-
struct ImagingMemoryArena ImagingDefaultArena = {
262+
IMAGING_ARENA_TLS struct ImagingMemoryArena ImagingDefaultArena = {
263263
1, // alignment
264264
16 * 1024 * 1024, // block_size
265265
0, // blocks_max
@@ -270,7 +270,7 @@ struct ImagingMemoryArena ImagingDefaultArena = {
270270
0,
271271
0,
272272
0, // Stats
273-
#ifdef Py_GIL_DISABLED
273+
#ifdef IMAGING_ARENA_LOCKING
274274
{0},
275275
#endif
276276
};
@@ -369,12 +369,12 @@ ImagingDestroyArray(Imaging im) {
369369
int y = 0;
370370

371371
if (im->blocks) {
372-
MUTEX_LOCK(&ImagingDefaultArena.mutex);
372+
IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
373373
while (im->blocks[y].ptr) {
374374
memory_return_block(&ImagingDefaultArena, im->blocks[y]);
375375
y += 1;
376376
}
377-
MUTEX_UNLOCK(&ImagingDefaultArena.mutex);
377+
IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
378378
free(im->blocks);
379379
}
380380
}
@@ -504,21 +504,21 @@ ImagingNewInternal(const char *mode, int xsize, int ysize, int dirty) {
504504
return NULL;
505505
}
506506

507-
MUTEX_LOCK(&ImagingDefaultArena.mutex);
507+
IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
508508
Imaging tmp = ImagingAllocateArray(
509509
im, &ImagingDefaultArena, dirty, ImagingDefaultArena.block_size
510510
);
511-
MUTEX_UNLOCK(&ImagingDefaultArena.mutex);
511+
IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
512512
if (tmp) {
513513
return im;
514514
}
515515

516516
ImagingError_Clear();
517517

518518
// Try to allocate the image once more with smallest possible block size
519-
MUTEX_LOCK(&ImagingDefaultArena.mutex);
519+
IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
520520
tmp = ImagingAllocateArray(im, &ImagingDefaultArena, dirty, IMAGING_PAGE_SIZE);
521-
MUTEX_UNLOCK(&ImagingDefaultArena.mutex);
521+
IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
522522
if (tmp) {
523523
return im;
524524
}

0 commit comments

Comments
 (0)