Skip to content

Commit fa6a6b0

Browse files
committed
Thread-local arenas
Currently, all threads share a single arena for imaging. This can cause significant contention when there are many workers and the mutex is constantly being acquired. This commit instead introduces lockless thread-local arenas for environments that support them.
1 parent f521a4b commit fa6a6b0

File tree

4 files changed

+115
-38
lines changed

4 files changed

+115
-38
lines changed

setup.py

Lines changed: 58 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,18 +8,21 @@
88
# ------------------------------
99
from __future__ import annotations
1010

11+
import distutils.ccompiler
1112
import os
1213
import re
1314
import shutil
1415
import struct
1516
import subprocess
1617
import sys
18+
import tempfile
1719
import warnings
1820
from collections.abc import Iterator
1921
from typing import Any
2022

2123
from setuptools import Extension, setup
2224
from setuptools.command.build_ext import build_ext
25+
from setuptools.errors import CompileError
2326

2427

2528
def get_version() -> str:
@@ -292,6 +295,47 @@ def _pkg_config(name: str) -> tuple[list[str], list[str]] | None:
292295
return None
293296

294297

298+
def _try_compile(compiler: distutils.ccompiler.CCompiler, code: str) -> bool:
    """Report whether ``code`` compiles with ``compiler``.

    The snippet is written to ``test.c`` inside a temporary directory and
    compiled there with ``-Werror`` passed as an extra pre-argument.

    :param compiler: Compiler object whose ``compile`` method is invoked.
    :param code: C source text to compile.
    :returns: ``True`` if ``compile`` finished without raising
        ``CompileError``, ``False`` if it raised ``CompileError``.
    """
    with tempfile.TemporaryDirectory() as workdir:
        source_path = os.path.join(workdir, "test.c")
        with open(source_path, "w") as source_file:
            source_file.write(code)
        try:
            # Objects are emitted into the same throwaway directory.
            compiler.compile(
                [source_path], output_dir=workdir, extra_preargs=["-Werror"]
            )
        except CompileError:
            return False
    return True
308+
309+
310+
def _try_compile_attr(compiler: distutils.ccompiler.CCompiler, attr: str) -> bool:
    """Report whether ``int <attr> foo;`` compiles with ``compiler``.

    The probe program promotes ``-Wattributes`` diagnostics to errors for
    GCC and clang via pragmas before declaring the variable.

    :param compiler: Compiler object forwarded to ``_try_compile``.
    :param attr: Text placed between ``int`` and ``foo`` in the declaration.
    :returns: The result of ``_try_compile`` for the generated program.
    """
    probe_source = f"""
    #pragma GCC diagnostic error "-Wattributes"
    #pragma clang diagnostic error "-Wattributes"

    int {attr} foo;
    int main() {{
        return 0;
    }}
    """

    return _try_compile(compiler, probe_source)
322+
323+
324+
def _try_compile_tls_define_macros(
    compiler: distutils.ccompiler.CCompiler,
) -> list[tuple[str, str | None]]:
    """Pick the ``define_macros`` entry for the first working TLS keyword.

    Each candidate keyword is probed in order with ``_try_compile_attr``;
    probing stops at the first one that compiles.

    :param compiler: Compiler object forwarded to ``_try_compile_attr``.
    :returns: A one-element list ``[(macro_name, None)]`` for the first
        keyword that compiles, or an empty list if none does.
    """
    # Order matters: earlier entries win when several keywords compile.
    candidates = (
        ("thread_local", "HAVE_THREAD_LOCAL"),  # C23
        ("_Thread_local", "HAVE__THREAD_LOCAL"),  # C11/C17
        ("__thread", "HAVE___THREAD"),  # GCC/clang
        ("__declspec(thread)", "HAVE___DECLSPEC_THREAD_"),  # MSVC
    )
    for keyword, macro_name in candidates:
        if _try_compile_attr(compiler, keyword):
            return [(macro_name, None)]
    return []
337+
338+
295339
class pil_build_ext(build_ext):
296340
class ext_feature:
297341
features = [
@@ -426,13 +470,14 @@ def finalize_options(self) -> None:
426470
def _update_extension(
427471
self,
428472
name: str,
429-
libraries: list[str] | list[str | bool | None],
473+
libraries: list[str] | list[str | bool | None] | None = None,
430474
define_macros: list[tuple[str, str | None]] | None = None,
431475
sources: list[str] | None = None,
432476
) -> None:
433477
for extension in self.extensions:
434478
if extension.name == name:
435-
extension.libraries += libraries
479+
if libraries is not None:
480+
extension.libraries += libraries
436481
if define_macros is not None:
437482
extension.define_macros += define_macros
438483
if sources is not None:
@@ -890,7 +935,10 @@ def build_extensions(self) -> None:
890935

891936
defs.append(("PILLOW_VERSION", f'"{PILLOW_VERSION}"'))
892937

893-
self._update_extension("PIL._imaging", libs, defs)
938+
tls_define_macros = _try_compile_tls_define_macros(self.compiler)
939+
self._update_extension("PIL._imaging", libs, defs + tls_define_macros)
940+
self._update_extension("PIL._imagingmath", define_macros=tls_define_macros)
941+
self._update_extension("PIL._imagingmorph", define_macros=tls_define_macros)
894942

895943
#
896944
# additional libraries
@@ -913,7 +961,9 @@ def build_extensions(self) -> None:
913961
libs.append(feature.get("fribidi"))
914962
else: # building FriBiDi shim from src/thirdparty
915963
srcs.append("src/thirdparty/fribidi-shim/fribidi.c")
916-
self._update_extension("PIL._imagingft", libs, defs, srcs)
964+
self._update_extension(
965+
"PIL._imagingft", libs, defs + tls_define_macros, srcs
966+
)
917967

918968
else:
919969
self._remove_extension("PIL._imagingft")
@@ -922,19 +972,19 @@ def build_extensions(self) -> None:
922972
libs = [feature.get("lcms")]
923973
if sys.platform == "win32":
924974
libs.extend(["user32", "gdi32"])
925-
self._update_extension("PIL._imagingcms", libs)
975+
self._update_extension("PIL._imagingcms", libs, tls_define_macros)
926976
else:
927977
self._remove_extension("PIL._imagingcms")
928978

929979
webp = feature.get("webp")
930980
if isinstance(webp, str):
931981
libs = [webp, webp + "mux", webp + "demux"]
932-
self._update_extension("PIL._webp", libs)
982+
self._update_extension("PIL._webp", libs, tls_define_macros)
933983
else:
934984
self._remove_extension("PIL._webp")
935985

936986
tk_libs = ["psapi"] if sys.platform in ("win32", "cygwin") else []
937-
self._update_extension("PIL._imagingtk", tk_libs)
987+
self._update_extension("PIL._imagingtk", tk_libs, tls_define_macros)
938988

939989
build_ext.build_extensions(self)
940990

@@ -972,7 +1022,7 @@ def summary_report(self, feature: ext_feature) -> None:
9721022
(feature.get("tiff"), "LIBTIFF"),
9731023
(feature.get("freetype"), "FREETYPE2"),
9741024
(feature.get("raqm"), "RAQM (Text shaping)", raqm_extra_info),
975-
(feature.get("lcms"), "LITTLECMS2"),
1025+
(feature.get("lcms"), "LITTLECoMS2"),
9761026
(feature.get("webp"), "WEBP"),
9771027
(feature.get("xcb"), "XCB (X protocol)"),
9781028
]

src/_imaging.c

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3938,7 +3938,7 @@ _get_stats(PyObject *self, PyObject *args) {
39383938
return NULL;
39393939
}
39403940

3941-
MUTEX_LOCK(&ImagingDefaultArena.mutex);
3941+
IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
39423942
ImagingMemoryArena arena = &ImagingDefaultArena;
39433943

39443944
v = PyLong_FromLong(arena->stats_new_count);
@@ -3965,7 +3965,7 @@ _get_stats(PyObject *self, PyObject *args) {
39653965
PyDict_SetItemString(d, "blocks_cached", v ? v : Py_None);
39663966
Py_XDECREF(v);
39673967

3968-
MUTEX_UNLOCK(&ImagingDefaultArena.mutex);
3968+
IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
39693969
return d;
39703970
}
39713971

@@ -3975,14 +3975,14 @@ _reset_stats(PyObject *self, PyObject *args) {
39753975
return NULL;
39763976
}
39773977

3978-
MUTEX_LOCK(&ImagingDefaultArena.mutex);
3978+
IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
39793979
ImagingMemoryArena arena = &ImagingDefaultArena;
39803980
arena->stats_new_count = 0;
39813981
arena->stats_allocated_blocks = 0;
39823982
arena->stats_reused_blocks = 0;
39833983
arena->stats_reallocated_blocks = 0;
39843984
arena->stats_freed_blocks = 0;
3985-
MUTEX_UNLOCK(&ImagingDefaultArena.mutex);
3985+
IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
39863986

39873987
Py_INCREF(Py_None);
39883988
return Py_None;
@@ -3994,9 +3994,9 @@ _get_alignment(PyObject *self, PyObject *args) {
39943994
return NULL;
39953995
}
39963996

3997-
MUTEX_LOCK(&ImagingDefaultArena.mutex);
3997+
IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
39983998
int alignment = ImagingDefaultArena.alignment;
3999-
MUTEX_UNLOCK(&ImagingDefaultArena.mutex);
3999+
IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
40004000
return PyLong_FromLong(alignment);
40014001
}
40024002

@@ -4006,9 +4006,9 @@ _get_block_size(PyObject *self, PyObject *args) {
40064006
return NULL;
40074007
}
40084008

4009-
MUTEX_LOCK(&ImagingDefaultArena.mutex);
4009+
IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
40104010
int block_size = ImagingDefaultArena.block_size;
4011-
MUTEX_UNLOCK(&ImagingDefaultArena.mutex);
4011+
IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
40124012
return PyLong_FromLong(block_size);
40134013
}
40144014

@@ -4018,9 +4018,9 @@ _get_blocks_max(PyObject *self, PyObject *args) {
40184018
return NULL;
40194019
}
40204020

4021-
MUTEX_LOCK(&ImagingDefaultArena.mutex);
4021+
IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
40224022
int blocks_max = ImagingDefaultArena.blocks_max;
4023-
MUTEX_UNLOCK(&ImagingDefaultArena.mutex);
4023+
IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
40244024
return PyLong_FromLong(blocks_max);
40254025
}
40264026

@@ -4041,9 +4041,9 @@ _set_alignment(PyObject *self, PyObject *args) {
40414041
return NULL;
40424042
}
40434043

4044-
MUTEX_LOCK(&ImagingDefaultArena.mutex);
4044+
IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
40454045
ImagingDefaultArena.alignment = alignment;
4046-
MUTEX_UNLOCK(&ImagingDefaultArena.mutex);
4046+
IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
40474047

40484048
Py_INCREF(Py_None);
40494049
return Py_None;
@@ -4066,9 +4066,9 @@ _set_block_size(PyObject *self, PyObject *args) {
40664066
return NULL;
40674067
}
40684068

4069-
MUTEX_LOCK(&ImagingDefaultArena.mutex);
4069+
IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
40704070
ImagingDefaultArena.block_size = block_size;
4071-
MUTEX_UNLOCK(&ImagingDefaultArena.mutex);
4071+
IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
40724072

40734073
Py_INCREF(Py_None);
40744074
return Py_None;
@@ -4092,9 +4092,9 @@ _set_blocks_max(PyObject *self, PyObject *args) {
40924092
return NULL;
40934093
}
40944094

4095-
MUTEX_LOCK(&ImagingDefaultArena.mutex);
4095+
IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
40964096
int status = ImagingMemorySetBlocksMax(&ImagingDefaultArena, blocks_max);
4097-
MUTEX_UNLOCK(&ImagingDefaultArena.mutex);
4097+
IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
40984098
if (!status) {
40994099
return ImagingError_MemoryError();
41004100
}
@@ -4111,9 +4111,9 @@ _clear_cache(PyObject *self, PyObject *args) {
41114111
return NULL;
41124112
}
41134113

4114-
MUTEX_LOCK(&ImagingDefaultArena.mutex);
4114+
IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
41154115
ImagingMemoryClearCache(&ImagingDefaultArena, i);
4116-
MUTEX_UNLOCK(&ImagingDefaultArena.mutex);
4116+
IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
41174117

41184118
Py_INCREF(Py_None);
41194119
return Py_None;

src/libImaging/Imaging.h

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,32 @@ struct ImagingPaletteInstance {
149149
int keep_cache; /* This palette will be reused; keep cache */
150150
};
151151

152+
/*
 * Arena storage class and locking.
 *
 * IMAGING_ARENA_LOCK / IMAGING_ARENA_UNLOCK expand to nothing by default.
 * IMAGING_ARENA_TLS expands to the first available thread-local storage
 * keyword: thread_local for C++ or when HAVE_THREAD_LOCAL is defined, then
 * _Thread_local, __thread or __declspec(thread) according to the matching
 * HAVE_* macro. If none of those applies and Py_GIL_DISABLED is defined,
 * IMAGING_ARENA_TLS is empty, IMAGING_ARENA_LOCKING is defined and the lock
 * macros are redefined to PyMutex_Lock / PyMutex_Unlock. Otherwise
 * IMAGING_ARENA_TLS is empty and the lock macros stay no-ops.
 */
#define IMAGING_ARENA_LOCK(m)
153+
#define IMAGING_ARENA_UNLOCK(m)
154+
155+
#if defined(__cplusplus)
156+
#define IMAGING_ARENA_TLS thread_local
157+
#elif defined(HAVE_THREAD_LOCAL)
158+
#define IMAGING_ARENA_TLS thread_local
159+
#elif defined(HAVE__THREAD_LOCAL)
160+
#define IMAGING_ARENA_TLS _Thread_local
161+
#elif defined(HAVE___THREAD)
162+
#define IMAGING_ARENA_TLS __thread
163+
#elif defined(HAVE___DECLSPEC_THREAD_)
164+
#define IMAGING_ARENA_TLS __declspec(thread)
165+
#elif defined(Py_GIL_DISABLED)
166+
#define IMAGING_ARENA_TLS
167+
#define IMAGING_ARENA_LOCKING
168+
169+
#undef IMAGING_ARENA_LOCK
170+
#undef IMAGING_ARENA_UNLOCK
171+
172+
#define IMAGING_ARENA_LOCK(m) PyMutex_Lock(m)
173+
#define IMAGING_ARENA_UNLOCK(m) PyMutex_Unlock(m)
174+
#else
175+
#define IMAGING_ARENA_TLS
176+
#endif
177+
152178
typedef struct ImagingMemoryArena {
153179
int alignment; /* Alignment in memory of each line of an image */
154180
int block_size; /* Preferred block size, bytes */
@@ -161,15 +187,16 @@ typedef struct ImagingMemoryArena {
161187
int stats_reallocated_blocks; /* Number of blocks which were actually reallocated
162188
after retrieving */
163189
int stats_freed_blocks; /* Number of freed blocks */
164-
#ifdef Py_GIL_DISABLED
190+
191+
#ifdef IMAGING_ARENA_LOCKING
165192
PyMutex mutex;
166193
#endif
167194
} *ImagingMemoryArena;
168195

169196
/* Objects */
170197
/* ------- */
171198

172-
extern struct ImagingMemoryArena ImagingDefaultArena;
199+
extern IMAGING_ARENA_TLS struct ImagingMemoryArena ImagingDefaultArena;
173200
extern int
174201
ImagingMemorySetBlocksMax(ImagingMemoryArena arena, int blocks_max);
175202
extern void

src/libImaging/Storage.c

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -218,9 +218,9 @@ ImagingNewPrologueSubtype(const char *mode, int xsize, int ysize, int size) {
218218
break;
219219
}
220220

221-
MUTEX_LOCK(&ImagingDefaultArena.mutex);
221+
IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
222222
ImagingDefaultArena.stats_new_count += 1;
223-
MUTEX_UNLOCK(&ImagingDefaultArena.mutex);
223+
IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
224224

225225
return im;
226226
}
@@ -259,7 +259,7 @@ ImagingDelete(Imaging im) {
259259

260260
#define IMAGING_PAGE_SIZE (4096)
261261

262-
struct ImagingMemoryArena ImagingDefaultArena = {
262+
IMAGING_ARENA_TLS struct ImagingMemoryArena ImagingDefaultArena = {
263263
1, // alignment
264264
16 * 1024 * 1024, // block_size
265265
0, // blocks_max
@@ -270,7 +270,7 @@ struct ImagingMemoryArena ImagingDefaultArena = {
270270
0,
271271
0,
272272
0, // Stats
273-
#ifdef Py_GIL_DISABLED
273+
#ifdef IMAGING_ARENA_LOCKING
274274
{0},
275275
#endif
276276
};
@@ -369,12 +369,12 @@ ImagingDestroyArray(Imaging im) {
369369
int y = 0;
370370

371371
if (im->blocks) {
372-
MUTEX_LOCK(&ImagingDefaultArena.mutex);
372+
IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
373373
while (im->blocks[y].ptr) {
374374
memory_return_block(&ImagingDefaultArena, im->blocks[y]);
375375
y += 1;
376376
}
377-
MUTEX_UNLOCK(&ImagingDefaultArena.mutex);
377+
IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
378378
free(im->blocks);
379379
}
380380
}
@@ -504,21 +504,21 @@ ImagingNewInternal(const char *mode, int xsize, int ysize, int dirty) {
504504
return NULL;
505505
}
506506

507-
MUTEX_LOCK(&ImagingDefaultArena.mutex);
507+
IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
508508
Imaging tmp = ImagingAllocateArray(
509509
im, &ImagingDefaultArena, dirty, ImagingDefaultArena.block_size
510510
);
511-
MUTEX_UNLOCK(&ImagingDefaultArena.mutex);
511+
IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
512512
if (tmp) {
513513
return im;
514514
}
515515

516516
ImagingError_Clear();
517517

518518
// Try to allocate the image once more with smallest possible block size
519-
MUTEX_LOCK(&ImagingDefaultArena.mutex);
519+
IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
520520
tmp = ImagingAllocateArray(im, &ImagingDefaultArena, dirty, IMAGING_PAGE_SIZE);
521-
MUTEX_UNLOCK(&ImagingDefaultArena.mutex);
521+
IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
522522
if (tmp) {
523523
return im;
524524
}

0 commit comments

Comments
 (0)