diff --git a/Lib/test/test_json/test_dump.py b/Lib/test/test_json/test_dump.py index 39470754003bb6..64291d3fd66e20 100644 --- a/Lib/test/test_json/test_dump.py +++ b/Lib/test/test_json/test_dump.py @@ -65,6 +65,39 @@ def __lt__(self, o): d[1337] = "true.dat" self.assertEqual(self.dumps(d, sort_keys=True), '{"1337": "true.dat"}') + def test_mutate_items_during_encode(self): + c_make_encoder = getattr(self.json.encoder, 'c_make_encoder', None) + if c_make_encoder is None: + self.skipTest("c_make_encoder not available") + + cache = [] + + class BadDict(dict): + def __init__(self): + super().__init__(real=1) + + def items(self): + entries = [("boom", object())] + cache.append(entries) + return entries + + def encode_str(obj): + if cache: + cache.pop().clear() + return '"x"' + + encoder = c_make_encoder( + None, lambda o: "null", + encode_str, None, + ": ", ", ", False, + False, True + ) + + try: + encoder(BadDict(), 0) + except (ValueError, RuntimeError): + pass + class TestPyDump(TestDump, PyTest): pass diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-12-17-04-10-35.gh-issue-142831.ee3t4L.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-12-17-04-10-35.gh-issue-142831.ee3t4L.rst new file mode 100644 index 00000000000000..5fa3cd2727a9e5 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-12-17-04-10-35.gh-issue-142831.ee3t4L.rst @@ -0,0 +1,2 @@ +Fix a crash in the :mod:`json` module where a use-after-free could occur if +the object being encoded is modified during serialization. diff --git a/Modules/_json.c b/Modules/_json.c index 14714d4b346546..993ee72d8a0e6f 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -1733,15 +1733,14 @@ _encoder_iterate_mapping_lock_held(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *key, *value; for (Py_ssize_t i = 0; i < PyList_GET_SIZE(items); i++) { PyObject *item = PyList_GET_ITEM(items, i); -#ifdef Py_GIL_DISABLED - // gh-119438: in the free-threading build the critical section on items can get suspended + + // GH-142831: The item must be strong-referenced to avoid + // use-after-free if the user code modifies the list during iteration. Py_INCREF(item); -#endif + if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) { PyErr_SetString(PyExc_ValueError, "items must return 2-tuples"); -#ifdef Py_GIL_DISABLED Py_DECREF(item); -#endif return -1; } @@ -1750,14 +1749,10 @@ _encoder_iterate_mapping_lock_held(PyEncoderObject *s, PyUnicodeWriter *writer, if (encoder_encode_key_value(s, writer, first, dct, key, value, indent_level, indent_cache, separator) < 0) { -#ifdef Py_GIL_DISABLED Py_DECREF(item); -#endif return -1; } -#ifdef Py_GIL_DISABLED Py_DECREF(item); -#endif } return 0; @@ -1772,24 +1767,20 @@ _encoder_iterate_dict_lock_held(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *key, *value; Py_ssize_t pos = 0; while (PyDict_Next(dct, &pos, &key, &value)) { -#ifdef Py_GIL_DISABLED - // gh-119438: in the free-threading build the critical section on dct can get suspended + // GH-142831: The key and value must be strong-referenced to avoid + // use-after-free if the user code modifies the dict during iteration. Py_INCREF(key); Py_INCREF(value); -#endif + if (encoder_encode_key_value(s, writer, first, dct, key, value, indent_level, indent_cache, separator) < 0) { -#ifdef Py_GIL_DISABLED Py_DECREF(key); Py_DECREF(value); -#endif return -1; } -#ifdef Py_GIL_DISABLED Py_DECREF(key); Py_DECREF(value); -#endif } return 0; } @@ -1893,28 +1884,23 @@ _encoder_iterate_fast_seq_lock_held(PyEncoderObject *s, PyUnicodeWriter *writer, { for (Py_ssize_t i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) { PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i); -#ifdef Py_GIL_DISABLED - // gh-119438: in the free-threading build the critical section on s_fast can get suspended + + // GH-142831: The object must be strong-referenced to avoid use-after-free + // if the user code modifies the sequence during iteration. Py_INCREF(obj); -#endif + if (i) { if (PyUnicodeWriter_WriteStr(writer, separator) < 0) { -#ifdef Py_GIL_DISABLED Py_DECREF(obj); -#endif return -1; } } if (encoder_listencode_obj(s, writer, obj, indent_level, indent_cache)) { _PyErr_FormatNote("when serializing %T item %zd", seq, i); -#ifdef Py_GIL_DISABLED Py_DECREF(obj); -#endif return -1; } -#ifdef Py_GIL_DISABLED Py_DECREF(obj); -#endif } return 0; }