Skip to content

Commit ee43cd3

Browse files
fleming79Carreauhenryiiipre-commit-ci[bot]
authored
Faster message serialization (#1064)
* Add orjson_packer and orjson_unpacker to speed up serialization of session messages. * Refactor orjson packer to use functools.partial. * Add msgpack support for message serialization in Session * Refactor packer/unpacker change handling for improved readability * Fix test_serialize_objects datetime checks on ci to compare using datetime format. * Fix datetime deserialization in test_serialize_objects to use dateutil.parser.isoparse * Add PicklingError to exception handling in test_cannot_serialize * Update api docs * Replace dateutil.parser.isoparse with jsonutil.parse_date in test_serialize_objects for datetime validation * Use rep in fstring Co-authored-by: M Bussonnier <[email protected]> * Add msgpack as a test dependency. * Fallback to json_packer and json_unpacker for orjson to handle for better resilience. * Fix: test_args checking for removed _default_pack_unpack and _default_pack_unpack. * Add type annotation to orjson_packer. * Add the other missing type annoatation. * Change orjson from a dependency to an optional dependency. Test against the minimum version in CI. * Double timeout for test_minimum_verisons * Fix invalid argument name. * Add orjson and msgpack as additional_dependencies for mypy in .pre-commit-config.yaml. * Remove # type:ignore[import-not-found]. * Should return result of orjson.loads. * fix: get mypy working with orjson/msgpack Signed-off-by: Henry Schreiner <[email protected]> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix for previous refactor. --------- Signed-off-by: Henry Schreiner <[email protected]> Co-authored-by: M Bussonnier <[email protected]> Co-authored-by: Alan <> Co-authored-by: Henry Schreiner <[email protected]> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 0bce36f commit ee43cd3

File tree

5 files changed

+136
-80
lines changed

5 files changed

+136
-80
lines changed

.github/workflows/main.yml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ jobs:
126126
test_minimum_verisons:
127127
name: Test Minimum Versions
128128
runs-on: ubuntu-latest
129-
timeout-minutes: 10
129+
timeout-minutes: 20
130130
steps:
131131
- uses: actions/checkout@v4
132132
- uses: jupyterlab/maintainer-tools/.github/actions/base-setup@v1
@@ -143,6 +143,11 @@ jobs:
143143
run: |
144144
hatch -vv run test:nowarn
145145
146+
- name: Run the unit tests with orjson installed
147+
run: |
148+
hatch -e test run pip install orjson
149+
hatch -vv run test:nowarn
150+
146151
test_prereleases:
147152
name: Test Prereleases
148153
timeout-minutes: 10

.pre-commit-config.yaml

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,14 +37,18 @@ repos:
3737
types_or: [yaml, html, json]
3838

3939
- repo: https://github.com/pre-commit/mirrors-mypy
40-
rev: "v1.18.2"
40+
rev: "v1.19.0"
4141
hooks:
4242
- id: mypy
4343
files: jupyter_client
4444
stages: [manual]
4545
args: ["--install-types", "--non-interactive"]
4646
additional_dependencies:
47-
["traitlets>=5.13", "ipykernel>=6.26", "jupyter_core>=5.3.2"]
47+
- traitlets>=5.13
48+
- ipykernel>=6.26
49+
- jupyter_core>=5.3.2
50+
- orjson>=3.11.4
51+
- msgpack-types
4852

4953
- repo: https://github.com/adamchainz/blacken-docs
5054
rev: "1.20.0"

jupyter_client/session.py

Lines changed: 66 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
# Distributed under the terms of the Modified BSD License.
1414
from __future__ import annotations
1515

16+
import functools
1617
import hashlib
1718
import hmac
1819
import json
@@ -33,6 +34,7 @@
3334
from traitlets import (
3435
Any,
3536
Bool,
37+
Callable,
3638
CBytes,
3739
CUnicode,
3840
Dict,
@@ -125,15 +127,48 @@ def json_unpacker(s: str | bytes) -> t.Any:
125127
return json.loads(s)
126128

127129

130+
try:
131+
import orjson
132+
except ModuleNotFoundError:
133+
has_orjson = False
134+
orjson_packer, orjson_unpacker = json_packer, json_unpacker
135+
else:
136+
has_orjson = True
137+
138+
def orjson_packer(
139+
obj: t.Any, *, option: int | None = orjson.OPT_NAIVE_UTC | orjson.OPT_UTC_Z
140+
) -> bytes:
141+
"""Convert a json object to a bytes using orjson with fallback to json_packer."""
142+
try:
143+
return orjson.dumps(obj, default=json_default, option=option)
144+
except Exception:
145+
return json_packer(obj)
146+
147+
def orjson_unpacker(s: str | bytes) -> t.Any:
148+
"""Convert a json bytes or string to an object using orjson with fallback to json_unpacker."""
149+
try:
150+
return orjson.loads(s)
151+
except Exception:
152+
return json_unpacker(s)
153+
154+
155+
try:
156+
import msgpack
157+
except ModuleNotFoundError:
158+
has_msgpack = False
159+
else:
160+
has_msgpack = True
161+
msgpack_packer = functools.partial(msgpack.packb, default=json_default)
162+
msgpack_unpacker = msgpack.unpackb
163+
164+
128165
def pickle_packer(o: t.Any) -> bytes:
129166
"""Pack an object using the pickle module."""
130167
return pickle.dumps(squash_dates(o), PICKLE_PROTOCOL)
131168

132169

133170
pickle_unpacker = pickle.loads
134171

135-
default_packer = json_packer
136-
default_unpacker = json_unpacker
137172

138173
DELIM = b"<IDS|MSG>"
139174
# singleton dummy tracker, which will always report as done
@@ -316,7 +351,7 @@ class Session(Configurable):
316351
317352
debug : bool
318353
whether to trigger extra debugging statements
319-
packer/unpacker : str : 'json', 'pickle' or import_string
354+
packer/unpacker : str : 'orjson', 'json', 'pickle', 'msgpack' or import_string
320355
importstrings for methods to serialize message parts. If just
321356
'json' or 'pickle', predefined JSON and pickle packers will be used.
322357
Otherwise, the entire importstring must be used.
@@ -351,48 +386,42 @@ class Session(Configurable):
351386
""",
352387
)
353388

389+
# serialization traits:
354390
packer = DottedObjectName(
355-
"json",
391+
"orjson" if has_orjson else "json",
356392
config=True,
357393
help="""The name of the packer for serializing messages.
358394
Should be one of 'json', 'pickle', or an import name
359395
for a custom callable serializer.""",
360396
)
361-
362-
@observe("packer")
363-
def _packer_changed(self, change: t.Any) -> None:
364-
new = change["new"]
365-
if new.lower() == "json":
366-
self.pack = json_packer
367-
self.unpack = json_unpacker
368-
self.unpacker = new
369-
elif new.lower() == "pickle":
370-
self.pack = pickle_packer
371-
self.unpack = pickle_unpacker
372-
self.unpacker = new
373-
else:
374-
self.pack = import_item(str(new))
375-
376397
unpacker = DottedObjectName(
377-
"json",
398+
"orjson" if has_orjson else "json",
378399
config=True,
379400
help="""The name of the unpacker for unserializing messages.
380401
Only used with custom functions for `packer`.""",
381402
)
382-
383-
@observe("unpacker")
384-
def _unpacker_changed(self, change: t.Any) -> None:
385-
new = change["new"]
386-
if new.lower() == "json":
387-
self.pack = json_packer
388-
self.unpack = json_unpacker
389-
self.packer = new
390-
elif new.lower() == "pickle":
391-
self.pack = pickle_packer
392-
self.unpack = pickle_unpacker
393-
self.packer = new
403+
pack = Callable(orjson_packer if has_orjson else json_packer) # the actual packer function
404+
unpack = Callable(
405+
orjson_unpacker if has_orjson else json_unpacker
406+
) # the actual unpacker function
407+
408+
@observe("packer", "unpacker")
409+
def _packer_unpacker_changed(self, change: t.Any) -> None:
410+
new = change["new"].lower()
411+
if new == "orjson" and has_orjson:
412+
self.pack, self.unpack = orjson_packer, orjson_unpacker
413+
elif new == "json" or new == "orjson":
414+
self.pack, self.unpack = json_packer, json_unpacker
415+
elif new == "pickle":
416+
self.pack, self.unpack = pickle_packer, pickle_unpacker
417+
elif new == "msgpack" and has_msgpack:
418+
self.pack, self.unpack = msgpack_packer, msgpack_unpacker
394419
else:
395-
self.unpack = import_item(str(new))
420+
obj = import_item(str(change["new"]))
421+
name = "pack" if change["name"] == "packer" else "unpack"
422+
self.set_trait(name, obj)
423+
return
424+
self.packer = self.unpacker = change["new"]
396425

397426
session = CUnicode("", config=True, help="""The UUID identifying this session.""")
398427

@@ -417,8 +446,7 @@ def _session_changed(self, change: t.Any) -> None:
417446
metadata = Dict(
418447
{},
419448
config=True,
420-
help="Metadata dictionary, which serves as the default top-level metadata dict for each "
421-
"message.",
449+
help="Metadata dictionary, which serves as the default top-level metadata dict for each message.",
422450
)
423451

424452
# if 0, no adapting to do.
@@ -487,25 +515,6 @@ def _keyfile_changed(self, change: t.Any) -> None:
487515
# for protecting against sends from forks
488516
pid = Integer()
489517

490-
# serialization traits:
491-
492-
pack = Any(default_packer) # the actual packer function
493-
494-
@observe("pack")
495-
def _pack_changed(self, change: t.Any) -> None:
496-
new = change["new"]
497-
if not callable(new):
498-
raise TypeError("packer must be callable, not %s" % type(new))
499-
500-
unpack = Any(default_unpacker) # the actual packer function
501-
502-
@observe("unpack")
503-
def _unpack_changed(self, change: t.Any) -> None:
504-
# unpacker is not checked - it is assumed to be
505-
new = change["new"]
506-
if not callable(new):
507-
raise TypeError("unpacker must be callable, not %s" % type(new))
508-
509518
# thresholds:
510519
copy_threshold = Integer(
511520
2**16,
@@ -515,8 +524,7 @@ def _unpack_changed(self, change: t.Any) -> None:
515524
buffer_threshold = Integer(
516525
MAX_BYTES,
517526
config=True,
518-
help="Threshold (in bytes) beyond which an object's buffer should be extracted to avoid "
519-
"pickling.",
527+
help="Threshold (in bytes) beyond which an object's buffer should be extracted to avoid pickling.",
520528
)
521529
item_threshold = Integer(
522530
MAX_ITEMS,
@@ -534,7 +542,7 @@ def __init__(self, **kwargs: t.Any) -> None:
534542
535543
debug : bool
536544
whether to trigger extra debugging statements
537-
packer/unpacker : str : 'json', 'pickle' or import_string
545+
packer/unpacker : str : 'orjson', 'json', 'pickle', 'msgpack' or import_string
538546
importstrings for methods to serialize message parts. If just
539547
'json' or 'pickle', predefined JSON and pickle packers will be used.
540548
Otherwise, the entire importstring must be used.
@@ -626,10 +634,7 @@ def _check_packers(self) -> None:
626634
unpacked = unpack(packed)
627635
assert unpacked == msg_list
628636
except Exception as e:
629-
msg = (
630-
f"unpacker '{self.unpacker}' could not handle output from packer"
631-
f" '{self.packer}': {e}"
632-
)
637+
msg = f"unpacker {self.unpacker!r} could not handle output from packer {self.packer!r}: {e}"
633638
raise ValueError(msg) from e
634639

635640
# check datetime support

pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ test = [
5555
"pytest-jupyter[client]>=0.6.2",
5656
"pytest-cov",
5757
"pytest-timeout",
58+
"msgpack"
5859
]
5960
docs = [
6061
"ipykernel",
@@ -65,6 +66,7 @@ docs = [
6566
"sphinxcontrib-spelling",
6667
"sphinx-autodoc-typehints",
6768
]
69+
orjson = ["orjson"] # When orjson is installed it will be used for faster pack and unpack
6870

6971
[project.scripts]
7072
jupyter-kernelspec = "jupyter_client.kernelspecapp:KernelSpecApp.launch_instance"

0 commit comments

Comments
 (0)