Skip to content

Commit 5a3d902

Browse files
committed
test(core/protocols): add experimental single-pass json serializer
1 parent c87c1c1 commit 5a3d902

File tree

8 files changed

+241
-52
lines changed

8 files changed

+241
-52
lines changed

packages/core/src/submodules/protocols/cbor/CborCodec.spec.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ describe("performance baseline indicator", () => {
3636
/**
3737
* No assertion here.
3838
* In the initial dual-pass implementation,
39-
* par time is 0 to 10ms for up to 200746 bytes of CBOR. Up to 30 kb/ms. (kuhe's computer)
39+
* par time is 0 to 23ms for up to 381014 bytes of CBOR. Up to 15 kb/ms. (kuhe's computer)
4040
*/
4141
console.log("CborShapeSerializer performance timings", timings);
4242
});
@@ -65,8 +65,8 @@ describe("performance baseline indicator", () => {
6565
/**
6666
* No assertion here.
6767
* In the initial dual-pass implementation,
68-
* par time is 0 to 3ms for up to 100394 bytes of CBOR. Up to 45 kb/ms. (kuhe's computer)
68+
* par time is 0 to 9ms for up to 190550 bytes of CBOR. Up to 23 kb/ms. (kuhe's computer)
6969
*/
7070
console.log("CborShapeDeserializer performance timings", timings);
7171
});
72-
});
72+
}, 30_000);

packages/core/src/submodules/protocols/json/JsonShapeDeserializer.spec.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -190,9 +190,9 @@ describe(JsonShapeDeserializer.name, () => {
190190
/**
191191
* No assertion here.
192192
* In the initial dual-pass implementation,
193-
* par time is 0 to 10ms for up to 135224 chars of JSON. Up to 20 kb/ms. (kuhe's computer)
193+
* par time is 0 to 25ms for up to 288899 chars of JSON. Up to 13 kb/ms. (kuhe's computer)
194194
*/
195195
console.log("JsonShapeDeserializer performance timings", timings);
196196
});
197-
});
197+
}, 30_000);
198198
});

packages/core/src/submodules/protocols/json/JsonShapeSerializer.spec.ts

Lines changed: 45 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,19 @@ import type { TimestampEpochSecondsSchema } from "@smithy/types";
33
import { describe, expect, test as it } from "vitest";
44

55
import { createNestingWidget, nestingWidget, widget } from "../test-schema.spec";
6+
import { SinglePassJsonShapeSerializer } from "./experimental/SinglePassJsonShapeSerializer";
67
import { JsonShapeSerializer } from "./JsonShapeSerializer";
78

89
describe(JsonShapeSerializer.name, () => {
9-
const serializer = new JsonShapeSerializer({
10+
const serializer1 = new JsonShapeSerializer({
11+
jsonName: true,
12+
timestampFormat: { default: 7 satisfies TimestampEpochSecondsSchema, useTrait: true },
13+
});
14+
15+
const serializer2 = new SinglePassJsonShapeSerializer({
1016
jsonName: true,
1117
timestampFormat: { default: 7 satisfies TimestampEpochSecondsSchema, useTrait: true },
1218
});
13-
serializer.setSerdeContext({
14-
base64Encoder: (input: Uint8Array) => {
15-
return Buffer.from(input).toString("base64");
16-
},
17-
} as any);
1819

1920
it("serializes data to JSON", async () => {
2021
const data = {
@@ -23,45 +24,53 @@ describe(JsonShapeSerializer.name, () => {
2324
bigdecimal: new NumericValue("0.10000000000000000000000054321", "bigDecimal"),
2425
blob: new Uint8Array([0, 0, 0, 1]),
2526
};
26-
serializer.write(widget, data);
27-
const serialization = serializer.flush();
27+
serializer1.write(widget, data);
28+
const serialization = serializer1.flush();
2829
expect(serialization).toEqual(
2930
`{"blob":"AAAAAQ==","timestamp":0,"bigint":10000000000000000000000054321,"bigdecimal":0.10000000000000000000000054321}`
3031
);
3132
});
3233

3334
describe("performance baseline indicator", () => {
34-
it("should serialize objects", () => {
35-
const timings: string[] = [];
36-
const objects = [];
35+
for (const serializer of [serializer1, serializer2]) {
36+
it("should serialize objects", () => {
37+
const timings: string[] = [];
38+
const objects = [];
3739

38-
// warmup
39-
for (let i = 0; i < 13; ++i) {
40-
const o = createNestingWidget(2 ** i);
41-
objects.push(o);
42-
serializer.write(nestingWidget, o);
43-
serializer.flush();
44-
}
40+
// warmup
41+
for (let i = 0; i < 12; ++i) {
42+
const o = createNestingWidget(2 ** i);
43+
objects.push(o);
44+
serializer.write(nestingWidget, o);
45+
serializer.write(nestingWidget, o);
46+
serializer.write(nestingWidget, o);
47+
serializer.write(nestingWidget, o);
48+
serializer.flush();
49+
}
4550

46-
for (let i = 0; i < objects.length; ++i) {
47-
const o = objects[i];
51+
for (let i = 0; i < objects.length; ++i) {
52+
const o = objects[i];
4853

49-
const A = performance.now();
50-
serializer.write(nestingWidget, o);
51-
const serialization = serializer.flush();
52-
const B = performance.now();
54+
const A = performance.now();
55+
serializer.write(nestingWidget, o);
56+
const serialization = serializer.flush();
57+
const B = performance.now();
5358

54-
timings.push(
55-
`${B - A} (JSON length = ${serialization.length}, ${serialization.length / 1024 / (B - A)} kb/ms)`
56-
);
57-
}
59+
timings.push(
60+
`${B - A} (JSON length = ${serialization.length}, ${serialization.length / 1024 / (B - A)} kb/ms)`
61+
);
62+
}
5863

59-
/**
60-
* No assertion here.
61-
* In the initial dual-pass implementation,
62-
* par time is 0 to 30ms for up to 270392 chars of JSON. Up to 20 kb/ms. (kuhe's computer)
63-
*/
64-
console.log("JsonShapeSerializer performance timings", timings);
65-
});
66-
});
64+
/**
65+
* No assertion here.
66+
* In the initial dual-pass implementation,
67+
* par time is 0 to 30ms for up to 288899 chars of JSON. Up to 11 kb/ms. (kuhe's computer)
68+
*
69+
* In the single-pass implementation using string buildup,
70+
* par time is 0 to 51ms for up to 288899 chars of JSON. Up to 13 kb/ms. (kuhe's computer)
71+
*/
72+
console.log(`${serializer.constructor.name} performance timings`, timings);
73+
});
74+
}
75+
}, 30_000);
6776
});

packages/core/src/submodules/protocols/json/JsonShapeSerializer.ts

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -102,10 +102,7 @@ export class JsonShapeSerializer extends SerdeContextConfig implements ShapeSeri
102102
if (ns === this.rootSchema) {
103103
return value;
104104
}
105-
if (!this.serdeContext?.base64Encoder) {
106-
return toBase64(value);
107-
}
108-
return this.serdeContext?.base64Encoder(value);
105+
return (this.serdeContext?.base64Encoder ?? toBase64)(value);
109106
}
110107

111108
if ((ns.isTimestampSchema() || ns.isDocumentSchema()) && value instanceof Date) {
Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
import { determineTimestampFormat } from "@smithy/core/protocols";
2+
import { NormalizedSchema } from "@smithy/core/schema";
3+
import { dateToUtcString, generateIdempotencyToken, LazyJsonString, NumericValue } from "@smithy/core/serde";
4+
import type {
5+
Schema,
6+
ShapeSerializer,
7+
TimestampDateTimeSchema,
8+
TimestampEpochSecondsSchema,
9+
TimestampHttpDateSchema,
10+
} from "@smithy/types";
11+
import { toBase64 } from "@smithy/util-base64";
12+
13+
import { SerdeContextConfig } from "../../ConfigurableSerdeContext";
14+
import type { JsonSettings } from "../JsonCodec";
15+
16+
/**
 * Experimental JSON shape serializer that builds the JSON text in a single pass
 * using string concatenation, instead of building an intermediate object and
 * handing it to JSON.stringify.
 *
 * It is not faster than the dual-pass approach (which ends with native
 * JSON.stringify) by a large enough margin to justify using it: it seems to be
 * barely faster for some mid-range object sizes but slower on the high end.
 *
 * @internal
 */
export class SinglePassJsonShapeSerializer extends SerdeContextConfig implements ShapeSerializer<string> {
  /** JSON text produced by the most recent write; returned as-is by flush(). */
  private buffer = "";
  private rootSchema: NormalizedSchema | undefined;

  public constructor(public readonly settings: JsonSettings) {
    super();
  }

  /**
   * Serializes the value according to the schema and stores the resulting JSON text.
   * Each call replaces whatever a previous call stored.
   */
  public write(schema: Schema, value: unknown): void {
    this.rootSchema = NormalizedSchema.of(schema);
    this.buffer = this.writeObject(this.rootSchema, value);
  }

  /**
   * Same as write(), and additionally appends a `__type` member holding the schema's
   * qualified name when the serialized value is a JSON object.
   *
   * @internal
   */
  public writeDiscriminatedDocument(schema: Schema, value: unknown): void {
    this.write(schema, value);
    const typeName = NormalizedSchema.of(schema).getName(true);
    // The buffer is JSON text, not an object, so the discriminator has to be spliced
    // in before the closing brace.
    if (typeof typeName === "string" && this.buffer.startsWith("{")) {
      const separator = this.buffer.length > 2 ? "," : "";
      this.buffer = `${this.buffer.slice(0, -1)}${separator}"__type":${JSON.stringify(typeName)}}`;
    }
  }

  /**
   * @returns the JSON text stored by the most recent write.
   */
  public flush(): string {
    this.rootSchema = undefined;

    return this.buffer;
  }

  /**
   * Serializes a container value (list, structure, map or document object).
   *
   * @returns JSON text for the container, or an empty string when the value is
   * null/undefined or is a primitive that matches no container schema.
   */
  private writeObject(schema: Schema, value: unknown): string {
    if (value == undefined) {
      return "";
    }

    const ns = NormalizedSchema.of(schema);
    const sparse = !!ns.getMergedTraits().sparse;

    if (Array.isArray(value) && (ns.isDocumentSchema() || ns.isListSchema())) {
      const itemSchema = ns.getValueSchema();
      let items = "";
      for (const item of value) {
        // null/undefined items are only kept for sparse lists.
        if (item != null || sparse) {
          items += (items ? "," : "") + this.writeValue(itemSchema, item);
        }
      }
      return `[${items}]`;
    }

    if (ns.isStructSchema()) {
      let members = "";
      for (const [name, member] of ns.structIterator()) {
        const item = (value as Record<string, unknown>)[name];
        // Absent members are skipped, except idempotency tokens which writeValue may generate.
        if (item != null || member.isIdempotencyToken()) {
          const targetKey = this.settings.jsonName ? member.getMergedTraits().jsonName ?? name : name;
          members += `${members ? "," : ""}${JSON.stringify(targetKey)}:${this.writeValue(member, item)}`;
        }
      }
      return `{${members}}`;
    }

    if (ns.isMapSchema() || ns.isDocumentSchema()) {
      // Map values are described by the map's value schema; document members are documents themselves.
      const valueSchema = ns.isMapSchema() ? ns.getValueSchema() : ns;
      let entries = "";
      for (const [k, v] of Object.entries(value)) {
        if (v != null || sparse) {
          entries += `${entries ? "," : ""}${JSON.stringify(k)}:${this.writeValue(valueSchema, v)}`;
        }
      }
      return `{${entries}}`;
    }

    if (typeof value === "object") {
      // The schema matches no container type: fall back to native serialization
      // rather than emitting an empty value, which would make the enclosing JSON invalid.
      return JSON.stringify(value) ?? "null";
    }

    return "";
  }

  /**
   * Serializes a single member or item value to JSON text.
   * Strings are escaped with JSON.stringify; null and undefined become `null`.
   */
  private writeValue(schema: Schema, value: unknown): string {
    const ns = NormalizedSchema.of(schema);

    if (
      (ns.isBlobSchema() && (value instanceof Uint8Array || typeof value === "string")) ||
      (ns.isDocumentSchema() && value instanceof Uint8Array)
    ) {
      return `"${(this.serdeContext?.base64Encoder ?? toBase64)(value)}"`;
    }

    if ((ns.isTimestampSchema() || ns.isDocumentSchema()) && value instanceof Date) {
      const format = determineTimestampFormat(ns, this.settings);
      switch (format) {
        case 5 satisfies TimestampDateTimeSchema:
          return `"${value.toISOString().replace(".000Z", "Z")}"`;
        case 6 satisfies TimestampHttpDateSchema:
          return `"${dateToUtcString(value)}"`;
        case 7 satisfies TimestampEpochSecondsSchema:
          return String(value.getTime() / 1000);
        default:
          console.warn("Missing timestamp format, using epoch seconds", value);
          return String(value.getTime() / 1000);
      }
    }

    if (ns.isNumericSchema() && typeof value === "number" && !Number.isFinite(value)) {
      // Non-finite numbers have no JSON literal, so they are written as strings
      // ("Infinity", "-Infinity", "NaN").
      return `"${value}"`;
    }

    if (ns.isStringSchema()) {
      if (typeof value === "undefined" && ns.isIdempotencyToken()) {
        return JSON.stringify(generateIdempotencyToken());
      }

      if (typeof value === "string") {
        const mediaType = ns.getMergedTraits().mediaType;
        if (mediaType && (mediaType === "application/json" || mediaType.endsWith("+json"))) {
          return JSON.stringify(LazyJsonString.from(value).toString());
        }
      }
    }

    if (value instanceof NumericValue) {
      // ns can be BigDecimal or Document. The numeric text is written unquoted.
      return value.string;
    }

    if (value == null) {
      return "null";
    }

    if (typeof value === "object") {
      return this.writeObject(ns, value);
    }

    if (typeof value === "string") {
      return JSON.stringify(value);
    }

    if (typeof value === "number" && !Number.isFinite(value)) {
      // Same result JSON.stringify gives for a non-finite number.
      return "null";
    }

    return String(value);
  }
}

packages/core/src/submodules/protocols/test-schema.spec.ts

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,13 +41,28 @@ export const nestingWidget: StaticStructureSchema = [
4141
"ns",
4242
"Struct",
4343
0,
44-
["string", "date", "blob", "nested"],
45-
[0 satisfies StringSchema, 4 satisfies TimestampDefaultSchema, 21 satisfies BlobSchema, () => nestingWidget],
44+
["string", "date", "blob", "number", "list", "map", "nested"],
45+
[
46+
0 satisfies StringSchema,
47+
4 satisfies TimestampDefaultSchema,
48+
21 satisfies BlobSchema,
49+
1 satisfies NumericSchema,
50+
64 | 1,
51+
128 | 0,
52+
() => nestingWidget,
53+
],
4654
];
4755

4856
export function createNestingWidget(nesting = 0) {
4957
const object = {
5058
string: "hello, world",
59+
number: 100000,
60+
list: [1, 2, 3, 4, 5, 6, 7, 8, 9],
61+
map: {
62+
a: "A",
63+
b: "B",
64+
c: "C",
65+
},
5166
date: new Date(0),
5267
blob: new Uint8Array([0, 1, 2, 3, 4, 5, 6, 7]),
5368
nested: undefined,

packages/core/src/submodules/protocols/xml/XmlShapeDeserializer.spec.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ describe(XmlShapeDeserializer.name, () => {
4646
const strings = [];
4747

4848
// warmup
49-
for (let i = 0; i < 13; ++i) {
49+
for (let i = 0; i < 12; ++i) {
5050
const o = createNestingWidget(2 ** i);
5151
serializer.write(nestingWidget, o);
5252
const json = serializer.flush();
@@ -65,9 +65,9 @@ describe(XmlShapeDeserializer.name, () => {
6565
/**
6666
* No assertion here.
6767
* In the initial dual-pass implementation,
68-
* par time is 0 to 45ms for up to 426106 chars of XML. Up to 10 kb/ms. (kuhe's computer)
68+
* par time is 0 to 187ms for up to 905676 chars of XML. Up to 10 kb/ms. (kuhe's computer)
6969
*/
7070
console.log("XmlShapeDeserializer performance timings", timings);
7171
});
72-
});
72+
}, 30_000);
7373
});

packages/core/src/submodules/protocols/xml/XmlShapeSerializer.spec.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,9 +70,9 @@ describe(XmlShapeSerializer.name, () => {
7070
/**
7171
* No assertion here.
7272
* In the initial dual-pass implementation,
73-
* par time is 0 to 170ms for up to 426106 chars of XML. Up to 25 kb/ms. (kuhe's computer)
73+
* par time is 0 to 600ms for up to 1810892 chars of XML. Up to 28 kb/ms. (kuhe's computer)
7474
*/
7575
console.log("XmlShapeSerializer performance timings", timings);
7676
});
77-
});
77+
}, 30_000);
7878
});

0 commit comments

Comments
 (0)