diff --git a/packages/core/src/submodules/protocols/cbor/CborCodec.spec.ts b/packages/core/src/submodules/protocols/cbor/CborCodec.spec.ts
new file mode 100644
index 000000000000..c5438929c935
--- /dev/null
+++ b/packages/core/src/submodules/protocols/cbor/CborCodec.spec.ts
@@ -0,0 +1,72 @@
+import { CborCodec } from "@smithy/core/cbor";
+import { describe, test as it } from "vitest";
+
+import { createNestingWidget, nestingWidget } from "../test-schema.spec";
+
+describe("performance baseline indicator", () => {
+  const codec = new CborCodec();
+  const serializer = codec.createSerializer();
+  const deserializer = codec.createDeserializer();
+
+  it("should serialize objects", () => {
+    const timings: string[] = [];
+    const objects = [];
+
+    // warmup
+    for (let i = 0; i < 13; ++i) {
+      const o = createNestingWidget(2 ** i);
+      objects.push(o);
+      serializer.write(nestingWidget, o);
+      serializer.flush();
+    }
+
+    for (let i = 0; i < objects.length; ++i) {
+      const o = objects[i];
+
+      const A = performance.now();
+      serializer.write(nestingWidget, o);
+      const serialization = serializer.flush();
+      const B = performance.now();
+
+      timings.push(
+        `${B - A} (byte length = ${serialization.byteLength}, ${serialization.byteLength / 1024 / (B - A)} kb/ms)`
+      );
+    }
+
+    /**
+     * No assertion here.
+     * In the initial dual-pass implementation,
+     * par time is 0 to 23ms for up to 381014 bytes of CBOR. Up to 15 kb/ms. (kuhe's computer)
+     */
+    console.log("CborShapeSerializer performance timings", timings);
+  });
+
+  it("should deserialize bytes", async () => {
+    const timings: string[] = [];
+    const strings = [];
+
+    // warmup
+    for (let i = 0; i < 12; ++i) {
+      const o = createNestingWidget(2 ** i);
+      serializer.write(nestingWidget, o);
+      const json = serializer.flush();
+      strings.push(json);
+      await deserializer.read(nestingWidget, json);
+    }
+
+    for (const s of strings) {
+      const A = performance.now();
+      await deserializer.read(nestingWidget, s);
+      const B = performance.now();
+
+      timings.push(`${B - A} (byte length = ${s.byteLength}, ${s.byteLength / 1024 / (B - A)} kb/ms)`);
+    }
+
+    /**
+     * No assertion here.
+     * In the initial dual-pass implementation,
+     * par time is 0 to 9ms for up to 190550 bytes of CBOR. Up to 23 kb/ms. (kuhe's computer)
+     */
+    console.log("CborShapeDeserializer performance timings", timings);
+  });
+}, 30_000);
diff --git a/packages/core/src/submodules/protocols/json/JsonShapeDeserializer.spec.ts b/packages/core/src/submodules/protocols/json/JsonShapeDeserializer.spec.ts
index 231450c63588..56c639bf93ae 100644
--- a/packages/core/src/submodules/protocols/json/JsonShapeDeserializer.spec.ts
+++ b/packages/core/src/submodules/protocols/json/JsonShapeDeserializer.spec.ts
@@ -2,8 +2,9 @@ import { NumericValue } from "@smithy/core/serde";
 import type { TimestampEpochSecondsSchema } from "@smithy/types";
 import { describe, expect, test as it } from "vitest";
 
-import { widget } from "../test-schema.spec";
+import { createNestingWidget, nestingWidget, widget } from "../test-schema.spec";
 import { JsonShapeDeserializer } from "./JsonShapeDeserializer";
+import { JsonShapeSerializer } from "./JsonShapeSerializer";
 
 describe(JsonShapeDeserializer.name, () => {
   let contextSourceAvailable = false;
@@ -153,4 +154,45 @@ describe(JsonShapeDeserializer.name, () => {
     expect(await deserializer.read(widget, JSON.stringify({ scalar: "-Infinity" }))).toEqual({ scalar: -Infinity });
     expect(await deserializer.read(widget, JSON.stringify({ scalar: "NaN" }))).toEqual({ scalar: NaN });
   });
+
+  describe("performance baseline indicator", () => {
+    const serializer = new JsonShapeSerializer({
+      jsonName: true,
+      timestampFormat: { default: 7 satisfies TimestampEpochSecondsSchema, useTrait: true },
+    });
+    serializer.setSerdeContext({
+      base64Encoder: (input: Uint8Array) => {
+        return Buffer.from(input).toString("base64");
+      },
+    } as any);
+
+    it("should deserialize JSON strings", async () => {
+      const timings: string[] = [];
+      const strings = [];
+
+      // warmup
+      for (let i = 0; i < 12; ++i) {
+        const o = createNestingWidget(2 ** i);
+        serializer.write(nestingWidget, o);
+        const json = serializer.flush();
+        strings.push(json);
+        await deserializer.read(nestingWidget, json);
+      }
+
+      for (const s of strings) {
+        const A = performance.now();
+        await deserializer.read(nestingWidget, s);
+        const B = performance.now();
+
+        timings.push(`${B - A} (JSON length = ${s.length}, ${s.length / 1024 / (B - A)} kb/ms)`);
+      }
+
+      /**
+       * No assertion here.
+       * In the initial dual-pass implementation,
+       * par time is 0 to 25ms for up to 288899 chars of JSON. Up to 13 kb/ms. (kuhe's computer)
+       */
+      console.log("JsonShapeDeserializer performance timings", timings);
+    });
+  }, 30_000);
 });
diff --git a/packages/core/src/submodules/protocols/json/JsonShapeSerializer.spec.ts b/packages/core/src/submodules/protocols/json/JsonShapeSerializer.spec.ts
index 0e6423381605..921215b003d6 100644
--- a/packages/core/src/submodules/protocols/json/JsonShapeSerializer.spec.ts
+++ b/packages/core/src/submodules/protocols/json/JsonShapeSerializer.spec.ts
@@ -2,31 +2,75 @@ import { NumericValue } from "@smithy/core/serde";
 import type { TimestampEpochSecondsSchema } from "@smithy/types";
 import { describe, expect, test as it } from "vitest";
 
-import { widget } from "../test-schema.spec";
+import { createNestingWidget, nestingWidget, widget } from "../test-schema.spec";
+import { SinglePassJsonShapeSerializer } from "./experimental/SinglePassJsonShapeSerializer";
 import { JsonShapeSerializer } from "./JsonShapeSerializer";
 
 describe(JsonShapeSerializer.name, () => {
-  it("serializes data to JSON", async () => {
-    const serializer = new JsonShapeSerializer({
-      jsonName: true,
-      timestampFormat: { default: 7 satisfies TimestampEpochSecondsSchema, useTrait: true },
-    });
-    serializer.setSerdeContext({
-      base64Encoder: (input: Uint8Array) => {
-        return Buffer.from(input).toString("base64");
-      },
-    } as any);
+  const serializer1 = new JsonShapeSerializer({
+    jsonName: true,
+    timestampFormat: { default: 7 satisfies TimestampEpochSecondsSchema, useTrait: true },
+  });
+
+  const serializer2 = new SinglePassJsonShapeSerializer({
+    jsonName: true,
+    timestampFormat: { default: 7 satisfies TimestampEpochSecondsSchema, useTrait: true },
+  });
 
+  it("serializes data to JSON", async () => {
     const data = {
       timestamp: new Date(0),
       bigint: 10000000000000000000000054321n,
       bigdecimal: new NumericValue("0.10000000000000000000000054321", "bigDecimal"),
       blob: new Uint8Array([0, 0, 0, 1]),
     };
-    serializer.write(widget, data);
-    const serialization = serializer.flush();
+    serializer1.write(widget, data);
+    const serialization = serializer1.flush();
     expect(serialization).toEqual(
       `{"blob":"AAAAAQ==","timestamp":0,"bigint":10000000000000000000000054321,"bigdecimal":0.10000000000000000000000054321}`
     );
   });
+
+  describe("performance baseline indicator", () => {
+    for (const serializer of [serializer1, serializer2]) {
+      it("should serialize objects", () => {
+        const timings: string[] = [];
+        const objects = [];
+
+        // warmup
+        for (let i = 0; i < 12; ++i) {
+          const o = createNestingWidget(2 ** i);
+          objects.push(o);
+          serializer.write(nestingWidget, o);
+          serializer.write(nestingWidget, o);
+          serializer.write(nestingWidget, o);
+          serializer.write(nestingWidget, o);
+          serializer.flush();
+        }
+
+        for (let i = 0; i < objects.length; ++i) {
+          const o = objects[i];
+
+          const A = performance.now();
+          serializer.write(nestingWidget, o);
+          const serialization = serializer.flush();
+          const B = performance.now();
+
+          timings.push(
+            `${B - A} (JSON length = ${serialization.length}, ${serialization.length / 1024 / (B - A)} kb/ms)`
+          );
+        }
+
+        /**
+         * No assertion here.
+         * In the initial dual-pass implementation,
+         * par time is 0 to 30ms for up to 288899 chars of JSON. Up to 11 kb/ms. (kuhe's computer)
+         *
+         * In the single-pass implementation using string buildup,
+         * par time is 0 to 51ms for up to 288899 chars of JSON. Up to 13 kb/ms. (kuhe's computer)
+         */
+        console.log(`${serializer.constructor.name} performance timings`, timings);
+      });
+    }
+  }, 30_000);
 });
diff --git a/packages/core/src/submodules/protocols/json/JsonShapeSerializer.ts b/packages/core/src/submodules/protocols/json/JsonShapeSerializer.ts
index 9f9678a166b4..ebba45091f86 100644
--- a/packages/core/src/submodules/protocols/json/JsonShapeSerializer.ts
+++ b/packages/core/src/submodules/protocols/json/JsonShapeSerializer.ts
@@ -102,10 +102,7 @@ export class JsonShapeSerializer extends SerdeContextConfig implements ShapeSeri
       if (ns === this.rootSchema) {
         return value;
       }
-      if (!this.serdeContext?.base64Encoder) {
-        return toBase64(value);
-      }
-      return this.serdeContext?.base64Encoder(value);
+      return (this.serdeContext?.base64Encoder ?? toBase64)(value);
     }
 
     if ((ns.isTimestampSchema() || ns.isDocumentSchema()) && value instanceof Date) {
diff --git a/packages/core/src/submodules/protocols/json/experimental/SinglePassJsonShapeSerializer.ts b/packages/core/src/submodules/protocols/json/experimental/SinglePassJsonShapeSerializer.ts
new file mode 100644
index 000000000000..da2a99a2804c
--- /dev/null
+++ b/packages/core/src/submodules/protocols/json/experimental/SinglePassJsonShapeSerializer.ts
@@ -0,0 +1,194 @@
+import { determineTimestampFormat } from "@smithy/core/protocols";
+import { NormalizedSchema } from "@smithy/core/schema";
+import { dateToUtcString, generateIdempotencyToken, LazyJsonString, NumericValue } from "@smithy/core/serde";
+import type {
+  Schema,
+  ShapeSerializer,
+  TimestampDateTimeSchema,
+  TimestampEpochSecondsSchema,
+  TimestampHttpDateSchema,
+} from "@smithy/types";
+import { toBase64 } from "@smithy/util-base64";
+
+import { SerdeContextConfig } from "../../ConfigurableSerdeContext";
+import type { JsonSettings } from "../JsonCodec";
+
+/**
+ * This implementation uses single-pass JSON serialization with JS code instead of
+ * JSON.stringify.
+ *
+ * It isn't significantly faster than dual-pass ending with native JSON.stringify
+ * that I would want to use it. It seems to be barely faster in some mid-range object
+ * sizes but slower on the high end.
+ *
+ * @internal
+ */
+export class SinglePassJsonShapeSerializer extends SerdeContextConfig implements ShapeSerializer {
+  private buffer = "";
+  private rootSchema: NormalizedSchema | undefined;
+
+  public constructor(public readonly settings: JsonSettings) {
+    super();
+  }
+
+  /**
+   * Serializes the value to JSON text and holds it until flush() is called.
+   */
+  public write(schema: Schema, value: unknown): void {
+    this.rootSchema = NormalizedSchema.of(schema);
+    this.buffer = this.writeObject(this.rootSchema, value);
+  }
+
+  /**
+   * Writes the value and, when the output is a JSON object, adds a "__type"
+   * member holding the schema name from getName(true).
+   *
+   * @internal
+   */
+  public writeDiscriminatedDocument(schema: Schema, value: unknown): void {
+    this.write(schema, value);
+    // The buffer is serialized text, not an object, so the discriminator
+    // is spliced in ahead of the closing brace.
+    if (this.buffer.endsWith("}")) {
+      const members = this.buffer.slice(0, -1);
+      const typeName = JSON.stringify(NormalizedSchema.of(schema).getName(true));
+      this.buffer = `${members}${members.length > 1 ? "," : ""}"__type":${typeName}}`;
+    }
+  }
+
+  /**
+   * @returns the JSON text produced by the most recent write.
+   */
+  public flush(): string {
+    this.rootSchema = undefined;
+
+    return this.buffer;
+  }
+
+  /**
+   * Serializes a list, structure, map, or document container.
+   *
+   * @returns the JSON text, or an empty string for a null/undefined value or
+   * when the schema matches none of the container branches.
+   */
+  private writeObject(schema: Schema, value: unknown): string {
+    if (value == undefined) {
+      return "";
+    }
+
+    let b = "";
+    const ns = NormalizedSchema.of(schema);
+    const sparse = !!ns.getMergedTraits().sparse;
+
+    if (Array.isArray(value) && (ns.isDocumentSchema() || ns.isListSchema())) {
+      b += "[";
+      const listMember = ns.getValueSchema();
+      for (let i = 0; i < value.length; ++i) {
+        const item = value[i];
+        if (item != null || sparse) {
+          b += this.writeValue(listMember, item);
+          b += ",";
+        }
+      }
+    } else if (ns.isStructSchema()) {
+      b += "{";
+      for (const [name, member] of ns.structIterator()) {
+        const item = (value as any)[name];
+        // Only members that are emitted get serialized; keys are escaped as JSON strings.
+        if (item != null || member.isIdempotencyToken()) {
+          const targetKey = this.settings.jsonName ? member.getMergedTraits().jsonName ?? name : name;
+          b += `${JSON.stringify(targetKey)}:${this.writeValue(member, item)}`;
+          b += ",";
+        }
+      }
+    } else if (ns.isMapSchema() || ns.isDocumentSchema()) {
+      b += "{";
+      // Entries are serialized with the container's value schema, not the container schema itself.
+      const mapMember = ns.getValueSchema();
+      for (const [k, v] of Object.entries(value)) {
+        if (v != null || sparse) {
+          b += `${JSON.stringify(k)}:${this.writeValue(mapMember, v)}`;
+          b += ",";
+        }
+      }
+    }
+
+    if (b[b.length - 1] === ",") {
+      b = b.slice(0, -1);
+    }
+    if (b[0] === "[") {
+      b += "]";
+    }
+    if (b[0] === "{") {
+      b += "}";
+    }
+    return b;
+  }
+
+  /**
+   * Serializes a single member or element, delegating containers to writeObject.
+   */
+  private writeValue(schema: Schema, value: unknown): string {
+    const isObject = value !== null && typeof value === "object";
+
+    const ns = NormalizedSchema.of(schema);
+    // JSON.stringify quotes the string and escapes quotes, backslashes, and control characters.
+    const quote = (_: string) => JSON.stringify(_);
+
+    if (
+      (ns.isBlobSchema() && (value instanceof Uint8Array || typeof value === "string")) ||
+      (ns.isDocumentSchema() && value instanceof Uint8Array)
+    ) {
+      return quote((this.serdeContext?.base64Encoder ?? toBase64)(value));
+    }
+
+    if ((ns.isTimestampSchema() || ns.isDocumentSchema()) && value instanceof Date) {
+      const format = determineTimestampFormat(ns, this.settings);
+      switch (format) {
+        case 5 satisfies TimestampDateTimeSchema:
+          return quote(value.toISOString().replace(".000Z", "Z"));
+        case 6 satisfies TimestampHttpDateSchema:
+          return quote(dateToUtcString(value));
+        case 7 satisfies TimestampEpochSecondsSchema:
+          return String(value.getTime() / 1000);
+        default:
+          console.warn("Missing timestamp format, using epoch seconds", value);
+          return String(value.getTime() / 1000);
+      }
+    }
+
+    if (ns.isNumericSchema() && typeof value === "number") {
+      if (Math.abs(value) === Infinity || isNaN(value)) {
+        return quote(String(value));
+      }
+    }
+
+    if (ns.isStringSchema()) {
+      if (typeof value === "undefined" && ns.isIdempotencyToken()) {
+        return quote(generateIdempotencyToken());
+      }
+
+      if (typeof value === "string") {
+        const mediaType = ns.getMergedTraits().mediaType;
+
+        if (mediaType) {
+          const isJson = mediaType === "application/json" || mediaType.endsWith("+json");
+          if (isJson) {
+            return quote(LazyJsonString.from(value).toString());
+          }
+        }
+      }
+    }
+
+    if (value instanceof NumericValue) {
+      // ns can be BigDecimal or Document.
+      return value.string;
+    }
+
+    if (isObject) {
+      return this.writeObject(ns, value);
+    }
+
+    return typeof value === "string" ? quote(value) : String(value);
+  }
+}
diff --git a/packages/core/src/submodules/protocols/test-schema.spec.ts b/packages/core/src/submodules/protocols/test-schema.spec.ts
index 81d7e557910c..2bd3ebcda592 100644
--- a/packages/core/src/submodules/protocols/test-schema.spec.ts
+++ b/packages/core/src/submodules/protocols/test-schema.spec.ts
@@ -6,6 +6,8 @@ import type {
   StaticListSchema,
   StaticOperationSchema,
   StaticStructureSchema,
+  StringSchema,
+  TimestampDefaultSchema,
   TimestampEpochSecondsSchema,
 } from "@smithy/types";
 import { describe, test as it } from "vitest";
@@ -34,6 +36,43 @@ export const widget = [
   ],
 ] satisfies StaticStructureSchema;
 
+export const nestingWidget: StaticStructureSchema = [
+  3,
+  "ns",
+  "Struct",
+  0,
+  ["string", "date", "blob", "number", "list", "map", "nested"],
+  [
+    0 satisfies StringSchema,
+    4 satisfies TimestampDefaultSchema,
+    21 satisfies BlobSchema,
+    1 satisfies NumericSchema,
+    64 | 1,
+    128 | 0,
+    () => nestingWidget,
+  ],
+];
+
+export function createNestingWidget(nesting = 0) {
+  const object = {
+    string: "hello, world",
+    number: 100000,
+    list: [1, 2, 3, 4, 5, 6, 7, 8, 9],
+    map: {
+      a: "A",
+      b: "B",
+      c: "C",
+    },
+    date: new Date(0),
+    blob: new Uint8Array([0, 1, 2, 3, 4, 5, 6, 7]),
+    nested: undefined,
+  } as any;
+  if (nesting > 0) {
+    object.nested = createNestingWidget(nesting - 1);
+  }
+  return object;
+}
+
 export const deleteObjects: StaticOperationSchema = [
   9,
   "ns",
diff --git a/packages/core/src/submodules/protocols/xml/XmlShapeDeserializer.spec.ts b/packages/core/src/submodules/protocols/xml/XmlShapeDeserializer.spec.ts
index 7bdfd27f0ec4..41e92bc43427 100644
--- a/packages/core/src/submodules/protocols/xml/XmlShapeDeserializer.spec.ts
+++ b/packages/core/src/submodules/protocols/xml/XmlShapeDeserializer.spec.ts
@@ -2,10 +2,28 @@ import { NumericValue } from "@smithy/core/serde";
 import type { TimestampDateTimeSchema } from "@smithy/types";
 import { describe, expect, test as it } from "vitest";
 
-import { widget } from "../test-schema.spec";
+import { createNestingWidget, nestingWidget, widget } from "../test-schema.spec";
 import { XmlShapeDeserializer } from "./XmlShapeDeserializer";
+import { XmlShapeSerializer } from "./XmlShapeSerializer";
+
+describe(XmlShapeDeserializer.name, () => {
+  const deserializer = new XmlShapeDeserializer({
+    httpBindings: true,
+    serviceNamespace: "namespace",
+    timestampFormat: { default: 5 satisfies TimestampDateTimeSchema, useTrait: true },
+    xmlNamespace: "namespace",
+  });
+  const serializer = new XmlShapeSerializer({
+    xmlNamespace: "namespace",
+    serviceNamespace: "namespace",
+    timestampFormat: { default: 5 satisfies TimestampDateTimeSchema, useTrait: true },
+  });
+  serializer.setSerdeContext({
+    base64Encoder: (input: Uint8Array) => {
+      return Buffer.from(input).toString("base64");
+    },
+  } as any);
 
-describe("", () => {
   it("placeholder", async () => {
     const xml = `
       QUFBQQ==
@@ -13,13 +31,6 @@ describe("", () => {
       10000000000000000000000054321
       0.10000000000000000000000054321
     `;
-    const deserializer = new XmlShapeDeserializer({
-      httpBindings: true,
-      serviceNamespace: "namespace",
-      timestampFormat: { default: 5 satisfies TimestampDateTimeSchema, useTrait: true },
-      xmlNamespace: "namespace",
-    });
-
     const result = await deserializer.read(widget, xml);
     expect(result).toEqual({
       blob: new Uint8Array([65, 65, 65, 65]),
@@ -28,4 +39,35 @@ describe("", () => {
       bigdecimal: new NumericValue("0.10000000000000000000000054321", "bigDecimal"),
     });
   });
+
+  describe("performance baseline indicator", () => {
+    it("should deserialize XML strings", async () => {
+      const timings: string[] = [];
+      const strings = [];
+
+      // warmup
+      for (let i = 0; i < 12; ++i) {
+        const o = createNestingWidget(2 ** i);
+        serializer.write(nestingWidget, o);
+        const json = serializer.flush();
+        strings.push(json);
+        await deserializer.read(nestingWidget, json);
+      }
+
+      for (const s of strings) {
+        const A = performance.now();
+        await deserializer.read(nestingWidget, s);
+        const B = performance.now();
+
+        timings.push(`${B - A} (XML length = ${s.length}, ${s.length / 1024 / (B - A)} kb/ms)`);
+      }
+
+      /**
+       * No assertion here.
+       * In the initial dual-pass implementation,
+       * par time is 0 to 187ms for up to 905676 chars of XML. Up to 10 kb/ms. (kuhe's computer)
+       */
+      console.log("XmlShapeDeserializer performance timings", timings);
+    });
+  }, 30_000);
 });
diff --git a/packages/core/src/submodules/protocols/xml/XmlShapeSerializer.spec.ts b/packages/core/src/submodules/protocols/xml/XmlShapeSerializer.spec.ts
index 90e3280d7e4b..378ecbd0003a 100644
--- a/packages/core/src/submodules/protocols/xml/XmlShapeSerializer.spec.ts
+++ b/packages/core/src/submodules/protocols/xml/XmlShapeSerializer.spec.ts
@@ -2,23 +2,23 @@ import { NumericValue } from "@smithy/core/serde";
 import type { TimestampDateTimeSchema } from "@smithy/types";
 import { describe, expect, test as it } from "vitest";
 
-import { widget } from "../test-schema.spec";
+import { createNestingWidget, nestingWidget, widget } from "../test-schema.spec";
 import { simpleFormatXml } from "./simpleFormatXml";
 import { XmlShapeSerializer } from "./XmlShapeSerializer";
 
 describe(XmlShapeSerializer.name, () => {
-  it("serializes data to Query", async () => {
-    const serializer = new XmlShapeSerializer({
-      xmlNamespace: "namespace",
-      serviceNamespace: "namespace",
-      timestampFormat: { default: 5 satisfies TimestampDateTimeSchema, useTrait: true },
-    });
-    serializer.setSerdeContext({
-      base64Encoder: (input: Uint8Array) => {
-        return Buffer.from(input).toString("base64");
-      },
-    } as any);
+  const serializer = new XmlShapeSerializer({
+    xmlNamespace: "namespace",
+    serviceNamespace: "namespace",
+    timestampFormat: { default: 5 satisfies TimestampDateTimeSchema, useTrait: true },
+  });
+  serializer.setSerdeContext({
+    base64Encoder: (input: Uint8Array) => {
+      return Buffer.from(input).toString("base64");
+    },
+  } as any);
 
+  it("serializes data to Query", async () => {
     const data = {
       timestamp: new Date(0),
       bigint: 10000000000000000000000054321n,
@@ -42,4 +42,37 @@ describe(XmlShapeSerializer.name, () => {
 `);
   });
+
+  describe("performance baseline indicator", () => {
+    it("should serialize objects", () => {
+      const timings: string[] = [];
+      const objects = [];
+
+      // warmup
+      for (let i = 0; i < 13; ++i) {
+        const o = createNestingWidget(2 ** i);
+        objects.push(o);
+        serializer.write(nestingWidget, o);
+        serializer.flush();
+      }
+
+      for (let i = 0; i < objects.length; ++i) {
+        const o = objects[i];
+
+        const A = performance.now();
+        serializer.write(nestingWidget, o);
+        const serialization = serializer.flush();
+        const B = performance.now();
+
+        timings.push(`${B - A} (XML length = ${serialization.length}, ${serialization.length / 1024 / (B - A)} kb/ms)`);
+      }
+
+      /**
+       * No assertion here.
+       * In the initial dual-pass implementation,
+       * par time is 0 to 600ms for up to 1810892 chars of XML. Up to 28 kb/ms. (kuhe's computer)
+       */
+      console.log("XmlShapeSerializer performance timings", timings);
+    });
+  }, 30_000);
 });