Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 72 additions & 0 deletions packages/core/src/submodules/protocols/cbor/CborCodec.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import { CborCodec } from "@smithy/core/cbor";
import { describe, test as it } from "vitest";

import { createNestingWidget, nestingWidget } from "../test-schema.spec";

/**
 * Informal throughput benchmark for the CBOR codec's serializer and deserializer.
 * Nothing is asserted; timings are only logged so they can be compared by eye
 * against the "par" figures recorded in the comments below.
 */
describe("performance baseline indicator", () => {
  const codec = new CborCodec();
  const serializer = codec.createSerializer();
  const deserializer = codec.createDeserializer();

  it("should serialize objects", () => {
    const timings: string[] = [];
    const objects = [];

    // Warm-up pass: build widgets from the arguments 1, 2, 4, ... 4096 and
    // serialize each once before anything is measured.
    for (let exponent = 0; exponent < 13; ++exponent) {
      const widgetObject = createNestingWidget(2 ** exponent);
      objects.push(widgetObject);
      serializer.write(nestingWidget, widgetObject);
      serializer.flush();
    }

    // Measured pass: time write + flush for every widget built above.
    for (const widgetObject of objects) {
      const start = performance.now();
      serializer.write(nestingWidget, widgetObject);
      const serialization = serializer.flush();
      const end = performance.now();

      const elapsedMs = end - start;
      const byteLength = serialization.byteLength;

      timings.push(`${elapsedMs} (byte length = ${byteLength}, ${byteLength / 1024 / elapsedMs} kb/ms)`);
    }

    /**
     * No assertion here.
     * In the initial dual-pass implementation,
     * par time is 0 to 23ms for up to 381014 bytes of CBOR. Up to 15 kb/ms. (kuhe's computer)
     */
    console.log("CborShapeSerializer performance timings", timings);
  });

  it("should deserialize bytes", async () => {
    const timings: string[] = [];
    const payloads = [];

    // Warm-up pass: serialize widgets built from the arguments 1, 2, 4, ... 2048,
    // keep the encoded payloads, and deserialize each once.
    for (let exponent = 0; exponent < 12; ++exponent) {
      const widgetObject = createNestingWidget(2 ** exponent);
      serializer.write(nestingWidget, widgetObject);
      const encoded = serializer.flush();
      payloads.push(encoded);
      await deserializer.read(nestingWidget, encoded);
    }

    // Measured pass: time a single read of every stored payload.
    for (const encoded of payloads) {
      const start = performance.now();
      await deserializer.read(nestingWidget, encoded);
      const end = performance.now();

      const elapsedMs = end - start;

      timings.push(
        `${elapsedMs} (byte length = ${encoded.byteLength}, ${encoded.byteLength / 1024 / elapsedMs} kb/ms)`
      );
    }

    /**
     * No assertion here.
     * In the initial dual-pass implementation,
     * par time is 0 to 9ms for up to 190550 bytes of CBOR. Up to 23 kb/ms. (kuhe's computer)
     */
    console.log("CborShapeDeserializer performance timings", timings);
  });
}, 30_000);
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@ import { NumericValue } from "@smithy/core/serde";
import type { TimestampEpochSecondsSchema } from "@smithy/types";
import { describe, expect, test as it } from "vitest";

import { widget } from "../test-schema.spec";
import { createNestingWidget, nestingWidget, widget } from "../test-schema.spec";
import { JsonShapeDeserializer } from "./JsonShapeDeserializer";
import { JsonShapeSerializer } from "./JsonShapeSerializer";

describe(JsonShapeDeserializer.name, () => {
let contextSourceAvailable = false;
Expand Down Expand Up @@ -153,4 +154,45 @@ describe(JsonShapeDeserializer.name, () => {
expect(await deserializer.read(widget, JSON.stringify({ scalar: "-Infinity" }))).toEqual({ scalar: -Infinity });
expect(await deserializer.read(widget, JSON.stringify({ scalar: "NaN" }))).toEqual({ scalar: NaN });
});

/**
 * Informal throughput benchmark for the JSON deserializer. Nothing is asserted;
 * timings are only logged for comparison with the "par" figures noted below.
 * The `deserializer` under test comes from the enclosing suite.
 */
describe("performance baseline indicator", () => {
  // Serializer used only to manufacture the JSON inputs for the benchmark.
  const serializer = new JsonShapeSerializer({
    jsonName: true,
    timestampFormat: { default: 7 satisfies TimestampEpochSecondsSchema, useTrait: true },
  });
  serializer.setSerdeContext({
    base64Encoder: (input: Uint8Array) => Buffer.from(input).toString("base64"),
  } as any);

  it("should deserialize JSON strings", async () => {
    const timings: string[] = [];
    const jsonStrings = [];

    // Warm-up pass: serialize widgets built from the arguments 1, 2, 4, ... 2048,
    // keep the JSON text, and deserialize each once.
    for (let exponent = 0; exponent < 12; ++exponent) {
      const widgetObject = createNestingWidget(2 ** exponent);
      serializer.write(nestingWidget, widgetObject);
      const jsonText = serializer.flush();
      jsonStrings.push(jsonText);
      await deserializer.read(nestingWidget, jsonText);
    }

    // Measured pass: time a single read of every stored JSON string.
    for (const jsonText of jsonStrings) {
      const start = performance.now();
      await deserializer.read(nestingWidget, jsonText);
      const end = performance.now();

      const elapsedMs = end - start;

      timings.push(`${elapsedMs} (JSON length = ${jsonText.length}, ${jsonText.length / 1024 / elapsedMs} kb/ms)`);
    }

    /**
     * No assertion here.
     * In the initial dual-pass implementation,
     * par time is 0 to 25ms for up to 288899 chars of JSON. Up to 13 kb/ms. (kuhe's computer)
     */
    console.log("JsonShapeDeserializer performance timings", timings);
  });
}, 30_000);
});
Original file line number Diff line number Diff line change
Expand Up @@ -2,31 +2,75 @@ import { NumericValue } from "@smithy/core/serde";
import type { TimestampEpochSecondsSchema } from "@smithy/types";
import { describe, expect, test as it } from "vitest";

import { widget } from "../test-schema.spec";
import { createNestingWidget, nestingWidget, widget } from "../test-schema.spec";
import { SinglePassJsonShapeSerializer } from "./experimental/SinglePassJsonShapeSerializer";
import { JsonShapeSerializer } from "./JsonShapeSerializer";

describe(JsonShapeSerializer.name, () => {
it("serializes data to JSON", async () => {
const serializer = new JsonShapeSerializer({
jsonName: true,
timestampFormat: { default: 7 satisfies TimestampEpochSecondsSchema, useTrait: true },
});
serializer.setSerdeContext({
base64Encoder: (input: Uint8Array) => {
return Buffer.from(input).toString("base64");
},
} as any);
const serializer1 = new JsonShapeSerializer({
jsonName: true,
timestampFormat: { default: 7 satisfies TimestampEpochSecondsSchema, useTrait: true },
});

const serializer2 = new SinglePassJsonShapeSerializer({
jsonName: true,
timestampFormat: { default: 7 satisfies TimestampEpochSecondsSchema, useTrait: true },
});

it("serializes data to JSON", async () => {
const data = {
timestamp: new Date(0),
bigint: 10000000000000000000000054321n,
bigdecimal: new NumericValue("0.10000000000000000000000054321", "bigDecimal"),
blob: new Uint8Array([0, 0, 0, 1]),
};
serializer.write(widget, data);
const serialization = serializer.flush();
serializer1.write(widget, data);
const serialization = serializer1.flush();
expect(serialization).toEqual(
`{"blob":"AAAAAQ==","timestamp":0,"bigint":10000000000000000000000054321,"bigdecimal":0.10000000000000000000000054321}`
);
});

/**
 * Informal throughput benchmark comparing the dual-pass and single-pass JSON
 * serializers declared in the enclosing suite (`serializer1`, `serializer2`).
 * Nothing is asserted; timings are only logged for comparison with the "par"
 * figures noted below.
 */
describe("performance baseline indicator", () => {
  for (const serializer of [serializer1, serializer2]) {
    // Include the implementation name in the title so the two registered tests
    // are distinguishable in the reporter output.
    it(`${serializer.constructor.name} should serialize objects`, () => {
      const timings: string[] = [];
      const objects = [];

      // Warm-up pass: build widgets from the arguments 1, 2, 4, ... 2048 and
      // run several writes per widget before anything is measured.
      for (let i = 0; i < 12; ++i) {
        const o = createNestingWidget(2 ** i);
        objects.push(o);
        serializer.write(nestingWidget, o);
        serializer.write(nestingWidget, o);
        serializer.write(nestingWidget, o);
        serializer.write(nestingWidget, o);
        serializer.flush();
      }

      // Measured pass: time write + flush for every widget built above.
      for (const o of objects) {
        const A = performance.now();
        serializer.write(nestingWidget, o);
        const serialization = serializer.flush();
        const B = performance.now();

        timings.push(
          `${B - A} (JSON length = ${serialization.length}, ${serialization.length / 1024 / (B - A)} kb/ms)`
        );
      }

      /**
       * No assertion here.
       * In the initial dual-pass implementation,
       * par time is 0 to 30ms for up to 288899 chars of JSON. Up to 11 kb/ms. (kuhe's computer)
       *
       * In the single-pass implementation using string buildup,
       * par time is 0 to 51ms for up to 288899 chars of JSON. Up to 13 kb/ms. (kuhe's computer)
       */
      console.log(`${serializer.constructor.name} performance timings`, timings);
    });
  }
}, 30_000);
});
Original file line number Diff line number Diff line change
Expand Up @@ -102,10 +102,7 @@ export class JsonShapeSerializer extends SerdeContextConfig implements ShapeSeri
if (ns === this.rootSchema) {
return value;
}
if (!this.serdeContext?.base64Encoder) {
return toBase64(value);
}
return this.serdeContext?.base64Encoder(value);
return (this.serdeContext?.base64Encoder ?? toBase64)(value);
}

if ((ns.isTimestampSchema() || ns.isDocumentSchema()) && value instanceof Date) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
import { determineTimestampFormat } from "@smithy/core/protocols";
import { NormalizedSchema } from "@smithy/core/schema";
import { dateToUtcString, generateIdempotencyToken, LazyJsonString, NumericValue } from "@smithy/core/serde";
import type {
Schema,
ShapeSerializer,
TimestampDateTimeSchema,
TimestampEpochSecondsSchema,
TimestampHttpDateSchema,
} from "@smithy/types";
import { toBase64 } from "@smithy/util-base64";

import { SerdeContextConfig } from "../../ConfigurableSerdeContext";
import type { JsonSettings } from "../JsonCodec";

/**
 * Experimental JSON serializer that emits the JSON text in a single traversal
 * of the input, concatenating strings directly instead of building an
 * intermediate object and handing it to JSON.stringify.
 *
 * It is not faster than the dual-pass approach (which ends with native
 * JSON.stringify) by a margin that would justify switching to it: it appears
 * barely faster for some mid-range object sizes but slower on the high end.
 *
 * @internal
 */
export class SinglePassJsonShapeSerializer extends SerdeContextConfig implements ShapeSerializer<string> {
  /** JSON text produced by the most recent write call. */
  private buffer = "";
  /** Normalized schema of the value currently being written; cleared by flush. */
  private rootSchema: NormalizedSchema | undefined;

  public constructor(public readonly settings: JsonSettings) {
    super();
  }

  /**
   * Serializes `value` according to `schema` and stores the resulting JSON text,
   * replacing whatever a previous write stored.
   */
  public write(schema: Schema, value: unknown): void {
    this.rootSchema = NormalizedSchema.of(schema);
    this.buffer = this.writeObject(this.rootSchema, value);
  }

  /**
   * Same as write, but when the output is a JSON object a `__type` member holding
   * the schema's namespaced name is appended to it.
   *
   * @internal
   */
  public writeDiscriminatedDocument(schema: Schema, value: unknown): void {
    this.write(schema, value);

    // The buffer is JSON text, so the discriminator has to be spliced into the
    // string; only object output ("{...}") can carry a member.
    if (this.buffer[0] !== "{") {
      return;
    }
    const typeName = NormalizedSchema.of(schema).getName(true);
    if (typeName == null) {
      return;
    }
    const typeEntry = `"__type":${JSON.stringify(typeName)}`;
    const body = this.buffer.slice(1, -1);
    this.buffer = body.length > 0 ? `{${body},${typeEntry}}` : `{${typeEntry}}`;
  }

  /**
   * Returns the JSON text produced by the last write and clears the root schema.
   */
  public flush(): string {
    this.rootSchema = undefined;

    return this.buffer;
  }

  /**
   * Serializes an aggregate value (list, structure, map or document) to JSON text.
   *
   * Returns an empty string when `value` is null/undefined, and also when neither
   * the array, structure, nor map/document branch applies to the schema/value pair.
   */
  private writeObject(schema: Schema, value: unknown): string {
    if (value == undefined) {
      return "";
    }

    let b = "";
    const ns = NormalizedSchema.of(schema);
    const sparse = !!ns.getMergedTraits().sparse;

    if (Array.isArray(value) && (ns.isDocumentSchema() || ns.isListSchema())) {
      const itemSchema = ns.getValueSchema();
      b += "[";
      for (const item of value) {
        // Null entries are kept only for sparse collections.
        if (item != null || sparse) {
          b += this.writeValue(itemSchema, item);
          b += ",";
        }
      }
    } else if (ns.isStructSchema()) {
      b += "{";
      for (const [name, member] of ns.structIterator()) {
        const item = (value as any)[name];
        // Absent members are skipped, except idempotency tokens, for which
        // writeValue generates a value when none was supplied.
        if (item != null || member.isIdempotencyToken()) {
          const targetKey = this.settings.jsonName ? member.getMergedTraits().jsonName ?? name : name;
          b += `${JSON.stringify(targetKey)}:${this.writeValue(member, item)}`;
          b += ",";
        }
      }
    } else if (ns.isMapSchema() || ns.isDocumentSchema()) {
      // Map entries are written with the value schema, mirroring the list branch,
      // so that blob/timestamp/etc. handling in writeValue sees the right schema.
      const entrySchema = ns.getValueSchema();
      b += "{";
      for (const [k, v] of Object.entries(value)) {
        if (v != null || sparse) {
          b += `${JSON.stringify(k)}:${this.writeValue(entrySchema, v)}`;
          b += ",";
        }
      }
    }

    // Drop the separator left after the final element, then close the container.
    if (b[b.length - 1] === ",") {
      b = b.slice(0, -1);
    }
    if (b[0] === "[") {
      b += "]";
    }
    if (b[0] === "{") {
      b += "}";
    }
    return b;
  }

  /**
   * Serializes a single value to JSON text, applying schema-specific encodings:
   * base64 for blobs, the configured timestamp format for dates, quoted
   * Infinity/-Infinity/NaN for numbers, generated idempotency tokens, and raw
   * digits for NumericValue. Other objects are delegated to writeObject.
   *
   * All string output goes through JSON.stringify so that quotes, backslashes
   * and control characters are escaped.
   */
  private writeValue(schema: Schema, value: unknown): string {
    const isObject = value !== null && typeof value === "object";

    const ns = NormalizedSchema.of(schema);

    if (
      (ns.isBlobSchema() && (value instanceof Uint8Array || typeof value === "string")) ||
      (ns.isDocumentSchema() && value instanceof Uint8Array)
    ) {
      return JSON.stringify((this.serdeContext?.base64Encoder ?? toBase64)(value));
    }

    if ((ns.isTimestampSchema() || ns.isDocumentSchema()) && value instanceof Date) {
      const format = determineTimestampFormat(ns, this.settings);
      switch (format) {
        case 5 satisfies TimestampDateTimeSchema:
          return JSON.stringify(value.toISOString().replace(".000Z", "Z"));
        case 6 satisfies TimestampHttpDateSchema:
          return JSON.stringify(dateToUtcString(value));
        case 7 satisfies TimestampEpochSecondsSchema:
          return String(value.getTime() / 1000);
        default:
          console.warn("Missing timestamp format, using epoch seconds", value);
          return String(value.getTime() / 1000);
      }
    }

    if (ns.isNumericSchema() && typeof value === "number") {
      // JSON has no literal for these, so they are sent as strings.
      if (Math.abs(value) === Infinity || isNaN(value)) {
        return JSON.stringify(String(value));
      }
    }

    if (ns.isStringSchema()) {
      if (typeof value === "undefined" && ns.isIdempotencyToken()) {
        return JSON.stringify(generateIdempotencyToken());
      }

      if (typeof value === "string") {
        const mediaType = ns.getMergedTraits().mediaType;

        if (mediaType) {
          const isJson = mediaType === "application/json" || mediaType.endsWith("+json");
          if (isJson) {
            return JSON.stringify(LazyJsonString.from(value).toString());
          }
        }
      }
    }

    if (value == null) {
      // Covers undefined entries of sparse collections, which would otherwise
      // be emitted as the non-JSON token "undefined".
      return "null";
    }

    if (value instanceof NumericValue) {
      // ns can be BigDecimal or Document.
      return value.string;
    }

    if (isObject) {
      return this.writeObject(ns, value);
    }

    return typeof value === "string" ? JSON.stringify(value) : String(value);
  }
}
Loading
Loading